(*******************************************************************)
(* UCalcClusteringTree.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(*******************************************************************)

{
@abstract(Arbre de Classification -- Extension du schma CART [Regression Tree] au cas endogne-multivarie)
@author(Ricco)
@created(08/09/2005)

L'inspiration est la mthode divisive monothtique de Lechevalier et Chavent (1999).

Classes de calcul et structure d'arbre. Le modle utilis est celui de << UCalcSpvTreeDefinition.pas >>
}

unit UCalcClusteringTree;

interface

USES
        UCalcStatDes, UDatasetdefinition, UDatasetImplementation,
        UOperatorDefinition,
        UDatasetExamples,
        UCalcRulesDefinition,
        UCalcTreeStructureDefinition,
        UCalcDistribution,
        UCompClusteringDefinition;


TYPE

    //tableau des variances utiliss pour la normalisation
    TTabVarianceNormalisation = array of double;

    //statistiques sur target relative  une feuille -- voir si des calculs spcifiques sont souhaitables
    TStatTargets = class(TLstCalcStatDesContinuous)
                   public
                   procedure merge(other: TStatTargets);
                   function  duplicate(): TStatTargets;
                   function  inertie(norm: TTabVarianceNormalisation): double;
                   end;

    //structure de feuille -- clustering
    TSplitLeafClus = class(TSplitLeaf)
                     private
                     //statistiques sur l'ensemble de variables
                     FStats :  TStatTargets;
                     public
                     constructor create(prmTargetAttributes: TLstAttributes; prmCond: TRuleCondition; nbExamples: integer); override;
                     procedure   BeginUpdate(); override;
                     procedure   AddExample(prmExample: integer); override;
                     procedure   EndUpdate(); override;
                     procedure   Merge(prmOther: TSplitLeaf); override;
                     destructor  Destroy; override;
                     property    Stats: TStatTargets read FStats;
                     end;

    //split
    TSplitAttributClus = class(TSplitAttribut)
                         protected
                         //calculer la distance pondre entre deux feuilles
                         function    computeDistance(l1,l2: TSplitLeafClus): double;
                         function    getClassSplitLeaf(): TClassSplitLeaf; override;
                         function    ComputeGoodness(): double; override;
                         function    ComputeAcceptSplit(): boolean; override;
                         procedure   OptimizeDiscreteLeaves(); override;
                         procedure   OptimizeContinuousLeaves(); override;
                         end;

    //liste de splis
    TLstSplitAttClus = class(TLstSplitAttributes)
                       protected
                       function  getClassSplitAttribut(): TClassSplitAttribut; override;
                       end;

    //noeud de l'arbre
    TMLTreeNodeClus = class(TMLTreeNode)
                      protected
                      //stats locales
                      FStats: TStatTargets;
                      //numro de cluster
                      FNumCluster: TTypeDiscrete;
                      function  isNoSplitNeeded(): boolean; override;
                      function  getClassLstSplitAttributes(): TClassLstSplitAttributes; override;
                      procedure computeLocalInfos(prmExamples: TExamples); override;
                      procedure getLeafInfos(prmLeaf: TSplitLeaf); override;
                      public
                      function   getHTMLLeafInfos(): string; override;
                      destructor destroy; override;
                      end;

    //structure d'arbre
    TMLTreeStructureClus = class(TMLTreeStructure)
                           private
                           //inertie totale
                           FTSS: double;
                           //valeurs pour la normalisation des distances
                           FNorm: TTabVarianceNormalisation;
                           protected
                           //statistiques sur la racine de l'arbre
                           FStats: TStatTargets;
                           //chantillon pour l'lagage
                           FPruningSet: TExamples;
                           function   getClassMLTreeNode(): TClassMLTreeNode; override;
                           //dcomposer app-pruning le cas chant, surcharg chez CART par ex.
                           procedure  defineTrainingSet(prmExamples: TExamples); virtual;
                           //prparer et effectuer les calculs prparatoires
                           procedure  prepareDataset(prmExamples: TExamples); override;
                           //numroter les feuilles
                           procedure  setClusterNumber();
                           //calculer le pourcentage d'inertie explique
                           function   computeBSS(): double;
                           public
                           constructor create(paramMethod: TOperatorParameter; prmTarget,prmInput: TLstAttributes; prmExamples: TExamples); override;
                           destructor  destroy(); override;
                           procedure   GrowingTree(); override;
                           procedure   PostGrowing(); override;
                           procedure   PostPruning(); override;
                           property    Normalization: TTabVarianceNormalisation read FNorm;
                           property    TSS: double read FTSS;
                           end;

    //classe de structure d'arbre
    TClassMLTreeStrucClus = class of TMLTreeStructureClus;

    //oprateur de calcul
    TCalcClusTree = class(TCalcClustering)
                    protected
                    FTree: TMLTreeStructureClus;
                    {description des informations spcifiques  la mthode}
                    function  getHTMLSpecificDescription(): string; virtual;
                    {envoyer la description HTML du clustering}
                    function  getHTMLClustering(): string; override;
                    {envoyer la description HTML de l'valuation}
                    function  GetHTMLEvaluation(): string; override;
                    {rcuprer la classe de construction d'arbre}
                    function  getClassTreeStructure(): TClassMLTreeStrucClus; virtual;
                    {new -- 14/05/2006 -- rcuprer la feuille associe  un individu  classer}
                    function  SetLeafExample(example: integer): TMLTreeNodeClus;
                    public
                    {dtruire}
                    destructor  Destroy(); override;
                    {lancer les calculs}
                    procedure   BuildClusters(prmExamples: TExamples); override;
                    {dcrit l'ensemble des clusters}
                    procedure FillClusAttDef(); override;
                    {renvoie le cluster d'un individu -- modifi 14/05/2006 -- appel  SetLeafExample}
                    function  SetClusterExample(example: integer): TTypeDiscrete; override;
                    {valuer la qualit du clustering - cela dpend du type de target utilis entre autres}
                    procedure EvaluateClustering(prmExamples: TExamples); override;
                    end;

    



implementation

uses
    Sysutils, Math,
    UCompClusteringTree, UStringAddBuffered, UConstConfiguration, ULogFile;

{ TSplitLeafClus }

procedure TSplitLeafClus.AddExample(prmExample: integer);
begin
 inherited AddExample(prmExample);
 FStats.AddValue(prmExample);
end;

procedure TSplitLeafClus.BeginUpdate;
begin
 inherited BeginUpdate();
 FStats.BeginUpdate();
end;

constructor TSplitLeafClus.create(prmTargetAttributes: TLstAttributes;
  prmCond: TRuleCondition; nbExamples: integer);
begin
 inherited Create(prmTargetAttributes,prmCond,nbExamples);
 FStats:= TStatTargets.Create(prmTargetAttributes,NIL);
end;

destructor TSplitLeafClus.Destroy;
begin
 FStats.Free();
 inherited destroy();
end;

procedure TSplitLeafClus.EndUpdate;
begin
 inherited EndUpdate();
 FStats.EndUpdate();
end;

procedure TSplitLeafClus.Merge(prmOther: TSplitLeaf);
begin
 inherited merge(prmOther);
 FStats.merge((prmOther as TSplitLeafClus).Stats);
end;

{ TStatTargets }

function TStatTargets.duplicate: TStatTargets;
var output: TStatTargets;
    j: integer;
begin
 output:= TStatTargets.Create(nil,nil);
 for j:= 0 to pred(self.Count) do
  output.AddStat(self.Stat(j).duplicate());
 result:= output;
end;

function TStatTargets.inertie(norm: TTabVarianceNormalisation): double;
var sum: double;
    j: integer;
begin
 sum:= 0.0;
 if (self.NbExamples > 0)
  then
   begin
    for j:= 0 to pred(self.Count) do
     sum:= sum + (self.Stat(j) as TCalcStatDesContinuous).TSS / norm[j];
   end;
 //
 result:= sum;
end;

procedure TStatTargets.merge(other: TStatTargets);
var j: integer;
begin
 //aucun contrle -- on considre que tout concorde
 for j:= 0 to pred(self.Count) do
  (self.Stat(j) as TCalcStatDesContinuous).merge(other.Stat(j) as TCalcStatDesContinuous);
end;

{ TSplitAttributClus }

function TSplitAttributClus.ComputeAcceptSplit: boolean;
var ok: boolean;
begin
 //deux enfants, tous non-vides
 ok:= (self.Count = 2);
 ok:= ok and (self.SplitLeaf[0].NbExamples > (self.PrmMethod as TOpPrmClusTree).LeavesSizeAfterSplit);
 ok:= ok and (self.SplitLeaf[1].NbExamples > (self.PrmMethod as TOpPrmClusTree).LeavesSizeAfterSplit);
 //qualit suffisante
 ok:= ok and (self.GoodnessSplit >= (self.PrmMethod as TOpPrmClusTree).GoodnessThreshold); 
 //and then...
 result:= ok;
end;

function TSplitAttributClus.computeDistance(l1,
  l2: TSplitLeafClus): double;
var dist,value,p1,p2: double;
    s1,s2: TCalcStatDesContinuous;
    j: integer;
begin
  dist:= 0.0;
  for j:= 0 to pred(l1.Stats.Count) do
   begin
    s1:= l1.Stats.Stat(j) as TCalcStatDesContinuous;
    s2:= l2.Stats.Stat(j) as TCalcStatDesContinuous;
    value:= SQR(s1.Average - s2.Average);
    //normalisation ventuelle...
    value:= value / (self.Node.TreeStructure as TMLTreeStructureClus).Normalization[j];
    //somme
    dist:= dist + value;
   end;
  //pondrer
  p1:= 1.0 * l1.NbExamples;
  p2:= 1.0 * l2.NbExamples;
  if ((p1 + p2) > 0.0)
   then
    begin
     dist:= dist * (p1 * p2) / (p1 + p2);
     //exprimer le gain en pourcentage de l'inertie totale
     result:= 100.0 * dist / (self.Node.TreeStructure as TMLTreeStructureClus).TSS;
    end
   else result:= 0.0;
end;

function TSplitAttributClus.ComputeGoodness: double;
var value: double;
begin
 //si le nombre d'enfants est diffrent de 2, on refuse !
 if (self.Count <> 2)
  then value:= -1.0 * MATH.MaxDouble
  else value:= self.computeDistance(self.SplitLeaf[0] as TSplitLeafClus,self.SplitLeaf[1] as TSplitLeafClus);
 //TraceLog.WriteToLogFile(format('goodness of %s --> %.4f',[Attribute.Name,value]));
 result:= value;
end;

function TSplitAttributClus.getClassSplitLeaf: TClassSplitLeaf;
begin
 result:= TSplitLeafClus;
end;

procedure TSplitAttributClus.OptimizeContinuousLeaves;
var ls,rs: TSplitLeafClus;
    exSorted: TExamples;
    i,j,example,numAtt: integer;
    v,vMax: double;
    m,mMax: TTypeContinue;
    tabL,tabR: array of double;
    pL,pR: double;
    clusNode: TMLTreeNodeClus;
    minP: double;
begin

 clusNode:= node as TMLTreeNodeClus;

 //taille minimale
 minP:= 1.0 * (self.PrmMethod as TOpPrmClusTree).LeavesSizeAfterSplit; 

 //toujours binaire donc
 ls:= self.SplitLeaf[0] as TSplitLeafClus;
 rs:= self.SplitLeaf[1] as TSplitLeafClus;

 //prendre la liste trie pour l'instant,  voir les optimisations plus tard
 //rcuprer celui envoy par le sommet - ouh l c'est pas beau
 numAtt:= Node.InputAttributes.GetIndex(Attribute);
 exSorted:= nil;
 if (numAtt<Node.InputSortedExamples.Count)
  then exSorted:= Node.InputSortedExamples.Items[numAtt] as TExamples;
 if not(assigned(exSorted)) OR (exSorted.Size<>Node.Examples.Size)
  //c'est la cata
  then
   begin
    TraceLog.WriteToLogFile('mauvaise rcupration de la liste trie');
    Raise Exception.Create('mauvaise rcupration de la liste trie');
   end;

 //crer les tableaux temporaire
 setLength(tabL,ls.Stats.Count);
 setLength(tabR,rs.Stats.Count);
 
 mMax:= self.Attribute.cValue[exSorted.Number[1]];
 vMax:= 0.0;
 //de gauche  droite
 pL:= 0.0;
 pR:= 1.0 * Node.Examples.Size;
 for j:= 0  to pred(clusNode.FStats.Count) do
  tabR[j]:= (clusNode.FStats.Stat(j) as TCalcStatDesContinuous).Sum;

 //pour chaque individu
 for i:= 1 to pred(exSorted.Size) do
  begin
   example:= exSorted.Number[i];
   //dcaler le seuil
   pL:= pL + 1.0;
   pR:= pR - 1.0;
   for j:= 0 to pred(clusNode.FStats.Count) do
    begin
     tabL[j]:= tabL[j] + clusNode.FStats.Stat(j).Attribute.cValue[example];
     tabR[j]:= tabR[j] - clusNode.FStats.Stat(j).Attribute.cValue[example];
    end;
   //pas de tests  effectuer sur les ex-aequos
   //new -- 09/09/2005 -- et si les effectifs sont trop petits
   if (self.Attribute.cValue[example]<self.Attribute.cValue[exSorted.Number[succ(i)]]) AND (pL >= minP) AND (pR >= minP)
    then
     begin
      m:= 0.5*(self.Attribute.cValue[example]+self.Attribute.cValue[exSorted.Number[succ(i)]]);
      //calcul rapide du gain pondr
      v:= 0.0;
      for j:= 0 to pred(clusNode.FStats.Count) do
       v:= v + SQR(tabL[j]/pL - tabR[j]/pR) / (clusNode.TreeStructure as TMLTreeStructureClus).Normalization[j];
      v:= v * (pL * pR) / (pL + pR);
      //suite standard
      if (v>vMax)
       then
        begin
         vMax:= v;
         mMax:= m;
        end;
     end;
  end;

 Finalize(tabL);
 Finalize(tabR);

 //replacer le seuil
 (ls.Condition as TRuleCondContinue).SetNewThresold(mMax);
 (rs.Condition as TRuleCondContinue).SetNewThresold(mMax);
 self.BeginUpdate();
 for i:= 1 to self.Node.Examples.Size do
  begin
   example:= self.Node.Examples.Number[i];
   m:= self.Attribute.cValue[example];
   //si ce n'est toi, c'est donc ton frre
   if ls.Condition.TestValue(m)
    then ls.AddExample(example)
    else rs.AddExample(example);
  end;
 self.EndUpdate();

 //info
 //TraceLog.WriteToLogFile(format('%s splited, with L = %d, R = %d at value = %.2f --> %.2f',[self.Attribute.Name,ls.NbExamples,rs.NbExamples,mMax,vMax]));

end;

procedure TSplitAttributClus.OptimizeDiscreteLeaves;
var i,iMin,j,jMin: integer;
    l_i,l_j: TSplitLeafClus;
    dist,minDist: double;
begin
  //-------------------------------
  //-- arbre binaire au final... --
  //-------------------------------

  //la meilleure solution est un CAH local sur les modalits du split-attribut
  //on calcule donc la "distance" pondre entre chaque sommet

  //tant que l'on a plus de 2 enfants
  while (self.Count>2) do
  begin
   //initialisations
   minDist:= 1.0e308;
   iMin:= -1;
   jMin:= -1;
   //calculer les distances deux  deux des sommets
   //bcp d'lments sont calculs plusieurs fois ici mais les conserver puis les rechercher serait plus long encore
   for i:= 0 to self.Count-2 do
    begin
     l_i:= self.SplitLeaf[i] as TSplitLeafClus;
     for j:= succ(i) to self.Count-1 do
      begin
       l_j:= self.SplitLeaf[j] as TSplitLeafClus;
       //calculer la distance entre les deux sommets
       dist:= 0.0;
       //si l'un des deux est vide, on est ok !!!
       //sinon, on cherche donc...
       if (l_i.NbExamples > 0) and (l_j.NbExamples > 0)
        then dist:= self.computeDistance(l_i,l_j);
       //recherche du min
       if (dist<minDist)
        then
         begin
          minDist:= dist;
          iMin:= i;
          jMin:= j;
         end;
      end;
    end;
   //fusionner les sommets ?
   if (iMin<0) and (jMin<0)
    then
     //c'est la CATA !
     begin
      iMin:= 0;
      jMin:= 1;
      TraceLog.WriteToLogFile('//!\ ERREUR -- PBM FUSION FORCEE DANS CART !');
     end;
    //et on fusionne parce qu'il faut arriver au binaire
    l_i:= self.SplitLeaf[iMin] as TSplitLeafClus;
    l_j:= self.SplitLeaf[jMin] as TSplitLeafClus;
    //TraceLog.WriteToLogFile(format('MERGE : %d + %d',[l_i.NbExamples,l_j.NbExamples]));
    l_i.Merge(l_j);
    //TraceLog.WriteToLogFile(format('OUTPUT --> %d',[l_i.NbExamples]));
    //don't forget supprimer cette rf et librer l'objet corresp.!
    self.DeleteLeaf(jMin);
  end;
end;

{ TLstSplitAttClus }

function TLstSplitAttClus.getClassSplitAttribut: TClassSplitAttribut;
begin
 result:= TSplitAttributClus; 
end;

{ TMLTreeNodeClus }

procedure TMLTreeNodeClus.computeLocalInfos(prmExamples: TExamples);
begin
 inherited ComputeLocalInfos(prmExamples);
 FStats:= TStatTargets.Create(TargetAttributes,prmExamples);
end;

destructor TMLTreeNodeClus.destroy;
begin
 if assigned(FStats) then FreeAndNil(FStats);
 inherited;
end;

function TMLTreeNodeClus.getClassLstSplitAttributes: TClassLstSplitAttributes;
begin
 result:= TLstSplitAttClus; 
end;

function TMLTreeNodeClus.getHTMLLeafInfos: string;
begin
 result:= format(' then <b>cluster n%d</b>, with %d examples (%.2f%s)',
                [self.FNumCluster,FStats.NbExamples,100.0*FStats.NbExamples/(1.0*(self.TreeStructure as TMLTreeStructureClus).FStats.NbExamples),'%']);
end;

procedure TMLTreeNodeClus.getLeafInfos(prmLeaf: TSplitLeaf);
begin
 inherited getLeafInfos(prmLeaf);
 FStats:= (prmLeaf as TSplitLeafClus).Stats.duplicate(); 
end;

function TMLTreeNodeClus.isNoSplitNeeded: boolean;
var prm: TOpPrmClusTree;
begin
 prm:= self.PrmMethod as TOpPrmClusTree;
 //taille de sommet trop petit pour tre splitt
 result:= (self.FStats.NbExamples < prm.SizeBeforeSplit) OR (self.Depth >= prm.MaxDepth);
end;

{ TMLTreeStructureClus }

function TMLTreeStructureClus.computeBSS: double;
var wss,tss: double;
    j: integer;
begin
 //WSS
 wss:= 0.0;
 for j:= 0 to pred(self.CountLeaves) do
  wss:= wss + (self.Leaf[j] as TMLTreeNodeClus).FStats.inertie(FNorm);
 //TSS
 tss:= self.FStats.inertie(FNorm);
 //bss
 if (tss > 0.0)
  then result:= tss - wss
  else result:= 0.0;
end;

constructor TMLTreeStructureClus.create(paramMethod: TOperatorParameter;
  prmTarget, prmInput: TLstAttributes; prmExamples: TExamples);
begin
 setLength(FNorm,prmTarget.Count);
 inherited;
end;

procedure TMLTreeStructureClus.defineTrainingSet(prmExamples: TExamples);
begin
 FTrainingSet:= prmExamples;
end;

destructor TMLTreeStructureClus.destroy;
begin
 Finalize(FNorm);
 //FreeAndNil(FTrainingSet);
 //FreeAndNil(FPruningSet);
 FreeAndNil(FStats);
 inherited;
end;

function TMLTreeStructureClus.getClassMLTreeNode: TClassMLTreeNode;
begin
 result:= TMLTreeNodeClus;
end;

procedure TMLTreeStructureClus.GrowingTree();
var prmClus: TOpPrmClusTree;
begin
 prmClus:= PrmMethod as TOpPrmClusTree;
 TRY
 //utiliser le paramtre nombre max. de clusters
 while (self.CountLeaves < prmClus.MaxNbClusters) and  self.SplitFound() do
  begin
   //suivre l'volution de la mmoire alloue, elle doit tre stable au fil du temps
   //affichage dans le log
   //TraceLog.WriteToLogFile(format('volution mmoire >> feuilles (%3d), mmoire alloue (%10d)',[self.CountLeaves,AllocMemSize]));
  end;
 EXCEPT
 on e:Exception do
  begin
  TraceLog.WriteToLogFile(format('[CLUSTERING TREE] erreur growing tree --> %s',[e.Message]));
  Raise Exception.Create('error growing clustering tree');
  end;
 END;
end;

procedure TMLTreeStructureClus.PostGrowing;
begin
 //none
end;

procedure TMLTreeStructureClus.PostPruning;
begin
 //none
end;

procedure TMLTreeStructureClus.prepareDataset(prmExamples: TExamples);
var j: integer;
begin
 self.defineTrainingSet(prmExamples);
 //calculer les stats de normalisation -- sur les exemples d'apprentissage
 FStats:= TStatTargets.Create(self.TargetAttributes,FTrainingSet);
 //selon l'option choisie
 if (self.PrmMethod as TOpPrmClusTree).Normalization = 0
  then
   begin
    FTSS:= 0.0;
    for j:= 0 to pred(self.TargetAttributes.Count) do
     begin
      FNorm[j]:= 1.0;
      FTSS:= FTSS + (FStats.Stat(j) as TCalcStatDesContinuous).TSS;
     end;
   end
  else
   begin
    FTSS:= 1.0 * FTrainingSet.Size * self.TargetAttributes.Count;
    for j:= 0 to pred(self.TargetAttributes.Count) do
     begin
      FNorm[j]:= (FStats.Stat(j) as TCalcStatDesContinuous).TSS / (1.0 * FTrainingSet.Size);
     end;
   end;
 //contrle
 TraceLog.WriteToLogFile(format('[CLUSTERING TREE] TSS root = %.4f, TSS stat root = %.4f',[FTSS,FStats.inertie(FNorm)]));
end;

procedure TMLTreeStructureClus.setClusterNumber;
var k: integer;
begin
 //numroter les feuilles pour obtenir le n de cluster
 for k:= 0 to pred(self.CountLeaves) do
  (self.Leaf[k] as TMLTreeNodeClus).FNumCluster:= succ(k);
end;

{ TCalcClusTree }

procedure TCalcClusTree.BuildClusters(prmExamples: TExamples);
begin
 if assigned(FTree) then FTree.Free();
 FTree:= getClassTreeStructure.create(PrmCalc,Targets,Inputs,prmExamples);
 FTree.GrowingTree();
 FTree.PostGrowing();
 FTree.PostPruning();
 //numroter les feuilles --> numro de clusters
 FTree.setClusterNumber();
end;

destructor TCalcClusTree.Destroy;
begin
 if assigned(FTree) then FTree.Free();
 inherited;
end;

procedure TCalcClusTree.EvaluateClustering(prmExamples: TExamples);
begin
 //
end;

procedure TCalcClusTree.FillClusAttDef;
var k: integer;
begin
 FAttClus.LstValues.clear;
 for k:= 1 to FTree.CountLeaves do
  FAttClus.LstValues.getValue('c_ct_'+IntToStr(k));
end;

function TCalcClusTree.getClassTreeStructure: TClassMLTreeStrucClus;
begin
 result:= TMLTreeStructureClus; 
end;

function TCalcClusTree.getHTMLClustering: string;
var bs: TBufString;
    s: string;
begin
 bs:= TBufString.Create();
 bs.BeginUpdate();
 //description des squences ou de l'inertie explique
 s:= self.getHTMLSpecificDescription();
 bs.AddStr(s);
 //description de l'arbre
 s:= '<H3>Tree description</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_DATA_BLUE+format('<TD>Number of nodes</TD><TD align=right width=60>%d</TD></TR>',[FTree.CountNodes]);
 s:= s+HTML_TABLE_COLOR_DATA_BLUE+format('<TD>Number of leaves</TD><TD align=right width=60>%d</TD></TR>',[FTree.CountLeaves]);
 s:= s+'</table>';
 bs.AddStr(s);
 //maintenant dessin de l'arbre
 bs.AddStr('<P><H3>Tree</H3>');
 FTree.RootNode.getHTMLDescription(bs);
 bs.EndUpdate();
 result:= bs.BufS;
 bs.Free;
end;

function TCalcClusTree.GetHTMLEvaluation: string;
begin
 result:= '';
end;

function TCalcClusTree.getHTMLSpecificDescription: string;
var s: string;
    bss,tss: double;
begin
 //caractrisation de la partition
 s:= '<H3>Inertia Decomposition</H3>';
 bss:= FTree.computeBSS();
 tss:= FTree.TSS;
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH>Inertia</TH><TH>Value</TH><TH>Ratio</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Between-group</TD><TD align=right width=50>%.2f</TD><TD align=right width=50 %s>%.5f</TD></TR>',[bss,HTML_BGCOLOR_DATA_RED,bss/tss]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Within-group</TD><TD align=right width=50>%.2f</TD><TD align=right width=50>%.5f</TD></TR>',[tss-bss,(tss-bss)/tss]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>All</TD><TD align=right width=50>%.2f</TD><TD align=right>1</TD></TR>',[tss]);
 s:= s+'</table>';
 result:= s;
end;

function TCalcClusTree.SetClusterExample(example: integer): TTypeDiscrete;
var leaf: TMLTreeNodeClus;
begin
 leaf:= self.SetLeafExample(example) as TMLTreeNodeClus;
 if not(assigned(leaf))
  then result:= 0
  else result:= leaf.FNumCluster;
end;

function TCalcClusTree.SetLeafExample(example: integer): TMLTreeNodeClus;
var node: TMLTreeNodeClus;
    successor: TMLTreeNode;
    i: integer;
begin
 node:= FTree.RootNode as TMLTreeNodeClus;
 while not(node.isLeaf) do
  begin
   //dtecter le noeud suivant  dclencher
   i:= 0;
   repeat
    successor:= node.getSuccessor(i);
    inc(i);
   until (successor.Condition.TestValue(successor.Condition.Attribute.cValue[example]));
   //passer au suivant donc - mieux vaut ne trouver ici sinon on est mal
   node:= successor as TMLTreeNodeClus;
  end;
 //c'est une feuille, on prend la conclusion -- 24/04/2005 -- on peut le faire sans calcul du score !
 result:= node;
end;

end.
