(***************************************************************)
(* UCompSpvTreeID3.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(***************************************************************)

{
@abstract(dfinition du composant de calcul d'un arbre de dcision - algo ID3)
@author(Ricco)
@created(12/01/2004)

29/07/2004 -- rgles exportes au format SELFMIND
}
unit UCompSpvTreeID3;

interface

USES
        Forms,Classes,
        IniFiles,
        UCompDefinition,
        UCompSpvLDefinition,
        UOperatorDefinition,
        UCalcTreeStructureDefinition,
        UCalcSpvTreeDefinition,
        UDatasetDefinition,
        UDatasetImplementation,
        UDatasetExamples,
        UCalcDistribution,
        UCalcSpvStructScore;

TYPE

        {le gnrateur de composant Supervised ID3}
        TMLGCompSpvTree = class(TMLGenCompSpvLearning)
                                protected
                                procedure   GenCompInitializations(); override;
                                public
                                function    GetClassMLComponent: TClassMLComponent; override;
                                end;

        {le composant SpvTree}
        TMLCompSpvTree = class(TMLCompSpvLearning)
                         protected
                         function    getClassOperator: TClassOperator; override;
                         end;

        {l'oprateur}
        TOpSpvTree = class(TOpSpvLearning)
                          protected
                          function    getClassParameter: TClassOperatorParameter; override;
                          function    getClassSpvLearning(): TClassCalcSpvLearning; override;
                          end;

        {paramtre de l'algo ID3}
        TOpPrmSpvTree = class(TOpPrmSpvLearning)
                        private
                        {taille sommet pour tre segment}
                        FSizeBeforeSplit: integer;
                        {taille des enfants gnrs - il faut qu'il y en ait deux au moins qui aient cette taille}
                        FSizeAfterSplit: integer;
                        {seuil d'acceptation d'un split}
                        FGoodnessThresold: double;
                        {Profondeur max de l'arbre}
                        FMaxDepth: integer;
                        protected
                        procedure   SetDefaultParameters(); override;
                        function    CreateDlgParameters(): TForm; override;
                        public
                        procedure   SetParameters(sb,sa,md: integer; gt: string);
                        function    getHTMLParameters(): string; override;
                        {I/O FLUX}
                        procedure   LoadFromStream(prmStream: TStream); override;
                        procedure   SaveToStream(prmStream: TStream); override;
                        {I/O Fichier INI}
                        procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                        procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                        property SizeBeforeSplit: integer read FSizeBeforeSplit;
                        property SizeAfterSplit: integer read FSizeAfterSplit;
                        property MaxDepth: integer read FMaxDepth;
                        property GoodnessThresold: double read FGoodnessThresold;
                        end;

        {algo d'arbre de dcision supervise}
        TCalcSpvTree = class(TCalcSpvLearning)
                       private
                       {structure d'arbre}
                       FTree: TMLTreeStructureSpv;
                       {liste interne pour la cration de l'arbre}
                       FClassAttLst: TLstAttributes;
                       protected
                       function    getClassTreeStructureSpv(): TClassMLTreeStructure; virtual; 
                       procedure   createStructures(); override;
                       procedure   destroyStructures(); override;
                       {apprentissage proprement dit}
                       function    coreLearning(examples: TExamples): boolean; override;
                       public
                       procedure   getScore(example: integer; var postProba: TTabScore); override;
                       //on laisse classification ici car il est possible d'affecter une conclusion sans passer par le calcul des probas.
                       procedure   classification(example: integer; var response: TTypeDiscrete); override;
                       function    getHTMLResults(): string; override;
                       {exporter les rgles de l'arbre de dcision au format SELFMIND -- new 28/07/2004}
                       function    getRules_SelfMindFormat(): TStringList;
                       property    Tree: TMLTreeStructureSpv read FTree;
                       end;

implementation

uses
        Sysutils, UStringAddBuffered,
        UDlgOpPrmSpvTreeID3, UDlgBaseOperatorParameter, UConstConfiguration,
        ULogFile, UStringsResources;

{ TMLGCompSpvTree }

procedure TMLGCompSpvTree.GenCompInitializations;
begin
 FMLComp:= mlcSpvLearning;
 //FMLNumIcon:= 22;
 //FMLCompName:= str_comp_name_spvl_id3;
 //FMLBitmapFileName:= 'MLSpvTreeID3.bmp';
end;

function TMLGCompSpvTree.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompSpvTree;
end;

{ TMLCompSpvTree }

function TMLCompSpvTree.getClassOperator: TClassOperator;
begin
 result:= TOpSpvTree;
end;

{ TOpSpvTree }

function TOpSpvTree.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmSpvTree;
end;

function TOpSpvTree.getClassSpvLearning: TClassCalcSpvLearning;
begin
 result:= TCalcSpvTree;
end;

{ TCalcSpvTree }

procedure TCalcSpvTree.getScore(example: integer;
  var postProba: TTabScore);
var node: TMLTreeNodeSpv;
    successor: TMLTreeNode;
    i: integer;
begin
 node:= FTree.RootNode as TMLTreeNodeSpv;
 while not(node.isLeaf) do
  begin
   //dtecter le noeud suivant  dclencher
   i:= 0;
   repeat
    successor:= node.getSuccessor(i);
    inc(i);
   until (successor.Condition.TestValue(successor.Condition.Attribute.cValue[example]));
   //passer au suivant donc - mieux vaut ne trouver ici sinon on est mal
   node:= successor as TMLTreeNodeSpv;
  end;
 //c'est une feuille, on rcupre la distribution
 postProba.recupFromTabFrequence(node.DistClass.TabFreq);
 //si pas de proba, on prend celui de la racine
 if (postProba[0] = 0)
  then postProba.recupFromTabFrequence((self.FTree.RootNode as TMLTreeNodeSpv).DistClass.TabFreq);
end;

procedure TCalcSpvTree.classification(example: integer;
  var response: TTypeDiscrete);
var node: TMLTreeNodeSpv;
    successor: TMLTreeNode;
    i: integer;
begin
 node:= FTree.RootNode as TMLTreeNodeSpv;
 while not(node.isLeaf) do
  begin
   //dtecter le noeud suivant  dclencher
   i:= 0;
   repeat
    successor:= node.getSuccessor(i);
    inc(i);
   until (successor.Condition.TestValue(successor.Condition.Attribute.cValue[example]));
   //passer au suivant donc - mieux vaut ne trouver ici sinon on est mal
   node:= successor as TMLTreeNodeSpv;
  end;
 //c'est une feuille, on prend la conclusion -- 24/04/2005 -- on peut le faire sans calcul du score !
 response:= node.NodeConclusion;
end;

function TCalcSpvTree.coreLearning(examples: TExamples): boolean;
begin
 result:= true;
 try
 
 //TraceLog.WriteToLogFile(format('(!) mmoire alloue par le prog. avant la cration de la structure arbre = %d',[AllocMemSize]));

 FTree:= getClassTreeStructureSpv().create(OpPrmSpv, FClassAttLst,Descriptors,examples) as TMLTreeStructureSpv;
 
 FTree.GrowingTree();
 FTree.PostGrowing();
 FTree.PostPruning();

 TraceLog.WriteToLogFile(format('ARBRE DE DECISION >> nombre de feuilles : %d',[FTree.CountLeaves]));
 except
 result:= false;
 end;
end;

procedure TCalcSpvTree.createStructures;
begin
 FClassAttLst:= TLstAttributes.Create(FALSE,ClassAttribute.Size);
 FClassAttLst.Add(ClassAttribute);
end;

procedure TCalcSpvTree.destroyStructures;
begin
 FClassAttLst.Free;
 if assigned(FTree)
  then FreeAndNil(FTree);
end;

function TCalcSpvTree.getClassTreeStructureSpv: TClassMLTreeStructure;
begin
 result:= TMLTreeStructureSpv;
end;

function TCalcSpvTree.getHTMLResults: string;
var bs: TBufString;
    s: string;
begin
 bs:= TBufString.Create();
 bs.BeginUpdate();
 //description de l'arbre
 s:= '<H3>Tree description</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_DATA_BLUE+format('<TD>Number of nodes</TD><TD align=right width=60>%d</TD></TR>',[FTree.CountNodes]);
 s:= s+HTML_TABLE_COLOR_DATA_BLUE+format('<TD>Number of leaves</TD><TD align=right width=60>%d</TD></TR>',[FTree.CountLeaves]);
 s:= s+'</table>';
 bs.AddStr(s);
 //maintenant dessin de l'arbre
 bs.AddStr('<P><H3>Decision tree</H3>');
 FTree.RootNode.getHTMLDescription(bs);
 bs.EndUpdate();
 result:= bs.BufS;
 bs.Free;
end;

function TCalcSpvTree.getRules_SelfMindFormat: TStringList;
var lst: TStringList;
    rule: integer;
    leaf,node: TMLTreeNode;
begin
 //a marche pas parce que le calculateur est embedd dans un mta-supervised
 //curDiagram:= (self.OpPrmSpv.Operator.MLOwner as TMLComponent).Diagram;
 lst:= TStringList.Create();
 //---------------------------------
 //renvoyer les rgles dans la liste
 //---------------------------------
 //commentaires avec le nom du fichier de donnes
 lst.Add('#');
 lst.Add('# Rules from DECISION TREE');
 lst.Add('#');
 //nombre de rgles
 lst.Add('');
 lst.Add(format('Number of rules: %d;',[self.FTree.CountLeaves]));
 lst.Add('');
 //enquiller les rgles maintenant
 for rule:= 0 to pred(self.FTree.CountLeaves) do
  begin
   lst.Add(format('# Rule %d',[succ(rule)]));//numro de la rgle dans l'ordre des feuilles (de construction des feuilles)
   //remonter jusqu' la racine
   leaf:= self.FTree.Leaf[rule];
   node:= leaf;
   while (node<>self.FTree.RootNode) do
    begin
     if assigned(node.Condition)
      then
       begin
        if (node = leaf)
         then lst.Add(format('If %s',[node.Condition.getTXTDescription(TRUE)])) //premier de corde
         else lst.Add(format('And %s',[node.Condition.getTXTDescription(TRUE)])); //intermdiaire
       end;
     node:= node.Predecessor;
    end;
   lst.Add(leaf.getTXTLeafInfos());
   lst.Add('');
  end;
 //renvoyer la liste
 result:= lst;
end;

{ TOpPrmSpvTree }

function TOpPrmSpvTree.CreateDlgParameters: TForm;
begin
 result:= TDlgOpPrmSpvTreeID3.CreateFromOpPrm(self);
end;

function TOpPrmSpvTree.getHTMLParameters: string;
var s: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>ID3 parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Size before split</TD><TD align=right>%d</TD></TR>',[FSizeBeforeSplit]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Size after split</TD><TD align=right>%d</TD></TR>',[SizeAfterSplit]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Max depth of leaves</TD><TD align=right>%d</TD></TR>',[FMaxDepth]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD width=200>Goodness of split thresold</TD><TD align=right>%.4f</TD></TR>',[FGoodnessThresold]); 
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmSpvTree.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 FSizeBeforeSplit:= prmINI.ReadInteger(prmSection,'SizeBeforeSplit',FSizeBeforeSplit);
 FSizeAfterSplit:= prmINI.ReadInteger(prmSection,'SizeAfterSplit',FSizeAfterSplit);
 FGoodnessThresold:= prmINI.ReadFloat(prmSection,'GoodnessThresold',FGoodnessThresold);
 FMaxDepth:= prmINI.ReadInteger(prmSection,'MaxDepth',FMaxDepth);
end;

procedure TOpPrmSpvTree.LoadFromStream(prmStream: TStream);
begin
 prmStream.ReadBuffer(FSizeBeforeSplit,sizeof(FSizeBeforeSplit));
 prmStream.ReadBuffer(FSizeAfterSplit,sizeof(FSizeAfterSplit));
 prmStream.ReadBuffer(FGoodnessThresold,sizeof(FGoodnessThresold));
 prmStream.ReadBuffer(FMaxDepth,sizeof(FMaxDepth));
end;

procedure TOpPrmSpvTree.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
 prmINI.WriteInteger(prmSection,'SizeBeforeSplit',FSizeBeforeSplit);
 prmINI.WriteInteger(prmSection,'SizeAfterSplit',FSizeAfterSplit);
 prmINI.WriteFloat(prmSection,'GoodnessThresold',FGoodnessThresold);
 prmINI.WriteInteger(prmSection,'MaxDepth',FMaxDepth);
end;

procedure TOpPrmSpvTree.SaveToStream(prmStream: TStream);
begin
 prmStream.WriteBuffer(FSizeBeforeSplit,sizeof(FSizeBeforeSplit));
 prmStream.WriteBuffer(FSizeAfterSplit,sizeof(FSizeAfterSplit));
 prmStream.WriteBuffer(FGoodnessThresold,sizeof(FGoodnessThresold));
 prmStream.WriteBuffer(FMaxDepth,sizeof(FMaxDepth));
end;

procedure TOpPrmSpvTree.SetDefaultParameters;
begin
 FSizeBeforeSplit:= 200;
 FSizeAfterSplit:= 50;
 FGoodnessThresold:= 0.03;
 FMaxDepth:= 10;
end;

procedure TOpPrmSpvTree.SetParameters(sb, sa, md: integer; gt: string);
begin
 FSizeBeforeSplit:= sb;
 FSizeAfterSplit:= sa;
 FMaxDepth:= md;
 FGoodnessThresold:= StrToFloat(gt);;
end;

initialization
 RegisterClass(TMLGCompSpvTree);
end.
