(****************************************************************)
(* UCompSpvTreeCART.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(****************************************************************)

{
@abstract(dfinition du composant de calcul d'un arbre de dcision - algo CART)
@author(Ricco)
@created(11/11/2004)

}
unit UCompSpvTreeCART;

interface

USES
        Forms, Classes, IniFiles,
        UCalcTreeStructureDefinition,
        UCompDefinition,
        UCompSpvLDefinition,
        UOperatorDefinition,
        UCompSpvTreeID3,
        UCalcRndGenerator;

TYPE
        {gnrateur de composant CART}
        TMLGCompSpvTreeCART = class(TMLGCompSpvTree)
                              public
                              function    GetClassMLComponent: TClassMLComponent; override;  
                              end;

        {le composant SpvTree CART}
        TMLCompSpvTreeCART = class(TMLCompSpvTree)
                             protected
                             function    getClassOperator: TClassOperator; override;
                             end;

        {l'oprateur CART}
        TOpSpvTreeCART = class(TOpSpvTree)
                         protected
                         function    getClassParameter: TClassOperatorParameter; override;
                         function    getClassSpvLearning(): TClassCalcSpvLearning; override;
                         end;

        {paramtre de l'algo CART}
        TOpPrmSpvTreeCART = class(TOpPrmSpvTree)
                            private    
                            {taille sommet pour tre segment}
                            FSizeBeforeSplit: integer;
                            {taille du pruning set, en pourcentage de la taille de la base}
                            FPruningSetSize : integer;
                            {x-SE rule}
                            FSERule: double;
                            {gnrateur alatoire pour la subdivision}
                            FModeRndGenerator: TStartSeed;
                            {montrer ou pas toutes les squences d'arbres si > SEUIL_NB_SEQUENCES}
                            FShowAllTreeSeq: boolean;
                            protected
                            procedure   SetDefaultParameters(); override;
                            function    CreateDlgParameters(): TForm; override;
                            public
                            function    getHTMLParameters(): string; override;
                            procedure   LoadFromStream(prmStream: TStream); override;
                            procedure   SaveToStream(prmStream: TStream); override;
                            procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                            procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                            property    SizeBeforeSplit: integer read FSizeBeforeSplit write FSizeBeforeSplit;
                            property    PruningSetSize: integer read FPruningSetSize write FPruningSetSize;
                            property    SERule: double read FSERule write FSERule;
                            property    ModeRndGenerator: TStartSeed read FModeRndGenerator write FModeRndGenerator;
                            property    ShowAllTreeSeq: boolean read FShowAllTreeSeq write FShowAllTreeSeq;
                            end;

        {algo d'arbre de dcision supervise}
        TCalcSpvTreeCART = class(TCalcSpvTree)
                           protected
                           function    getClassTreeStructureSpv(): TClassMLTreeStructure; override;
                           function    getHTMLTreeSequence(): string;
                           public
                           function    getHTMLResults(): string; override;     
                           end;     
        

implementation

USES
        Sysutils,
        UCalcSpvTreeCART, UConstConfiguration, UDlgOpPrmSpvTreeCART,
  UDlgBaseOperatorParameter;

CONST
        SEUIL_NB_SEQUENCES = 15;

{ TCalcSpvTreeCART }

function TCalcSpvTreeCART.getClassTreeStructureSpv: TClassMLTreeStructure;
begin
 result:= TMLTreeStructureSpvCART;
end;

function TCalcSpvTreeCART.getHTMLResults: string;
var tmp: string;
    arbre: TMLTreeStructureSpvCART;    
begin
 arbre:= self.Tree as TMLTreeStructureSpvCART;
 //dcrire le partitionnement des donnes
 tmp:= '<H4>Data partition</H4>';
 tmp:= tmp+HTML_HEADER_TABLE_RESULT;
 tmp:= tmp+HTML_TABLE_COLOR_DATA_BLUE+format('<TD>Growing set</TD><TD align=right width=60>%d</TD></TR>',[arbre.getGrowingSetSize()]);
 tmp:= tmp+HTML_TABLE_COLOR_DATA_BLUE+format('<TD>Pruning set</TD><TD align=right width=60>%d</TD></TR>',[arbre.getPruningSetSize()]);
 tmp:= tmp+'</table>';
 //dcrire la squence d'arbres
 tmp:= tmp+self.getHTMLTreeSequence();
 //ajouter la sortie standard
 tmp:= tmp+inherited getHTMLResults();
 result:= tmp;
end;

function TCalcSpvTreeCART.getHTMLTreeSequence: string;
var i: integer;
    arbre: TMLTreeStructureSpvCART;
    tmp,colorLine: string;
    ok_NB_SEQ,curOk: boolean;
begin
 arbre:= self.Tree as TMLTreeStructureSpvCART;
 ok_Nb_SEQ:= (Length(arbre.TabTreeSequence)<=SEUIL_NB_SEQUENCES);
 tmp:= format('<H4>Trees sequence (# %d)</H4>',[Length(arbre.TabTreeSequence)]);;
 tmp:= tmp+HTML_HEADER_TABLE_RESULT;
 tmp:= tmp+HTML_TABLE_COLOR_HEADER_GRAY+'<TH>N</TH><TH># Leaves</TH><TH>Err (growing set)</TH><TH>Err (pruning set)</TH></TR>';
 for i:= high(arbre.TabTreeSequence) downto low(arbre.TabTreeSequence) do
  begin
   curOk:= ok_Nb_SEQ OR (self.OpPrmSpv as TOpPrmSpvTreeCART).ShowAllTreeSeq;
   //si a passe pas, un des cas suivant peut faire passer
   curOk:= curOk OR
           ((i=low(arbre.TabTreeSequence))
             or (i=high(arbre.TabTreeSequence))
             or (arbre.TabTreeSequence[i].NumTree = arbre.OptimalSubTree)
             or (arbre.TabTreeSequence[i].NumTree = arbre.SelectedSubTree)
           );
   //suite normale... si ok...
   if curOk
    then
     begin
       colorLine:= HTML_TABLE_COLOR_DATA_GRAY;
       if (arbre.TabTreeSequence[i].NumTree = arbre.OptimalSubTree)
        then colorLine:= HTML_TABLE_COLOR_DATA_GREEN;
       if (arbre.TabTreeSequence[i].NumTree = arbre.SelectedSubTree)
        then colorLine:= HTML_TABLE_COLOR_DATA_RED;
       tmp:= tmp+colorLine+format('<TD align="right">%d</TD><TD align="right">%d</TD><TD align="right">%.4f</TD><TD align="right">%.4f</TD></TR>',
                                 [arbre.TabTreeSequence[i].NumTree,arbre.TabTreeSequence[i].NbLeaves,arbre.TabTreeSequence[i].ErrGrow,arbre.TabTreeSequence[i].ErrPrune]);
     end;
  end;
 tmp:= tmp+'</table>';
 result:= tmp;
end;

{ TMLGCompSpvTreeCART }

function TMLGCompSpvTreeCART.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompSpvTreeCART;
end;

{ TMLCompSpvTreeCART }

function TMLCompSpvTreeCART.getClassOperator: TClassOperator;
begin
 result:= TOpSpvTreeCART;
end;

{ TOpSpvTreeCART }

function TOpSpvTreeCART.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmSpvTreeCART;
end;

function TOpSpvTreeCART.getClassSpvLearning: TClassCalcSpvLearning;
begin
 result:= TCalcSpvTreeCART;
end;

{ TOpPrmSpvTreeCART }

function TOpPrmSpvTreeCART.CreateDlgParameters: TForm;
begin
 result:= TdlgOpprmSpvTreeCART.CreateFromOpPrm(self);
end;

function TOpPrmSpvTreeCART.getHTMLParameters: string;
var s: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>Classification tree (C-RT) parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Size before split</TD><TD align=right>%d</TD></TR>',[FSizeBeforeSplit]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Pruning set size (%s)</TD><TD align=right>%d</TD></TR>',['%',FPruningSetSize]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>x-SE rule</TD><TD align=right>%.2f</TD></TR>',[FSERule]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Random generator</TD><TD align=right>%d</TD></TR>',[ord(FModeRndGenerator)]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Show all tree seq (even if > 15)</TD><TD align=right>%d</TD></TR>',[ord(FShowAllTreeSeq)]);
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmSpvTreeCART.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 FSizeBeforeSplit:= prmINI.ReadInteger(prmSection,'size_before_split',FSizeBeforeSplit);
 FPruningSetSize:= prmINI.ReadInteger(prmSection,'pruning_set_size',FPruningSetSize);
 FSERule:= prmINI.ReadFloat(prmSection,'se_rule',FSERule);
 FModeRndGenerator:= TStartSeed(prmINI.ReadInteger(prmSection,'rnd_generator',ord(FModeRndGenerator)));
 FShowAllTreeSeq:= prmINI.ReadBool(prmSection,'show_all_tree_seq',FShowAllTreeSeq);
end;

procedure TOpPrmSpvTreeCART.LoadFromStream(prmStream: TStream);
begin
 prmStream.ReadBuffer(FSizeBeforeSplit,sizeof(FSizeBeforeSplit));
 prmStream.ReadBuffer(FPruningSetSize,sizeof(FPruningSetSize));
 prmStream.ReadBuffer(FSERule,sizeof(FSERule));
 prmStream.ReadBuffer(FModeRndGenerator,sizeof(FModeRndGenerator));
 prmStream.ReadBuffer(FShowAllTreeSeq,sizeof(FShowAllTreeSeq));
end;

procedure TOpPrmSpvTreeCART.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 prmINI.WriteInteger(prmSection,'size_before_split',FSizeBeforeSplit);
 prmINI.WriteInteger(prmSection,'pruning_set_size',FPruningSetSize);
 prmINI.WriteFloat(prmSection,'se_rule',FSERule);
 prmINI.WriteInteger(prmSection,'rnd_generator',ord(FModeRndGenerator));
 prmINI.WriteBool(prmSection,'show_all_tree_seq',FShowAllTreeSeq);
end;

procedure TOpPrmSpvTreeCART.SaveToStream(prmStream: TStream);
begin
 prmStream.WriteBuffer(FSizeBeforeSplit,sizeof(FSizeBeforeSplit));
 prmStream.WriteBuffer(FPruningSetSize,sizeof(FPruningSetSize));
 prmStream.WriteBuffer(FSERule,sizeof(FSERule));
 prmStream.WriteBuffer(FModeRndGenerator,sizeof(FModeRndGenerator));
 prmStream.WriteBuffer(FShowAllTreeSeq,sizeof(FShowAllTreeSeq));
end;

procedure TOpPrmSpvTreeCART.SetDefaultParameters;
begin
 FSizeBeforeSplit:= 10;
 FPruningSetSize := 33;
 FSERule:= 1;
 FModeRndGenerator:= seedStandard;
 FShowAllTreeSeq:= FALSE;
end;

initialization
 RegisterClass(TMLGCompSpvTreeCART);
end.
