(*******************************************************************)
(* UCompClusteringTreeCART.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(*******************************************************************)

{
@abstract(Arbre de Classification -- Extension du schma CART [Regression Tree] au cas endogne-multivarie)
@author(Ricco)
@created(09/09/2005)

Utilisation du cadre CART pour construire un arbre de classification. L'inspiration est la
mthode divisive monothtique de Lechevalier et Chavent (1999), la rsolution thorique est
inspire du chapitre 8 de Breiman et al.(1984).

Composant.
}

unit UCompClusteringTreeCART;

interface

USES
        Contnrs, Forms, Classes, IniFiles,
        UCompDefinition,
        UOperatorDefinition,
        UCompClusteringDefinition,
        UDatasetDefinition, UDatasetExamples,
        UCalcStatDes, UCompClusteringTree;

TYPE
        {gnrateur de composant}
        TGenClusTreeCART = class(TGenClusTree)
                      public
                      function    GetClassMLComponent: TClassMLComponent; override;
                      end;

        {composant CT}
        TMLCompClusTreeCART = class(TMLCompClusTree)
                         protected
                         function    getClassOperator: TClassOperator; override;
                         function    getGenericAttName(): string; override;
                         end;

        {oprateur CT}
        TOpClusTreeCART = class(TOpClusTree)
                     protected
                     function    getClassParameter: TClassOperatorParameter; override;
                     function    getClassCalcClustering(): TClassCalcClustering; override;
                     end;

        {paramtre oprateur CT}
        TOpPrmClusTreeCART = class(TOpPrmClusTree)
                    protected
                    {taille du pruning set, en pourcentage de la taille de la base}
                    FPruningSetSize : integer;
                    {x-SE rule}
                    //FSERule: double;
                    //new -- 11/09/2005 -- critre d'arrt "loi du coude" -- pente de la courbe
                    FThresholdSlope: double;
                    {montrer ou pas toutes les squences d'arbres si > SEUIL_NB_SEQUENCES}
                    FShowAllTreeSeq: boolean;
                    function    CreateDlgParameters(): TForm; override;
                    procedure   SetDefaultParameters(); override;
                    public
                    //
                    procedure   LoadFromStream(prmStream: TStream); override;
                    procedure   SaveToStream(prmStream: TStream); override;
                    procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                    procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                    function    getHTMLParameters(): string; override;
                    //
                    property    PruningSetSize: integer read FPruningSetSize write FPruningSetSize;
                    //property    SERule: double read FSERule write FSERule;
                    property    ThresholdSlope: double read FThresholdSlope write FThresholdSlope;
                    property    ShowAllTreeSeq: boolean read FShowAllTreeSeq write FShowAllTreeSeq;
                    end;


implementation

uses
     Sysutils,
     UCalcClusteringTreeCART, UDlgOpPrmClusteringTreeCART,
     UConstConfiguration;

{ TGenClusTreeCART }

function TGenClusTreeCART.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompClusTreeCART;
end;

{ TMLCompClusTreeCART }

function TMLCompClusTreeCART.getClassOperator: TClassOperator;
begin
 result:= TOpClusTreeCART;
end;

function TMLCompClusTreeCART.getGenericAttName: string;
begin
 result:= 'CTP';
end;

{ TOpClusTreeCART }

function TOpClusTreeCART.getClassCalcClustering: TClassCalcClustering;
begin
 result:= TCalcClusTreeCTP;
end;

function TOpClusTreeCART.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmClusTreeCART;
end;

{ TOpPrmClusTreeCART }

function TOpPrmClusTreeCART.CreateDlgParameters: TForm;
begin
 result:= TdlgOpPrmClusteringTreeCART.CreateFromOpPrm(self);
end;

function TOpPrmClusTreeCART.getHTMLParameters: string;
var s: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>Tree Parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Rnd generator</TD><TD width=40 align="right">%d</TD></TR>',[ord(ModeRndGenerator)]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Max Number of Clusters</TD><TD align="right">%d</TD></TR>',[FMaxNbClusters]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Distance normalization</TD><TD align="right">%d</TD></TR>',[FNormalization]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Min. size for split</TD><TD align="right">%d</TD></TR>',[FSizeBeforeSplit]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Min. size of leaves</TD><TD align="right">%d</TD></TR>',[FLeavesSizeAfterSplit]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Max. depth</TD><TD align="right">%d</TD></TR>',[FMaxDepth]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Goodness threshold</TD><TD align="right">%.2f</TD></TR>',[FGoodnessThreshold]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Pruning set size</TD><TD align="right">%d %s</TD></TR>',[FPruningSetSize,'%']);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Slope threshold</TD><TD align="right">%.3f</TD></TR>',[FThresholdSlope]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Show all tree sequence</TD><TD align="right">%d</TD></TR>',[ord(FShowAllTreeSeq)]);
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmClusTreeCART.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 FPruningSetSize:= prmINI.ReadInteger(prmSection,'pruning_set_size',FPruningSetSize);
 FThresholdSlope:= prmINI.ReadFloat(prmSection,'threshold_slope',FThresholdSlope);
 FShowAllTreeSeq:= prmINI.ReadBool(prmSection,'show_all_sequence',FShowAllTreeSeq);
end;

procedure TOpPrmClusTreeCART.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FPruningSetSize,sizeof(FPruningSetSize));
 prmStream.ReadBuffer(FThresholdSlope,sizeof(FThresholdSlope));
 prmStream.ReadBuffer(FShowAllTreeSeq,sizeof(FShowAllTreeSeq));
end;

procedure TOpPrmClusTreeCART.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteInteger(prmSection,'pruning_set_size',FPruningSetSize);
 prmINI.WriteFloat(prmSection,'threshold_slope',FThresholdSlope);
 prmINI.WriteBool(prmSection,'show_all_sequence',FShowAllTreeSeq);
end;

procedure TOpPrmClusTreeCART.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FPruningSetSize,sizeof(FPruningSetSize));
 prmStream.WriteBuffer(FThresholdSlope,sizeof(FThresholdSlope));
 prmStream.WriteBuffer(FShowAllTreeSeq,sizeof(FShowAllTreeSeq));
end;

procedure TOpPrmClusTreeCART.SetDefaultParameters;
begin
 inherited SetDefaultParameters();
 //modifier les critres par dfaut de l'anctre
 //nombre de clusters maximum
 FMaxNbClusters:= 50;
 //paramtres ARBRE
 FSizeBeforeSplit:= 5;
 FLeavesSizeAfterSplit:= 2;
 //profondeur max.
 FMaxDepth:= 10;
 //gain min. en pourcentage de l'inertie totale --> construction hurdling
 FGoodnessThreshold:= 0.0;
 //critres de post-pruning
 FPruningSetSize:= 33;
 //FSERule:= 3.0;
 //new -- 11/09/2005 -- seuil empirique, les autres valeurs possibles sont -0.01 (permissif) et -0.03 (trs restrictif)
 FThresholdSlope:= -0.015;
 //affichage
 FShowAllTreeSeq:= TRUE;
end;

initialization
 RegisterClass(TGenClusTreeCART);
end.
