(*******************************************************************)
(* UCompClusteringTree.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(*******************************************************************)

{
@abstract(Arbre de Classification -- Extension du schma [Regression Tree] au cas endogne-multivarie)
@author(Ricco)
@created(08/09/2005)

L'inspiration est la mthode divisive monothtique de Lechevalier et Chavent (1999).

Composant.
}

unit UCompClusteringTree;

interface

USES
        Contnrs, Forms, Classes, IniFiles,
        UCompDefinition,
        UOperatorDefinition,
        UCompClusteringDefinition,
        UDatasetDefinition, UDatasetExamples,
        UCalcStatDes;

TYPE
       {gnrateur de composant}
        TGenClusTree = class(TMLGenComp)
                      protected
                      procedure   GenCompInitializations(); override;
                      public
                      function    GetClassMLComponent: TClassMLComponent; override;
                      end;

        {composant CT}
        TMLCompClusTree = class(TMLCompClustering)
                         protected
                         function    getClassOperator: TClassOperator; override;
                         function    getGenericAttName(): string; override;
                         end;

        {oprateur CT}
        TOpClusTree = class(TOperatorClusteringContinue)
                     protected
                     function    getClassParameter: TClassOperatorParameter; override;
                     function    getClassCalcClustering(): TClassCalcClustering; override;
                     function    CheckAttributes(): boolean; override;
                     end;

        {paramtre oprateur CT}
        TOpPrmClusTree = class(TOpPrmClustering)
                    protected
                    {nombre de clusters maximum}
                    FMaxNbClusters: integer;
                    {normalisation pour le calcul des distances}
                    FNormalization: integer;
                    {taille sommet pour tre segment}
                    FSizeBeforeSplit: integer;
                    {Taille minimale des feuilles produites}
                    FLeavesSizeAfterSplit: integer;
                    {profondeur max. de l'arbre}
                    FMaxDepth: integer;
                    {gain minimum en pourcentage de l'inertie totale}
                    FGoodnessThreshold: double;
                    {taille du pruning set, en pourcentage de la taille de la base}
                    //FPruningSetSize : integer;
                    {x-SE rule}
                    //FSERule: double;
                    {montrer ou pas toutes les squences d'arbres si > SEUIL_NB_SEQUENCES}
                    FShowAllTreeSeq: boolean;
                    function    CreateDlgParameters(): TForm; override;
                    procedure   SetDefaultParameters(); override;
                    public
                    //
                    procedure   LoadFromStream(prmStream: TStream); override;
                    procedure   SaveToStream(prmStream: TStream); override;
                    procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                    procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                    function    getHTMLParameters(): string; override;
                    //
                    property    MaxNbClusters: integer read FMaxNbClusters write FMaxNbClusters;
                    property    Normalization: integer read FNormalization write FNormalization;
                    property    SizeBeforeSplit: integer read FSizeBeforeSplit write FSizeBeforeSplit;
                    property    MaxDepth: integer read FMaxDepth write FMaxDepth;
                    property    GoodnessThreshold: double read FGoodnessThreshold write FGoodnessThreshold;
                    property    LeavesSizeAfterSplit: integer read FLeavesSizeAfterSplit write FLeavesSizeAfterSplit;
                    //property    PruningSetSize: integer read FPruningSetSize write FPruningSetSize;
                    end;


implementation

uses
    Sysutils,
    UDatasetImplementation, UCalcClusteringTree, UConstConfiguration,
  UDlgOpPrmClusteringTree;

{ TGenClusClusTree }

procedure TGenClusTree.GenCompInitializations;
begin
 FMLComp:= mlcClustering;
end;

function TGenClusTree.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompClusTree;
end;

{ TMLCompClusTree }

function TMLCompClusTree.getClassOperator: TClassOperator;
begin
 result:= TOpClusTree;
end;

function TMLCompClusTree.getGenericAttName: string;
begin
 result:= 'CT';
end;

{ TOpClusTree }

function TOpClusTree.CheckAttributes: boolean;
begin
 //ok si Targets tous continus et inputs quelconques
 result:= ((self.WorkData.LstAtts[asTarget].Count > 0) and self.WorkData.LstAtts[asTarget].isAllCategory(caContinue))
          AND
          (self.WorkData.LstAtts[asInput].Count > 0);
end;

function TOpClusTree.getClassCalcClustering: TClassCalcClustering;
begin
 result:= TCalcClusTree;
end;

function TOpClusTree.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmClusTree; 
end;

{ TOpPrmClusTree }

function TOpPrmClusTree.CreateDlgParameters: TForm;
begin
 result:= TdlgOpPrmClusteringTree.CreateFromOpPrm(self);
end;

function TOpPrmClusTree.getHTMLParameters: string;
var s: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>Tree Parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Rnd generator</TD><TD width=40 align="right">%d</TD></TR>',[ord(ModeRndGenerator)]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Max Number of Clusters</TD><TD align="right">%d</TD></TR>',[FMaxNbClusters]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Distance normalization</TD><TD align="right">%d</TD></TR>',[FNormalization]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Min. size for split</TD><TD align="right">%d</TD></TR>',[FSizeBeforeSplit]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Min. size of leaves</TD><TD align="right">%d</TD></TR>',[FLeavesSizeAfterSplit]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Max. depth</TD><TD align="right">%d</TD></TR>',[FMaxDepth]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Goodness threshold</TD><TD align="right">%.2f</TD></TR>',[FGoodnessThreshold]);
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmClusTree.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 FMaxNbClusters:= prmINI.ReadInteger(prmSection,'max_clusters',FMaxNbClusters);
 FNormalization:= prmINI.ReadInteger(prmSection,'normalization',FNormalization);
 FSizeBeforeSplit:= prmINI.ReadInteger(prmSection,'size_before_split',FSizeBeforeSplit);
 FLeavesSizeAfterSplit:= prmINI.ReadInteger(prmSection,'size_leaves',FLeavesSizeAfterSplit);
 FMaxDepth:= prmINI.ReadInteger(prmSection,'max_depth',FMaxDepth);
 FGoodnessThreshold:= prmINI.ReadFloat(prmSection,'goodness_threshold',FGoodnessThreshold);
end;

procedure TOpPrmClusTree.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FMaxNbClusters,sizeof(FMaxNbClusters));
 prmStream.ReadBuffer(FNormalization,sizeof(FNormalization));
 prmStream.ReadBuffer(FSizeBeforeSplit,sizeof(FSizeBeforeSplit));
 prmStream.ReadBuffer(FLeavesSizeAfterSplit,sizeof(FLeavesSizeAfterSplit));
 prmStream.ReadBuffer(FMaxDepth,sizeof(FMaxDepth));
 prmStream.ReadBuffer(FGoodnessThreshold,sizeof(FGoodnessThreshold));
end;

procedure TOpPrmClusTree.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteInteger(prmSection,'max_clusters',FMaxNbClusters);
 prmINI.WriteInteger(prmSection,'normalization',FNormalization);
 prmINI.WriteInteger(prmSection,'size_before_split',FSizeBeforeSplit);
 prmINI.WriteInteger(prmSection,'size_leaves',FLeavesSizeAfterSplit);
 prmINI.WriteInteger(prmSection,'max_depth',FMaxDepth);
 prmINI.WriteFloat(prmSection,'goodness_threshold',FGoodnessThreshold);
end;

procedure TOpPrmClusTree.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FMaxNbClusters,sizeof(FMaxNbClusters));
 prmStream.WriteBuffer(FNormalization,sizeof(FNormalization));
 prmStream.WriteBuffer(FSizeBeforeSplit,sizeof(FSizeBeforeSplit));
 prmStream.WriteBuffer(FLeavesSizeAfterSplit,sizeof(FLeavesSizeAfterSplit));
 prmStream.WriteBuffer(FMaxDepth,sizeof(FMaxDepth));
 prmStream.WriteBuffer(FGoodnessThreshold,sizeof(FGoodnessThreshold));
end;

procedure TOpPrmClusTree.SetDefaultParameters;
begin
 inherited;
 //nombre de clusters maximum
 FMaxNbClusters:= 10;
 //normalisation -- non au dpart
 FNormalization:= 0;
 //paramtres ARBRE
 FSizeBeforeSplit:= 10;
 FLeavesSizeAfterSplit:= 5;
 //profondeur max.
 FMaxDepth:= 5;
 //gain min. en pourcentage de l'inertie totale
 FGoodnessThreshold:= 2.0;
end;

initialization
 RegisterClass(TGenClusTree);
end.
