(*************************************************************************)
(* UCompClusteringDefinition.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(*************************************************************************)

{
@abstract(Composant de base pour le clustering)
@author(Ricco)
@created(12/01/2004)
On reprend le modle qui a t mis en place pour la rgression.

(28/09/03) because LVQ, les attributs de calculs sont devenus les inputs, dans les cas
des mthodes monothtiques, on utilisera les attributs illustratifs pour dcrire les partitions. 
}
unit UCompClusteringDefinition;

interface

uses
        UCompManageDataset,
        UDatasetDefinition,
        UDatasetImplementation,
        UOperatorDefinition,
        UDatasetExamples,
        UCalcStatDes,
        UCalcStatDesConditionnalDesc,
        UCalcRndGenerator,
        Classes, IniFiles;

TYPE
        {le composant de base clustering}
        TMLCompClustering = class(TMLCompLocalData)
                            private
                            {la variable cluster en sortie, il n'y en aura toujours q'un seul}
                            FAttCluster: TAttDiscrete;
                            protected
                            procedure   InitializeDataset(); override;
                            {lister les attributs gnrs}
                            function    GetLogResultDescription(): string; override;
                            {nom gnrique de l'attribut gnr -  surcharger absolument}
                            function    getGenericAttName(): string; virtual; abstract;
                            public
                            property    AttCluster: TAttDiscrete read FAttCluster;
                            end;

        TCalcClustering = class;
        TClassCalcClustering = class of TCalcClustering;

        {l'oprateur de clustering}
        TOperatorClustering = class(TOpLocalData)
                              private
                              {pointeur sur l'attribut cluster}
                              FAttCluster: TAttDiscrete;
                              {statistiques sur le cluster}
                              FStatCluster: TCalcStatDesDiscrete;
                              {pointeur sur les attributs target - elle est gnralement vide, sauf pour les LVQ}
                              FTargets: TLstAttributes;
                              {pointeur sur les attributs input}
                              FInputs: TLstAttributes;
                              {calculateur de cluster}
                              FCalcClustering: TCalcClustering;
                              {remplir l'attribut cluster -> FAttCluster}
                              function    FillClusterAttribute(): boolean;
                              {procdure de calcul pour construire la typologie}
                              function    BuildClusters(): boolean;
                              {valuer la qualit du clustering}
                              function    EvaluateClustering(): boolean;
                              protected
                              {dsalloue le calculateur}
                              procedure   ReInitialize(); override;
                              {rcuprer la classe du calculateur de clustering -  surcharger absolument}
                              function    getClassCalcClustering(): TClassCalcClustering; virtual; abstract;
                              {description HTML des clusters, cela peut tre standard comme ici ou plus "visuel" dans le cas des Kohonen par exemple}
                              function    getHTMLClustersDescriptions(): string; virtual;
                              {vrification du statut des attributs}
                              function    CheckAttributes(): boolean; override;
                              public
                              constructor Create(AOwner: TObject); override;
                              destructor  destroy; override;
                              {execution}
                              function    CoreExecute(): boolean; override;
                              {rapport}
                              function    getHTMLResultsSummary(): string; override;
                              {attributs targets}
                              property    Targets: TLstAttributes read FTargets;
                              {attributs inputs}
                              property    Inputs:  TLstAttributes read FInputs;
                              {attribut cluster}
                              property    AttCluster: TAttDiscrete read FAttCluster;
                              {statistiques sur les clusters}
                              property    StatCluster: TCalcStatDesDiscrete read FStatCluster;
                              {pointeur sur la calculateur}
                              property   CalcClustering: TCalcClustering read FCalcClustering;
                              end;

        {oprateur de clustering o les attributs targets sont tous continus}
        TOperatorClusteringContinue = class(TOperatorClustering)
                                      protected
                                      function    CheckAttributes(): boolean; override;
                                      end;  

        {paramtre de l'oprateur cluster}
        TOpPrmClustering = class(TOperatorParameter)
                           protected
                           //mode de gnration des valeurs alatoires
                           //deux modes possibles seulement : standard ou alatoire (user est laiss de ct)
                           FModeRndGenerator: TStartSeed;
                           //fixer les valeurs par dfaut
                           procedure   SetDefaultParameters(); override;
                           public
                           {chargement  partir d'un flux}
                           procedure   LoadFromStream(prmStream: TStream); override;
                           {chargement  partir d'un fichier INI}
                           procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                           {sauvegarde dans un flux}
                           procedure   SaveToStream(prmStream: TStream); override;
                           {sauvegarde dans un fichier INI}
                           procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                           {proprits}
                           property    ModeRndGenerator: TStartSeed read FModeRndGenerator write FModeRndGenerator;
                           end;

        {classe de calcul des clusters}
        TCalcClustering = class(TObject)
                          private
                          FTargets: TLstAttributes;
                          protected
                          {generateur interne de nombre alatoire}
                          FRndGenClustering: TRndGenerator;
                          {les attributs de l'tude}
                          FInputs : TLstAttributes;
                          FPrmCalc: TOpPrmClustering;
                          FAttClus: TAttDiscrete;
                          {envoyer la description HTML du clustering}
                          function  getHTMLClustering(): string; virtual; abstract;
                          {envoyer la description HTML de l'valuation}
                          function  GetHTMLEvaluation(): string; virtual; abstract;
                          public
                          {passer les paramtres et lancer le calcul}
                          constructor Create(prmTargets, prmInputs: TLstAttributes; OpPrm: TOpPrmClustering; prmAttClus: TAttDiscrete); virtual;
                          {dtruire}
                          destructor  Destroy(); override;
                          {lancer les calculs}
                          procedure   BuildClusters(prmExamples: TExamples); virtual; abstract;
                          {dcrit l'ensemble des clusters}
                          procedure FillClusAttDef(); virtual; abstract;
                          {renvoie le cluster d'un individu}
                          function  SetClusterExample(example: integer): TTypeDiscrete; virtual; abstract;
                          {valuer la qualit du clustering - cela dpend du type de target utilis entre autres}
                          procedure EvaluateClustering(prmExamples: TExamples); virtual; abstract;
                          {envoyer la description HTML}
                          function  GetHTMLResult(): string;
                          {paramtre de calcul}
                          property  PrmCalc: TOpPrmClustering read FPrmCalc;
                          {les targets}
                          property  Targets: TLstAttributes read FTargets;
                          {les inputs}
                          property  Inputs: TLstAttributes read FInputs;
                          {attribut cluster}
                          property  AttCluster: TAttDiscrete read FAttClus;
                          {gnrateur de nombre alatoire du calculateur}
                          property  RndGenClustering: TRndGenerator read FRndGenClustering;
                          end;

        {classe de calcul des clusters, targets continus, l'valuation est modifie}
        TCalcClusteringContinue = class(TCalcClustering)
                                  private
                                  FStatsInputs: TLstCalcStatDesContinuous;
                                  FStatsAnova: TLstStatDesCondANOVA;
                                  protected
                                  function    GetHTMLEvaluation(): string; override;
                                  public
                                  procedure   BuildClusters(prmExamples: TExamples); override;
                                  destructor  Destroy; override;
                                  procedure   EvaluateClustering(prmExamples: TExamples); override;
                                  property    StatsInputs: TLstCalcStatDesContinuous read FStatsInputs;
                                  end;
        

implementation

USES
        Sysutils, UConstConfiguration;

{ TMLCompClustering }

function TMLCompClustering.GetLogResultDescription: string;
begin
 result:= Format('%d attributes before, %d attributes after >> cluster : %s',
         [(self.Predecessor as TMLCompLocalData).OutputData.LstAtts[asAll].Count,self.OutputData.lstAtts[asAll].Count,
         FAttCluster.Name]);
end;

procedure TMLCompClustering.InitializeDataset;
begin
 inherited InitializeDataset();
 FAttCluster:= TAttDiscrete.Create(Format('Cluster_%s_%d',[self.getGenericAttName(),self.Number]),LocalDataset.Size);
 LocalDataset.Add(FAttCluster);
end;

{ TOperatorClustering }

function TOperatorClustering.BuildClusters: boolean;
var ok: boolean;
begin
 if assigned(FCalcClustering)
  then FCalcClustering.Free;
 ok:= true;
 try
 FCalcClustering:= self.getClassCalcClustering.Create(FTargets,FInputs,PrmOp as TOpPrmClustering,FAttCluster);
 FCalcClustering.BuildClusters(workdata.Examples);
 except
 ok:= false;
 end;
 result:= ok;
end;

function TOperatorClustering.CheckAttributes: boolean;
var ok: boolean;
begin
 ok:= true;
 FTargets:= NIL;
 FInputs := NIL;
 FInputs:= workdata.LstAtts[asInput];
 if (FInputs.Count=0)
  then ok:= false
  else FTargets:= workdata.LstAtts[asTarget];
 result:= ok;
end;

function TOperatorClustering.CoreExecute: boolean;
var ok: boolean;
begin
 TRY
  ok:= self.BuildClusters();
  if ok
   then ok:= self.FillClusterAttribute();
  if ok
    then ok:= self.EvaluateClustering();
 EXCEPT
 ok:= false;
 END;
 result:= ok;
end;

constructor TOperatorClustering.Create(AOwner: TObject);
begin
 inherited Create(AOwner);
 {rcuprer la variable cluster}
 FAttCluster:= (Aowner as TMLCompClustering).AttCluster;
end;

destructor TOperatorClustering.destroy;
begin
 if assigned(FCalcClustering)
  then FreeAndNil(FCalcClustering);
 if assigned(FStatCluster)
  then FreeAndNil(FStatCluster);
 inherited;
end;

function TOperatorClustering.EvaluateClustering: boolean;
var ok: boolean;
begin
 ok:= true;
 try
 //statistiques sur les clusters crs
 FStatCluster:= TCalcStatDesDiscrete.Create(FAttCluster,workdata.Examples);
 //cela peut tre enrichi plus tard !!!???
 FCalcClustering.EvaluateClustering(workdata.Examples);
 except
 ok:= false;
 end;
 result:= ok;
end;

function TOperatorClustering.FillClusterAttribute(): boolean;
var i: integer;
    ok: boolean;
begin
 ok:= true;
 TRY
 FAttCluster.LstValues.clear;
 //dcrire les clusters
 FCalcClustering.FillClusAttDef();
 //remplir l'attribut, sur tous les individus de la base
 for i:= 1 to FAttCluster.Size do
  FAttCluster.dValue[i]:= FCalcClustering.SetClusterExample(i);
 except
 ok:= false;
 end;
 result:= ok;
end;

function TOperatorClustering.getHTMLClustersDescriptions: string;
var s: string;
    k: integer;
begin
 //rsultat gnrique, bilan sur les clusters gnrs
 s:= '<P><H3>Clustering results</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_DATA_GRAY;
 s:= s+format('<TD>Clusters</TD><TH align=right>%d</TH><TH></TH></TR>',[FStatCluster.TabFreq.Size]);
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH>Cluster</TH><TH>Description</TH><TH>Size</TH></TR>';
 for k:= 1 to FStatCluster.TabFreq.Size do
  s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>cluster n%d</TD><TD align=center>%s</TD><TD align=right>%d</TD></TR>',[k,FStatCluster.Attribute.LstValues.GetDescription(k),FStatCluster.TabFreq.Value[k]]);
 s:= s+'</table>';
 result:= s;
end;

function TOperatorClustering.getHTMLResultsSummary: string;
begin
 result:= self.getHTMLClustersDescriptions()+
          FCalcClustering.GetHTMLResult();
end;

procedure TOperatorClustering.ReInitialize;
begin
 inherited ReInitialize();
 if assigned(FCalcClustering)
  then FreeAndNil(FCalcClustering);
 if assigned(FStatCluster)
  then FreeAndNil(FStatCluster);  
end;

{ TCalcClustering }

constructor TCalcClustering.Create(prmTargets, prmInputs: TLstAttributes;
  OpPrm: TOpPrmClustering; prmAttClus: TAttDiscrete);
begin
 inherited Create();
 FTargets:= prmTargets;
 FInputs:= prmInputs;
 FPrmCalc:= OpPrm;
 FAttClus:= prmAttClus;
 FRndGenClustering:= TRndGenerator.Create(PrmCalc.ModeRndGenerator,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2);
end;

destructor TCalcClustering.Destroy;
begin
 FRndGenClustering.Free();
 inherited Destroy();
end;

function TCalcClustering.GetHTMLResult: string;
var s: string;
begin
 s:= self.getHTMLClustering();
 s:= s+self.GetHTMLEvaluation();
 result:= s;
end;

{ TOperatorClusteringContinue }

function TOperatorClusteringContinue.CheckAttributes: boolean;
begin
 //vrification de type
 result:= inherited CheckAttributes()
          and Inputs.isAllCategory(caContinue);
end;

{ TCalcClusteringContinue }

procedure TCalcClusteringContinue.BuildClusters(prmExamples: TExamples);
begin
 //premire tape, les stats sur tous les individus
 FStatsInputs:= TLstCalcStatDesContinuous.Create(FInputs,prmExamples);
end;

destructor TCalcClusteringContinue.destroy;
begin
 FreeAndNil(FStatsInputs);
 FreeAndNil(FStatsAnova);
 inherited destroy;
end;

procedure TCalcClusteringContinue.EvaluateClustering(
  prmExamples: TExamples);
var i: integer;
    stat: TCalcSDCondDescANOVA;
begin
 FStatsAnova:= TLstStatDesCondANOVA.Create(NIL,NIL);
 for i:= 0 to pred(FInputs.Count) do
  begin
   stat:= TCalcSDCondDescANOVA.Create(FInputs.Attribute[i],FAttClus,prmExamples);
   FStatsAnova.AddStat(stat);
  end;
end;

function TCalcClusteringContinue.GetHTMLEvaluation: string;
begin
 result:= '<P><H3>Clusters vs. Target Attributes</H3>'+
          FStatsAnova.getHTMLResults();
end;

{ TOpPrmClustering }

procedure TOpPrmClustering.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 FModeRndGenerator:= TStartSeed(prmINI.ReadInteger(prmSection,'rnd_generator',Ord(FModeRndGenerator)));
end;

procedure TOpPrmClustering.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FModeRndGenerator,sizeof(FModeRndGenerator));
end;

procedure TOpPrmClustering.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteInteger(prmSection,'rnd_generator',Ord(FModeRndGenerator));
end;

procedure TOpPrmClustering.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FModeRndGenerator,sizeof(FModeRndGenerator));
end;

procedure TOpPrmClustering.SetDefaultParameters;
begin
 //par dfaut -- on produit toujours la mme squence
 FModeRndGenerator:= seedStandard;
end;

end.
