(******************************************************************)
(* UCompClusteringSOM.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(******************************************************************)

{
@abstract(Self Organisation Map de Kohonen - "Cartes de Kohonen")
@author(Ricco)
@created(12/01/2004)
}
unit UCompClusteringSOM;

interface

USES
        Forms, Classes, Contnrs,
        IniFiles,
        UCompDefinition,
        UDatasetDefinition,
        UDatasetExamples,
        UOperatorDefinition,
        UCalcStatDes,
        UCompClusteringDefinition;

TYPE
        {gnrateur de composant}
        TGenClusSOM = class(TMLGenComp)
                      protected
                      procedure   GenCompInitializations(); override;
                      public
                      function    GetClassMLComponent: TClassMLComponent; override;
                      end;

        {composant SOM}
        TMLCompClusSOM = class(TMLCompClustering)
                         protected
                         function    getClassOperator: TClassOperator; override;
                         function    getGenericAttName(): string; override;
                         end;

        {oprateur SOM}
        TOpClusSOM = class(TOperatorClusteringContinue)
                     protected
                     function    getClassParameter: TClassOperatorParameter; override;
                     function    getClassCalcClustering(): TClassCalcClustering; override;
                     function    getHTMLClustersDescriptions(): string; override;
                     end;

        {paramtre oprateur SOM}
        TOpPrmSOM = class(TOpPrmClustering)
                     private
                     {nombre de colonnes de la carte}
                     FColCount: integer;
                     {nombre de lignes de la carte}
                     FRowCount: integer;
                     {normalisation dans le calcul de la distance,
                     0 -> aucune,
                     1 -> pondr par la variance}
                     FNormalization: integer;
                     {valeur de dpart de la constante d'apprentissage}
                     FLearningRate: double;
                     {renvoie le nombre de clusters}
                     function    GetNbClusters(): integer;
                     protected
                     function    CreateDlgParameters(): TForm; override;
                     procedure   SetDefaultParameters(); override;
                     public
                     procedure   LoadFromStream(prmStream: TStream); override;
                     procedure   SaveToStream(prmStream: TStream); override;
                     procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                     procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                     function    getHTMLParameters(): string; override;
                     property    ColCount: integer read FColCount write FColCount;
                     property    RowCount: integer read FRowCount write FRowCount;
                     property    NbClusters: integer read getNbClusters;
                     property    Normalization: integer read FNormalization write FNormalization;
                     property    LearningRate: double read FLearningRate write FLearningRate;
                     end;

        {tableau d'informations sur une cellule}
        TTabCell = array of double;

        {petit forward}
        TCalcClusSOM = class;

        {une cellule de la carte topologique}
        TCellSOM = class(TObject)
                   private
                   {tableau des poids}
                   FTabWeight: TTabCell;
                   {les targets}
                   FTargets: TLstAttributes;
                   {tableau des variances}
                   FVariances: TTabCell;
                   {initialiser les poids}
                   procedure   InitializeWeight(prmCalc: TCalcClusSOM);
                   public
                   {initialisation}
                   constructor Create(prmTargets: TLstAttributes; prmAlgo: TOpPrmSOM; prmStats: TLstCalcStatDesContinuous; prmCalc: TCalcClusSOM);
                   {dtruire les tableaux internes}
                   destructor  Destroy; override;
                   {calcul de la distance - individu et poids courant}
                   function    Distance(example: integer): double;
                   {rafrachir les poids}
                   procedure   RefreshWeight(example: integer; prmLRate: double);
                   end;

        {Structure de carte topologique}
        TStructureSOM = class(TObject)
                        private
                        {paramtre de l'algo}
                        FPrm: TOpPrmSOM;
                        {nombre de lignes}
                        FColCount: integer;
                        {nombre de colonnes}
                        FRowCount: integer;
                        {liste de cellules}
                        FLstCells: TObjectList;
                        {accs  une cellule, format tableau}
                        function   getCellTab(i,j: integer): TCellSOM;
                        {accs  une cellule, format liste}
                        function   getCellLst(i: integer): TCellSOM;
                        {nombre de cellules}
                        function   getCellsCount(): integer;
                        public
                        {cration de la structure}
                        constructor Create(prmTargets: TLstAttributes; prm: TOpPrmSOM; prmStats: TLstCalcStatDesContinuous; prmCalc: TCalcClusSOM);
                        {dtruire la liste}
                        destructor  Destroy; override;
                        {rafrachir les poids pour un individu pass}
                        procedure   RefreshWeight(example: integer; prmLR: double; prmDVoisinage: integer);
                        {rcuprer les coordonnes du neurone le plus proche, et renvoie la distance associe}
                        function    getNearest(var iMax,jMax: integer; example: integer): double;
                        {accs aux cellules en tableau}
                        property    CellTab[i,j: integer]: TCellSOM read getCellTab;
                        {accs aux cellules en liste}
                        property    CellLst[i: integer]: TCellSOM read getCellLst;
                        {nombre de lignes}
                        property    RowCount: integer read FRowCount;
                        {nombre de colonnes}
                        property    ColCount: integer read FColCount;
                        {nombre de cellules}
                        property    CellsCount: integer read getCellsCount;
                        end;

        {la classe de calcul SOM}
        TCalcClusSOM = class(TCalcClusteringContinue)
                       private
                       {structure de carte}
                       FStrucSOM: TStructureSOM;
                       {ratio de l'inertie explique}
                       FRatioBSS: double;
                       {calculer l'inertie totale, n est la taille de l'chantillon}
                       function    ComputeGlobalInertia(n: double): double;
                       protected
                       {affichage topologique}
                       function    getHTMLClustering(): string; override;
                       public
                       destructor  Destroy; override;
                       procedure   BuildClusters(prmExamples: TExamples); override;
                       procedure   FillClusAttDef(); override;
                       function    SetClusterExample(example: integer): TTypeDiscrete; override;
                       procedure   EvaluateClustering(prmExamples: TExamples); override;
                       property    StrucSOM: TStructureSOM read FStrucSOM;
                       end; 


implementation

USES
        Sysutils, Math, ULogFile, UDlgOpPrmClusteringSOM,
        UConstConfiguration, UStringsResources, UCalcRndGenerator;

{ TGenClusSOM }

procedure TGenClusSOM.GenCompInitializations;
begin
 FMLComp:= mlcClustering;
 //FMLNumIcon:= 25;
 //FMLCompName:= str_comp_name_kohonen_som;
 //FMLBitmapFileName:= 'MLClusteringSOM.bmp';
end;

function TGenClusSOM.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompClusSOM;
end;

{ TMLCompClusSOM }

function TMLCompClusSOM.getClassOperator: TClassOperator;
begin
 result:= TOpClusSOM;
end;

function TMLCompClusSOM.getGenericAttName: string;
begin
 result:= 'SOM';
end;

{ TOpClusSOM }

function TOpClusSOM.getClassCalcClustering: TClassCalcClustering;
begin
 result:= TCalcClusSOM;
end;

function TOpClusSOM.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmSOM;
end;

function TOpClusSOM.getHTMLClustersDescriptions: string;
var i,j,tmpN: integer;
    calc: TCalcClusSOM;
    s: string;
begin
 calc:= self.CalcClustering as TCalcClusSOM;
 //construire le MAP de SOM
 s:= '<P><H3>MAP Topology</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH width=50></TH>';
 //description des colonnes
 for j:= 1 to calc.StrucSOM.ColCount do
  s:= s+format('<TH width=50>%d</TH>',[j]);
 s:= s+'</TR>';
 for i:= 1 to calc.StrucSOM.RowCount do
  begin
   s:= s+format('<TH %s>%d</TH>',[HTML_BGCOLOR_HEADER_GRAY,i]);
   for j:= 1 to calc.StrucSOM.ColCount do
    begin
     tmpN:= self.StatCluster.TabFreq.Value[pred(i)*calc.StrucSOM.ColCount+j];
     s:= s+format('<TD align=center %s>%d</TD>',[HTML_BGCOLOR_DATA_GRAY,tmpN]);
    end;
   s:= s+'</TR>';
  end;
 s:= s+'</TABLE>';
 result:= s;
end;

{ TOpPrmSOM }

function TOpPrmSOM.CreateDlgParameters: TForm;
begin
 result:= TDlgOpPrmClusKohonenSOM.CreateFromOpPrm(self);
end;

function TOpPrmSOM.getHTMLParameters: string;
var s,sPrm: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>SOM parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Columns</TD><TD align="right">%d</TD></TR>',[FColCount]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Rows</TD><TD align="right">%d</TD></TR>',[FRowCount]);
 case FNormalization of
  0: sPrm:= 'none';
  1: sPrm:= 'variance';
 end;
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Distance normalization</TD><TD align="right">%s</TD></TR>',[sPrm]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Learning rate</TD><TD align="right">%.2f</TD></TR>',[FLearningRate]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>%s</TD><TD align="right">%s</TD></TR>',['Seed random generator',StartSeedDescription[self.FModeRndGenerator]]); 
 s:= s+'</table>';
 result:= s;
end;

function TOpPrmSOM.GetNbClusters: integer;
begin
 result:= FRowCount*FColCount;
end;

procedure TOpPrmSOM.LoadFromINI(prmSection: string; prmINI: TMemIniFile);
begin
 inherited;
 FColCount:= prmINI.ReadInteger(prmSection,'col_count',FColCount);
 FRowCount:= prmINI.ReadInteger(prmSection,'row_count',FRowCount);
 FNormalization:= prmINI.ReadInteger(prmSection,'normalization',FNormalization);
 FLearningRate:= prmINI.ReadFloat(prmSection,'learning_rate',FLearningRate);
end;

procedure TOpPrmSOM.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FColCount,sizeof(FColCount));
 prmStream.ReadBuffer(FRowCount,sizeof(FRowCount));
 prmStream.ReadBuffer(FNormalization,sizeof(FNormalization));
 prmStream.ReadBuffer(FLearningRate,sizeof(FLearningRate));
end;

procedure TOpPrmSOM.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteInteger(prmSection,'col_count',FColCount);
 prmINI.WriteInteger(prmSection,'row_count',FRowCount);
 prmINI.WriteInteger(prmSection,'normalization',FNormalization);
 prmINI.WriteFloat(prmSection,'learning_rate',FLearningRate);
end;

procedure TOpPrmSOM.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FColCount,sizeof(FColCount));
 prmStream.WriteBuffer(FRowCount,sizeof(FRowCount));
 prmStream.WriteBuffer(FNormalization,sizeof(FNormalization));
 prmStream.WriteBuffer(FLearningRate,sizeof(FLearningRate));
end;

procedure TOpPrmSOM.SetDefaultParameters;
begin
 inherited SetDefaultParameters();
 FColCount:= 2;
 FRowCount:= 2;
 FNormalization:= 1;
 FLearningRate:= 0.2;
end;

{ TCalcClusSOM }

procedure TCalcClusSOM.BuildClusters(prmExamples: TExamples);
var prm: TOpPrmSOM;
    dVoisinage,d: integer;
    LR,gapLR: double;
    exRan: TExamples;
    i: integer;
begin
 prm:= prmCalc as TOpPrmSOM;
 //les calculs globaux
 inherited BuildClusters(prmExamples);
 //construire la structure de carte
 FStrucSOM:= TStructureSOM.Create(Inputs,prm,StatsInputs,self);
 //initialiser les paramtres de calcul
 //la distance de voisinage
 dVoisinage:= max(prm.RowCount,prm.ColCount) div 2;
 //gap de la constante d'apprentissage
 LR:= prm.LearningRate;
 gapLR:= LR/(1.0+dVoisinage);
 //mlanger les individus
 exRan:= prmExamples.funcRandomizeExamples(PrmCalc.ModeRndGenerator);
 //lancer le processus
 for d:= dVoisinage downto 0 do
  begin
   //infos logs
   TraceLog.WriteToLogFile(format('[kohonen] SOM process, neighborhood size %d, current learning rate %.4f',[d,LR]));
   //pour chaque individu
   for i:= 1 to exRan.Size do
    FStrucSom.RefreshWeight(exRan.Number[i],LR,d);
   //mj du learning rate, le passage  la taille de voisinage suivante est gr par la boucle FOR
   LR:= LR-gapLR;
  end;
 exRan.Free;
end;

function TCalcClusSOM.ComputeGlobalInertia(n: double): double;
var prm: TOpPrmSOM;
    j: integer;
    s: double;
begin
 prm:= PrmCalc as TOpPrmSOM;
 if (prm.Normalization = 1)
  //pondre par l'inverse de la variance => iterntie totale = dimension x n
  then s:= n*FInputs.Count
  //non pondre => somme des TSS, ou n x somme des variances
  else
   Begin
    s:= 0.0;
    for j:= 0 to pred(StatsInputs.Count) do
     s:= s+TCalcStatDesContinuous(StatsInputs.Stat(j)).TSS;
   end;
 result:= s;
end;

destructor TCalcClusSOM.Destroy;
begin
 if assigned(FStrucSOM)
  then FreeAndNil(FStrucSOM);
 inherited;
end;

procedure TCalcClusSOM.EvaluateClustering(prmExamples: TExamples);
var i,r,c: integer;
    TSS,WSS,d: double;
begin
 inherited EvaluateClustering(prmExamples);
 //calculer les inerties
 TSS:= self.ComputeGlobalInertia(prmExamples.Size);
 //les inerties wss
 WSS:= 0.0;
 for i:= 1 to prmExamples.Size do
  begin
   d:= self.StrucSOM.getNearest(r,c,prmExamples.Number[i]);
   WSS:= WSS+d;
  end;
 //ratio inertie explique
 FRatioBSS:= (TSS-WSS)/TSS;
end;

procedure TCalcClusSOM.FillClusAttDef;
var i,j: integer;
begin
 FAttClus.LstValues.clear;
 for i:= 1 to FStrucSOM.RowCount do
  for j:= 1 to FStrucSOM.ColCount do
   FAttClus.LstValues.getValue(format('c_som_%d_%d',[i,j]))
end;

function TCalcClusSOM.getHTMLClustering: string;
var s: string;
begin
 s:= '<P><H3>MAP Quality</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT
      +HTML_TABLE_COLOR_DATA_GREEN;
 s:= s+format('<TD>Ratio explained</TD><TD align=right>'+STR_FORMAT_VIEW_STAT_ACCURACY+'</TD>',[FRatioBSS]);
 s:= s+'</TR>';
 s:= s+'</TABLE>';
 result:= s;
end;

function TCalcClusSOM.SetClusterExample(example: integer): TTypeDiscrete;
var iMax,jMax: integer;
begin
 FStrucSOM.getNearest(iMax,jMax,example);
 //resultat
 if (iMax>0) and (jMax>0)
  then result:= pred(iMax)*FStrucSOM.ColCount+jMax
  else result:= succ(FRndGenClustering.IRanMarRange(FStrucSOM.getCellsCount));
end;

{ TCellSOM }

constructor TCellSOM.Create(prmTargets: TLstAttributes; prmAlgo: TOpPrmSOM;
  prmStats: TLstCalcStatDesContinuous; prmCalc: TCalcClusSOM);
var j: integer;
begin
 inherited Create();
 FTargets:= prmTargets;
 //initialiser les tableaux locaux
 SetLength(FTabWeight,FTargets.Count);
 SetLength(FVariances,FTargets.Count);
 case prmAlgo.Normalization of
  1 : begin
       for j:= 0 to pred(FTargets.Count) do
        if (TCalcStatDesContinuous(prmStats.Stat(j)).Variance>0)
         then FVariances[j]:= TCalcStatDesContinuous(prmStats.Stat(j)).Variance
         //de toute manire elle ne psera jamais dans les calculs
         else FVariances[j]:= 1.0;
      end
  else
    begin
     for j:= 0 to pred(FTargets.Count) do
      FVariances[j]:= 1.0;
    end;
 end;
 self.InitializeWeight(prmCalc);
end;

destructor TCellSOM.Destroy;
begin
 SetLength(FTabWeight,0);
 SetLength(FVariances,0);
 inherited;
end;

function TCellSOM.Distance(example: integer): double;
var s: double;
    j: integer;
begin
 s:= 0.0;
 for j:= 0 to pred(FTargets.Count) do
  s:= s+1.0/FVariances[j]*SQR(FTabWeight[j]-FTargets.Attribute[j].cValue[example]);
 result:= s;
end;

procedure TCellSOM.InitializeWeight(prmCalc: TCalcClusSOM);
var j: integer;
begin
 for j:= 0 to pred(FTargets.Count) do
  FTabWeight[j]:= 0.01*prmCalc.RndGenClustering.RanMar();
end;

procedure TCellSOM.RefreshWeight(example: integer; prmLRate: double);
var j: integer;
begin
 for j:= 0 to pred(FTargets.Count) do
  FTabWeight[j]:= FTabWeight[j]+prmLRate*(FTargets.Attribute[j].cValue[example]-FTabWeight[j]);
end;

{ TStructureSOM }

constructor TStructureSOM.Create(prmTargets: TLstAttributes; prm: TOpPrmSOM; prmStats: TLstCalcStatDesContinuous; prmCalc: TCalcClusSOM);
var i: integer;
    cell: TCellSOM;
begin
 inherited Create();
 FLstCells:= TObjectList.Create(TRUE);
 FPrm:= prm;
 FRowCount:= prm.RowCount;
 FColCount:= prm.ColCount;
 //initialiser la structure
 for i:= 0 to pred(prm.GetNbClusters) do
  begin
   cell:= TCellSOM.Create(prmTargets,prm,prmStats,prmCalc);
   FLstCells.Add(cell);
  end;
end;

destructor TStructureSOM.Destroy;
begin
 FLstCells.Free;
 inherited;
end;

function TStructureSOM.getCellLst(i: integer): TCellSOM;
begin
 result:= FLstCells.Items[i] as TCellSOM;
end;

function TStructureSOM.getCellsCount: integer;
begin
 result:= FLstCells.Count;
end;

function TStructureSOM.getCellTab(i, j: integer): TCellSOM;
var tmp: integer;
begin
 tmp:= pred(i)*self.FColCount+j;
 result:= FLstCells.Items[pred(tmp)] as TCellSOM;
end;

function TStructureSOM.getNearest(var iMax, jMax: integer;
  example: integer): double;
var i,j: integer;
    d,dMin: double;
begin
 iMax:= -1;
 jMax:= -1;
 dMin:= +1.0e308;
 //chercher le neurone le plus proche
 for i:= 1 to self.RowCount do
  for j:= 1 to self.ColCount do
   begin
    d:= self.CellTab[i,j].Distance(example);
    if (d<dMin)
     then
      begin
       dMin:= d;
       iMax:= i;
       jMax:= j;
      end;
   end;
 result:= dMin;
end;

procedure TStructureSOM.RefreshWeight(example: integer; prmLR: double;
  prmDVoisinage: integer);
var i,j,iMax,jMax: integer;
    sd: integer;
begin
 //obligation de procder par un double passage ici !!! pas possible autrement
 self.getNearest(iMax,jMax,example);
 //mettre  jour les neurones situs dans le voisinage
 for i:= 1 to self.RowCount do
  for j:= 1 to self.ColCount do
   begin
    sd:= abs(i-iMax)+abs(j-jMax);
    //dans le voisinage -> mj des poids
    if (sd<=prmDVoisinage)
     then self.CellTab[i,j].RefreshWeight(example,prmLR);
   end;
end;

initialization
 RegisterClass(TGenClusSOM);
end.
