(******************************************************************)
(* UCompClusteringLVQ.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(******************************************************************)

{
@abstract(Un clustering supervis - Learning Vector Quantizer)
@author(Ricco)
@created(12/01/2004)
}
unit UCompClusteringLVQ;

interface

USES
        Forms, Classes, Contnrs,
        IniFiles,
        UCompDefinition,
        UDatasetDefinition,
        UDatasetExamples,
        UOperatorDefinition,
        UCalcStatDes,
        UCompClusteringDefinition;

TYPE
        {gnrateur de composant}
        TGenClusLVQ = class(TMLGenComp)
                      protected
                      procedure   GenCompInitializations(); override;
                      public
                      function    GetClassMLComponent: TClassMLComponent; override;
                      end;

        {composant LVQ}
        TMLCompClusLVQ = class(TMLCompClustering)
                         protected
                         function    getClassOperator: TClassOperator; override;
                         function    getGenericAttName(): string; override;
                         end;

        {oprateur LVQ}
        TOpClusLVQ = class(TOperatorClusteringContinue)
                     protected
                     function    getClassParameter: TClassOperatorParameter; override;
                     function    getClassCalcClustering(): TClassCalcClustering; override;
                     function    CheckAttributes(): boolean; override;
                     end;

        {paramtre oprateur LVQ}
        TOpPrmLVQ = class(TOpPrmClustering)
                    private    
                    {Nombre de clusters par classe}
                    FNbClustersPerClass: integer;
                    {Taux d'apprentissage}
                    FLearningRate: double;
                    {nombre d'irrations}
                    FNbIterations: integer;
                    {normalisation pour le calcul de la distance,
                    0 -> none,
                    1 -> variance}
                    FNormalization: integer;
                    protected
                    function    CreateDlgParameters(): TForm; override;
                    procedure   SetDefaultParameters(); override;
                    public
                    procedure   LoadFromStream(prmStream: TStream); override;
                    procedure   SaveToStream(prmStream: TStream); override;
                    procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                    procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                    function    getHTMLParameters(): string; override;
                    property NbClustersPerClass: integer read FNbClustersPerClass write FNbClustersPerClass;
                    property LearningRate: double read FLearningRate write FLearningRate;
                    property NbIterations: integer read FNbIterations write FNbIterations;
                    property Normalization: integer read FNormalization write FNormalization;    
                    end;

        {dclaration forward}
        TCalcLVQ = class;

        {tableau d'infos pour un noeud LVQ}
        TTabNodeLVQ = array of double;

        {un noeud LVQ}
        TNodeLVQ = class(TObject)
                   private
                   {Computer associated}
                   FCalcLVQ: TCalcLVQ;
                   {coordonne du noeud}
                   FCoordNode: TTabNodeLVQ;
                   {valeur de classe associe}
                   FClassValue: TTypeDiscrete;
                   public
                   {initialiser}
                   constructor Create(prmCalcLVQ: TCalcLVQ; firstExample: integer);
                   {dtruire}
                   destructor  Destroy; override;
                   {calculer la distance}
                   function    distance(example: integer): double;
                   {recalculer les coordonnes - raprochement ou loignement selon la valeur de classvalue}
                   function    refreshNode(example: integer): integer;
                   {valeur de classe associe}
                   property    ClassValue: TTypeDiscrete read FClassValue;
                   {coordonnes}
                   property    CoordNode: TTabNodeLVQ read FCoordNode;
                   end;

        {un ensemble de noeuds LVQ}
        TSetNodesLVQ = class
                       private
                       {calculateur LVQ}
                       FCalcLVQ: TCalcLVQ;
                       {liste interne des noeuds}
                       FLstNodes: TObjectList;
                       {calculer le nombre de noeuds}
                       function   getNbNodes(): integer;
                       {accder  un noeud}
                       function   getNode(i: integer): TNodeLVQ;
                       public
                       {construire la structure}
                       constructor create(prmCalc: TCalcLVQ);
                       {dtruire la structure}
                       destructor  destroy; override;
                       {initialiser les noeuds}
                       procedure   initialisation(examples: TExamples);
                       {chercher le noeud le plus proche}
                       function    getNearestNode(example: integer; var numero: integer): TNodeLVQ;
                       {un noeud}
                       property   Node[i: integer]: TNodeLVQ read getNode;
                       {nb de noeuds}
                       property   Count: integer read getNbNodes;
                       end;

        {calculateur}
        TCalcLVQ = class(TCalcClusteringContinue)
                   private
                   {attribut de rfrence}
                   FAttRef: TAttribute;
                   {tableau des variances pour la pondration}
                   FVariances: TTabNodeLVQ;
                   {taux d'apprentissage courant}
                   FCurLRate: double;
                   {structure LVQ}
                   FSetLVQ: TSetNodesLVQ;
                   protected
                   function    getHTMLClustering(): string; override;
                   public
                   destructor  Destroy; override;
                   procedure   BuildClusters(prmExamples: TExamples); override;
                   procedure   FillClusAttDef(); override;
                   function    SetClusterExample(example: integer): TTypeDiscrete; override;
                   property Variances: TTabNodeLVQ read FVariances;
                   property AttRef: TAttribute read FAttRef;
                   property CurLRate: double read FCurLRate;
                   end;

implementation

uses
        Sysutils,
        UStringsResources, UDlgOpPrmClusteringLVQ, UConstConfiguration,
        ULogFile, UCalcRndGenerator;

{ TGenClusLVQ }

procedure TGenClusLVQ.GenCompInitializations;
begin
 FMLComp:= mlcClustering;
 //FMLNumIcon:= 37;
 //FMLCompName:= str_comp_name_lvq;
 //FMLBitmapFileName:= 'MLClusteringLVQ.bmp';
end;

function TGenClusLVQ.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompClusLVQ;
end;

{ TMLCompClusLVQ }

function TMLCompClusLVQ.getClassOperator: TClassOperator;
begin
 result:= TOpClusLVQ;
end;

function TMLCompClusLVQ.getGenericAttName: string;
begin
 result:= 'LVQ';
end;

{ TOpClusLVQ }

function TOpClusLVQ.CheckAttributes: boolean;
begin
 result:= inherited CheckAttributes()
          and (Targets.Count=1)
          and (Targets.isAllCategory(caDiscrete)); 
end;

function TOpClusLVQ.getClassCalcClustering: TClassCalcClustering;
begin
 result:= TCalcLVQ;
end;

function TOpClusLVQ.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmLVQ;
end;

{ TOpPrmLVQ }

function TOpPrmLVQ.CreateDlgParameters: TForm;
begin
 result:= TDlgOpPrmClusteringLVQ.CreateFromOpPrm(self);
end;

function TOpPrmLVQ.getHTMLParameters: string;
var s,sPrm: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>LVQ parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Clusters per class</TD><TD align="right">%d</TD></TR>',[FNbClustersPerClass]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Learning rate</TD><TD align="right">%.2f</TD></TR>',[FLearningRate]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Clusters per class</TD><TD align="right">%d</TD></TR>',[FNbIterations]);
 case FNormalization of
  0: sPrm:= 'none';
  1: sPrm:= 'variance';
 end;
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Clusters per class</TD><TD align="right">%s</TD></TR>',[sPrm]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>%s</TD><TD align="right">%s</TD></TR>',['Seed random generator',StartSeedDescription[self.FModeRndGenerator]]);
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmLVQ.LoadFromINI(prmSection: string; prmINI: TMemIniFile);
begin
 inherited;
 FNbClustersPerClass:= prmINI.ReadInteger(prmSection,'clusters_per_class',FNbClustersPerClass);
 FLearningRate:= prmINI.ReadFloat(prmSection,'learning_rate',FLearningRate);
 FNbIterations:= prmINI.ReadInteger(prmSection,'nb_iterations',FNbIterations);
 FNormalization:= prmINI.ReadInteger(prmSection,'normalization',FNormalization);
end;

procedure TOpPrmLVQ.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FNbClustersPerClass,sizeof(FNbClustersPerClass));
 prmStream.ReadBuffer(FLearningRate,sizeof(FLearningRate));
 prmStream.ReadBuffer(FNbIterations,sizeof(FNbIterations));
 prmStream.ReadBuffer(FNormalization,sizeof(FNormalization));
end;

procedure TOpPrmLVQ.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteInteger(prmSection,'clusters_per_class',FNbClustersPerClass);
 prmINI.WriteFloat(prmSection,'learning_rate',FLearningRate);
 prmINI.WriteInteger(prmSection,'nb_iterations',FNbIterations);
 prmINI.WriteInteger(prmSection,'normalization',FNormalization);
end;

procedure TOpPrmLVQ.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FNbClustersPerClass,sizeof(FNbClustersPerClass));
 prmStream.WriteBuffer(FLearningRate,sizeof(FLearningRate));
 prmStream.WriteBuffer(FNbIterations,sizeof(FNbIterations));
 prmStream.WriteBuffer(FNormalization,sizeof(FNormalization));
end;

procedure TOpPrmLVQ.SetDefaultParameters;
begin
 inherited SetDefaultParameters();
 {Nombre de clusters par classe}
 FNbClustersPerClass:= 3;
 {Taux d'apprentissage}
 FLearningRate:= 0.15;
 {nombre d'irrations}
 FNbIterations:= 10;
 {normalisation pour le calcul de la distance: 0 -> none, 1 -> variance}
 FNormalization:= 1;
end;

{ TNodeLVQ }

constructor TNodeLVQ.Create(prmCalcLVQ: TCalcLVQ; firstExample: integer);
var j: integer;
begin
 inherited Create();
 FCalcLVQ:= prmCalcLVQ;
 //tableau des coordonnes
 setLength(FCoordNode,FCalcLVQ.Inputs.Count);
 //copier les valeurs
 for j:= 0 to pred(FCalcLVQ.Inputs.Count) do
  FCoordNode[j]:= FCalcLVQ.Inputs.Attribute[j].cValue[firstExample];
 //la valeur de classe associe
 FClassValue:= FCalcLVQ.AttRef.dValue[firstExample];
 //afficher
 TraceLog.WriteToLogFile(format('LVQ >> initialize a node of %d',[FClassValue]));
end;

destructor TNodeLVQ.Destroy;
begin
 setLength(FCoordNode,0);
 inherited Destroy;
end;

function TNodeLVQ.distance(example: integer): double;
var j: integer;
    s,v: double;
begin
 s:= 0.0;
 for j:= 0 to pred(FCalcLVQ.Inputs.Count) do
  begin
   //pvm si une des variables est constante
   v:= 1.0/FCalcLVQ.Variances[j];
   v:= v*SQR(FCoordNode[j]-FCalcLVQ.Inputs.Attribute[j].cValue[example]);
   s:= s+v;
  end;
 result:= s;
end;

function TNodeLVQ.refreshNode(example: integer): integer;
var signRefresh,ecart: double;
    j: integer;
begin
 //loigner ou rapprocher ?
 if (FCalcLVQ.AttRef.dValue[example] = FClassValue)
  then signRefresh:= +1.0
  else signRefresh:= -1.0;
 //pour chaque attribut
 for j:= 0 to pred(FCalcLVQ.Inputs.Count) do
  begin
   //cart
   ecart:= FCalcLVQ.Inputs.Attribute[j].cValue[example]-FCoordNode[j];
   //mj des poids
   FCoordNode[j]:= FCoordNode[j]+signRefresh*FCalcLVQ.CurLRate*ecart;
  end;
 result:= ord(signRefresh<0);
end;

{ TSetNodesLVQ }

constructor TSetNodesLVQ.create(prmCalc: TCalcLVQ);
begin
 inherited Create();
 FCalcLVQ:= prmCalc;
 FLstNodes:= TObjectList.Create(TRUE);
end;

destructor TSetNodesLVQ.destroy;
begin
 if assigned(FLstNodes)
  then FreeAndNil(FLstNodes);
 inherited Destroy;
end;

function TSetNodesLVQ.getNbNodes: integer;
begin
 result:= FLstNodes.Count;
end;

function TSetNodesLVQ.getNearestNode(example: integer; var numero: integer): TNodeLVQ;
var i,iMin: integer;
    node,minNode: TNodeLVQ;
    d,dMin: double;
begin
 minNode:= nil;
 dMin:= +1.0e308;
 iMin:= 0;
 for i:= 0 to pred(self.Count) do
  begin
   node:= self.Node[i];
   d:= node.distance(example);
   if (d<dMin)
    then
     begin
      iMin:= i;
      dMin:= d;
      minNode:= node;
     end;
  end;
 numero:= succ(iMin);
 result:= minNode;
end;

function TSetNodesLVQ.getNode(i: integer): TNodeLVQ;
begin
 result:= FLstNodes.Items[i] as TNodeLVQ;
end;

procedure TSetNodesLVQ.initialisation(examples: TExamples);
var node: TNodeLVQ;
    k,i,nbClus: integer;
    dispExamples: TObjectList;
    ex: TExamples;
    tmp: TExamples;
begin
 nbClus:= (FCalcLVQ.PrmCalc as TOpPrmLVQ).NbClustersPerClass;
 tmp:= TExamples.Create(nbClus);
 //construire une liste d'individus par classe
 dispExamples:= examples.DispatchExamples(FCalcLVQ.AttRef);
 //puis echantillonner dans chaque sous-liste
 for k:= 0 to pred(dispExamples.Count) do
  begin
   ex:= dispExamples.Items[k] as TExamples;
   //nombre d'lments  extraire
   ex.Sampling(nbClus,tmp,(FCalcLVQ.PrmCalc as TOpPrmLVQ).ModeRndGenerator);
   //initialiser les noeuds LVQ
   for i:= 1 to tmp.Size do
    begin
     node:= TNodeLVQ.Create(FCalcLVQ,tmp.Number[i]);
     FLstNodes.Add(node);
    end;
  end;
 //vider les var. temporaires
 tmp.Free;
 dispExamples.Free;
end;

{ TCalcLVQ }

procedure TCalcLVQ.BuildClusters(prmExamples: TExamples);
var j,i: integer;
    workex: TExamples;
    nbIter: integer;
    reducLR: double;
    example: integer;
    node: TNodeLVQ;
    numNearest: integer;
    nbBadClassValue: integer;
begin
 inherited BuildClusters(prmExamples);
 //prparer les variables temporaires, pour l'instant ce sera celui dans Input
 FAttRef:= NIL;
 if (self.Targets.Count>0)
  then FAttRef:= self.Targets.Attribute[0] as TAttribute;
 if not(assigned(FAttRef))
  then RAISE Exception.Create('no attribute reference for LVQ')
  else
   begin
    setLength(FVariances,self.Inputs.Count);
    //calculer la variance utilise pour la normalisation
    if ((self.PrmCalc as TOpPrmLVQ).FNormalization = 1)
     then
      begin
       For j:= 0 to pred(self.Inputs.Count) do
        FVariances[j]:= TCalcStatDesContinuous(self.StatsInputs.Stat(j)).Variance;
      end
     else
      begin
       for j:= 0 to pred(self.Inputs.Count) do
        FVariances[j]:= 1.0;
      end;
    //initialiser les noeuds
    FSetLVQ:= TSetNodesLVQ.create(self);
    FSetLVQ.initialisation(prmExamples);
    //mlanger l'ordre des individus
    workex:= prmExamples.funcRandomizeExamples(PrmCalc.ModeRndGenerator);
    //rcuperer les paramtres de calcul
    FCurLRate:= (self.PrmCalc as TOpPrmLVQ).LearningRate;
    nbIter:= (self.PrmCalc as TOpPrmLVQ).NbIterations;
    reducLR:= FCurLRate/(1.0*nbIter);
    //tant que la mj a de sens
    while (nbIter>0) do
     begin
      nbBadClassValue:= 0;
      //faire passer tous les individus
      for i:= 1 to workex.Size do
       begin
        example:= workex.Number[i];
        node:= FSetLVQ.getNearestNode(example,numNearest);
        if assigned(node)
         then nbBadClassValue:= nbBadClassValue+node.refreshNode(example);
       end;
      TraceLog.WriteToLogFile(format('LVQ >> learning rate : %.8f -- bad affectations : %d',[FCurLRate,nbBadClassValue]));
      //rduire le taux d'apprentissage
      FCurLRate:= FCurLRate-reducLR;
      dec(nbIter);
     end;
    //fin des calculs
    workex.Free;
   end;
end;

destructor TCalcLVQ.Destroy;
begin
 setLength(FVariances,0);
 If assigned(FSetLvq)
  then FreeAndNil(FSetLvq);
 inherited;
end;

procedure TCalcLVQ.FillClusAttDef;
var i,j: integer;
begin
 FAttClus.LstValues.clear;
 //pour chaque classe et pour chaque cluster dans la classe
 for i:= 1 to FAttRef.nbValues do
   for j:= 1 to (self.PrmCalc as TOpPrmLVQ).NbClustersPerClass do
     FAttClus.LstValues.getValue(format('c_lvq_%d_%d',[i,j]));
end;

function TCalcLVQ.getHTMLClustering: string;
var s: string;
    k,i,j,n: integer;
    node: TNodeLVQ;
begin
 s:= '<P><H3>LVQ nodes</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 //en-tte, attribut de rfrence et inputs
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY;
 s:= s+format('<TH>%s</TH><TH colspan=%d>%s</TH></TR>',['Ref. Attribute',Inputs.Count,'Input attributes']);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY;
 s:= s+format('<TD align=center>%s</TD>',[FAttRef.Name]);
 for j:= 0 to pred(Inputs.Count) do
  s:= s+format('<TD align=center>%s</TD>',[Inputs.Attribute[j].Name]);
 s:= s+'</TR>';
 //les valeurs de stats
 s:= s+HTML_TABLE_COLOR_DATA_GREEN+'<TD>Global</TD>';
 for j:= 0 to pred(StatsInputs.Count) do
  s:= s+format('<TD align=right>%.2f</TD>',[TCalcStatDesContinuous(StatsInputs.Stat(j)).Average]);
 s:= s+'</TR>';
 //pour chaque valeur de classe
 n:= -1;
 for k:= 1 to FAttRef.nbValues do
  begin
   s:= s+HTML_TABLE_COLOR_HEADER_BLUE;
   s:= s+format('<TD>%s</TD><TD colspan=%d></TD></TR>',[FAttRef.LstValues.getDescription(k),Inputs.Count]);
   for i:= 1 to (PrmCalc as TOpPrmLVQ).NbClustersPerClass do
    begin
     inc(n);
     node:= FSetLVQ.Node[n];
     s:= s+HTML_TABLE_COLOR_DATA_BLUE;
     s:= s+format('<TD>Clus n%d</TD>',[i]);
     for j:= 0 to pred(Inputs.Count) do
      s:= s+format('<TD align=right>%.2f</TD>',[node.CoordNode[j]]);
     s:= s+'</TR>';
    end;
  end;
 s:= s+'</table>';
 result:= s;
end;

function TCalcLVQ.SetClusterExample(example: integer): TTypeDiscrete;
var numero: integer;
begin
 FSetLVQ.getNearestNode(example,numero);
 result:= BYTE(numero);
end;

initialization
 RegisterClass(TGenClusLVQ);
end.
