(***********************************************************)
(* UCompSpvKNN.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(***********************************************************)

{
@abstract(k-NN (k nearest neighbours) - exhaustive search)
@author(Ricco)
@created(12/01/2004)
The nearest-neighbour search is the weak point: O(n). It could be
refined along two axes: (a) reduce the number of examples,
(b) use more powerful heuristics, such as partitioning the space into
regions, cf. KdTree, R-Tree, etc.
The distance used here is HEOM (Wilson-Martinez, JAIR'97). Extension to VDM, HVDM, etc. to be considered later.
}
unit UCompSpvKNN;

interface

USES
        Forms,
        Classes,IniFiles,
        UCompDefinition,
        UCompSpvLDefinition,
        UOperatorDefinition,
        UDatasetDefinition,
        UDatasetExamples,
        UCalcDistribution,
        UCalcSpvStructScore;        

TYPE
        {Component generator for the supervised k-NN learning component}
        TMLGCompKNN = class(TMLGenCompSpvLearning)
                      protected
                      {sets the generator's component kind (FMLComp) to mlcSpvLearning}
                      procedure   GenCompInitializations(); override;
                      public
                      {returns the component class built by this generator: TMLCompSpvKNN}
                      function    GetClassMLComponent: TClassMLComponent; override;
                      end;

        {The k-NN component}
        TMLCompSpvKNN = class(TMLCompSpvLearning)
                        protected
                        {returns the operator class attached to this component: TOpSpvKNN}
                        function    getClassOperator: TClassOperator; override;
                        end;

        {The k-NN operator}
        TOpSpvKNN = class(TOpSpvLearning)
                    protected
                    {returns the parameter class of the operator: TOpPrmSpvKNN}
                    function    getClassParameter: TClassOperatorParameter; override;
                    {returns the computation class of the operator: TCalcSpvKNN}
                    function    getClassSpvLearning(): TClassCalcSpvLearning; override;
                    end;

        {Distance normalization type -- 0 : HEOM, 1 : unweighted Euclidian.
         The ordinal values are what SaveToINI/SaveToStream persist.}
        TEnumKNNDistNormalization = (knnDistHEOM,knnDistEuclidian);

CONST
        {Display name of each distance normalization type, indexed by the enum value
         (used when rendering the parameters as HTML)}
        strKNNDistNomalization : array[TEnumKNNDistNormalization] of string = ('HEOM', 'Euclidian');

TYPE
        {Parameters of the k-NN operator}
        TOpPrmSpvKNN = class(TOpPrmSpvLearning)
                       private
                       {number of neighbours to consider}
                       FNbNeighbour: integer;
                       {type of distance normalization to use}
                       FNormalization: TEnumKNNDistNormalization;
                       protected
                       {defaults: 5 neighbours, HEOM normalization}
                       procedure   SetDefaultParameters(); override;
                       {creates the parameter dialog (TDlgOpPrmSpvKNN) bound to this object}
                       function    CreateDlgParameters(): TForm; override;
                       public
                       {HTML table listing the neighbour count and the distance name}
                       function    getHTMLParameters(): string; override;
                       {binary persistence: neighbour count first, then normalization type}
                       procedure   LoadFromStream(prmStream: TStream); override;
                       {INI persistence: keys 'neighbors' and 'dist_normalization'}
                       procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                       procedure   SaveToStream(prmStream: TStream); override;
                       procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                       property    NbNeighbour: integer read FNbNeighbour write FNbNeighbour;
                       property    DistNormalization: TEnumKNNDistNormalization read FNormalization write FNormalization;
                       end;

        {Computation class of the k-NN learner}
        TCalcSpvKNN = class(TCalcSpvLearning)
                      private
                      {per-descriptor minimum value}
                      FTabMin: array of TTypeContinue;
                      {per-descriptor maximum value}
                      FTabMax: array of TTypeContinue;
                      {per-descriptor range (max - min)}
                      FTabEcarts: array of TTypeContinue;
                      {the training (reference) examples}
                      FRefExamples: TExamples;
                      {current neighbours, stored by example number}
                      FNeighbours: TExamples;
                      {distances to the current neighbours (slots 1..FNbNeighbour are used)}
                      FTabDistNeighbour: array of double;
                      {number of neighbours to consider}
                      FNbNeighbour: integer;
                      {output distribution: class frequencies among the neighbours}
                      FTabFreqNeighbour: TTabFrequence;
                      {distance between two examples}
                      function    distance(e1,e2: integer): double;
                      {(re)initialize the neighbour and distance tables with the first
                       FNbNeighbour reference examples, relative to the given example}
                      procedure   resetTabNeighbour(example: integer);
                      {which neighbour slot is the farthest?}
                      function    getFarwest(): integer;
                      protected
                      procedure   createStructures(); override;
                      procedure   destroyStructures(); override;
                      {computes the descriptor ranges and stores the reference examples}
                      function    coreLearning(examples: TExamples): boolean; override;
                      public
                      {fills postProba from the class distribution of the nearest neighbours of example}
                      procedure   getScore(example: integer; var postProba: TTabScore); override;
                      end;

implementation

uses
        SysUtils,
        UStringsResources, UConstConfiguration, UDlgOpPrmSpvKNN;

{ TMLGCompKNN }

procedure TMLGCompKNN.GenCompInitializations;
begin
  // Tag this generator with the supervised-learning component kind.
  FMLComp := mlcSpvLearning;

  // Settings left disabled in the original source:
  //   FMLNumIcon := 35;
  //   FMLCompName := str_comp_name_spvl_knn;
  //   FMLBitmapFileName := 'MLSpvKNN.bmp';
end;

// Class of the component produced by this generator.
function TMLGCompKNN.GetClassMLComponent: TClassMLComponent;
begin
  Result := TMLCompSpvKNN;
end;

{ TMLCompSpvKNN }

// Class of the operator attached to the k-NN component.
function TMLCompSpvKNN.getClassOperator: TClassOperator;
begin
  Result := TOpSpvKNN;
end;

{ TOpSpvKNN }

// Class holding the parameters of the k-NN operator.
function TOpSpvKNN.getClassParameter: TClassOperatorParameter;
begin
  Result := TOpPrmSpvKNN;
end;

// Class performing the k-NN computations for this operator.
function TOpSpvKNN.getClassSpvLearning: TClassCalcSpvLearning;
begin
  Result := TCalcSpvKNN;
end;

{ TOpPrmSpvKNN }

// Builds the parameter dialog, handing it this parameter object.
function TOpPrmSpvKNN.CreateDlgParameters: TForm;
begin
  Result := TDlgOpPrmSpvKNN.CreateFromOpPrm(Self);
end;

// Renders the parameters as an HTML table: a header row, then one row for the
// neighbour count and one row for the name of the distance normalization.
function TOpPrmSpvKNN.getHTMLParameters: string;
var
  neighboursRow, distanceRow: string;
begin
  neighboursRow := HTML_TABLE_COLOR_DATA_GRAY
    + Format('<TD>Neighbors</TD><TD align=right>%d</TD></TR>', [FNbNeighbour]);

  distanceRow := HTML_TABLE_COLOR_DATA_GRAY
    + Format('<TD>Distance</TD><TD align=right>%s</TD></TR>',
             [strKNNDistNomalization[FNormalization]]);

  Result := HTML_HEADER_TABLE_RESULT
    + HTML_TABLE_COLOR_HEADER_GRAY + '<TH colspan=2>k-NN parameters</TH></TR>'
    + neighboursRow
    + distanceRow
    + '</table>';
end;

{
 Reads the k-NN parameters from section prmSection of prmINI.
   'neighbors'          : number of neighbours
   'dist_normalization' : ordinal of TEnumKNNDistNormalization
 A missing key keeps the current value (it is passed as the read default).
 An invalid value also keeps the current value:
   - a neighbour count below 1 is rejected, because getFarwest always reads
     slot 1 of a distance table sized from this count;
   - an ordinal outside the enumeration is rejected, because the value is later
     used to index strKNNDistNomalization.
}
procedure TOpPrmSpvKNN.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
var nbRead,normRead: integer;
begin
 nbRead:= prmINI.ReadInteger(prmSection,'neighbors',FNbNeighbour);
 if (nbRead>=1)
  then FNbNeighbour:= nbRead;
 normRead:= prmINI.ReadInteger(prmSection,'dist_normalization',ord(FNormalization));
 //only cast to the enumerated type once the ordinal is known to be valid
 if (normRead>=ord(Low(TEnumKNNDistNormalization))) and (normRead<=ord(High(TEnumKNNDistNormalization)))
  then FNormalization:= TEnumKNNDistNormalization(normRead);
end;

// Reads the raw binary image of the two parameters from the stream:
// the neighbour count first, then the normalization type.
procedure TOpPrmSpvKNN.LoadFromStream(prmStream: TStream);
begin
  prmStream.ReadBuffer(FNbNeighbour, SizeOf(FNbNeighbour));
  prmStream.ReadBuffer(FNormalization, SizeOf(FNormalization));
end;

// Writes the two parameters as integers under section prmSection:
// 'neighbors' and 'dist_normalization' (ordinal of the enumerated value).
procedure TOpPrmSpvKNN.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
  prmINI.WriteInteger(prmSection, 'neighbors', FNbNeighbour);
  prmINI.WriteInteger(prmSection, 'dist_normalization', Ord(FNormalization));
end;

// Writes the raw binary image of the two parameters to the stream:
// the neighbour count first, then the normalization type.
procedure TOpPrmSpvKNN.SaveToStream(prmStream: TStream);
begin
  prmStream.WriteBuffer(FNbNeighbour, SizeOf(FNbNeighbour));
  prmStream.WriteBuffer(FNormalization, SizeOf(FNormalization));
end;

// Default setting: HEOM distance computed over the 5 nearest neighbours.
procedure TOpPrmSpvKNN.SetDefaultParameters;
begin
  FNormalization := knnDistHEOM;
  FNbNeighbour := 5;
end;

{ TCalcSpvKNN }

{
 Scores one example from its FNbNeighbour nearest reference examples.
 The neighbour table is seeded with the first FNbNeighbour reference examples;
 every remaining reference example then replaces the current farthest
 neighbour whenever it is strictly closer. The class frequencies of the final
 neighbours (not weighted by distance) are transferred into postProba.
 NOTE(review): nothing here checks that FRefExamples holds at least
 FNbNeighbour examples - confirm that callers guarantee it.
}
procedure TCalcSpvKNN.getScore(example: integer; var postProba: TTabScore);
var
  worstSlot, idx, lastRef: integer;
  candDist, worstDist: double;
begin
  // seed the tables with the first k reference examples
  resetTabNeighbour(example);

  // locate the current farthest neighbour
  worstSlot := getFarwest();
  worstDist := FTabDistNeighbour[worstSlot];

  // scan the remaining reference examples
  lastRef := FRefExamples.Size;
  idx := Succ(FNbNeighbour);
  while idx <= lastRef do
  begin
    candDist := distance(example, FRefExamples.Number[idx]);
    // closer than the farthest neighbour kept so far: take its place
    if candDist < worstDist then
    begin
      FNeighbours.Number[worstSlot] := FRefExamples.Number[idx];
      FTabDistNeighbour[worstSlot] := candDist;
      // the farthest neighbour may have changed
      worstSlot := getFarwest();
      worstDist := FTabDistNeighbour[worstSlot];
    end;
    Inc(idx);
  end;

  // class distribution among the neighbours, then hand it over as scores
  FTabFreqNeighbour.Refresh(FNeighbours);
  postProba.recupFromTabFrequence(FTabFreqNeighbour);
end;

{
 "Learning" step of k-NN.
 For each continuous descriptor, the minimum and maximum are updated over all
 of its values (1..Size), so they need not be recomputed when scoring. The
 range (max - min) is then stored for every descriptor, continuous or not.
 Finally the training examples are copied as the reference set.
 Returns true on success, false if any exception was raised.
}
function TCalcSpvKNN.coreLearning(examples: TExamples): boolean;
var
  descIdx, exIdx: integer;
  desc: TAttribute;
  cur: TTypeContinue;
begin
  Result := True;
  try
    for descIdx := 0 to descriptors.Count - 1 do
    begin
      desc := descriptors.Attribute[descIdx];
      if desc.isCategory(caContinue) then
        // extremes are taken over every value of the attribute
        for exIdx := 1 to desc.Size do
        begin
          cur := desc.cValue[exIdx];
          if cur < FTabMin[descIdx] then
            FTabMin[descIdx] := cur;
          if cur > FTabMax[descIdx] then
            FTabMax[descIdx] := cur;
        end;
      // spread between the extremes
      FTabEcarts[descIdx] := FTabMax[descIdx] - FTabMin[descIdx];
    end;
    // keep the training examples as reference set
    // (a filtering step could be plugged in here)
    FRefExamples.Copy(examples);
  except
    Result := False;
  end;
end;

{
 Allocates the working structures:
   - min / max / range tables, one entry per descriptor; min starts at +1.0e38
     and max at -1.0e38 so that any observed value replaces them;
   - the (initially empty) reference example set;
   - the neighbour set and its distance table, which gets one extra entry
     because slots are addressed from 1;
   - the class frequency table built on the class attribute.
 The neighbour count is read from the operator parameters.
}
procedure TCalcSpvKNN.createStructures;
var
  nbDesc, d: integer;
begin
  inherited;
  FNbNeighbour := (OpPrmSpv as TOpPrmSpvKNN).NbNeighbour;

  nbDesc := descriptors.Count;
  SetLength(FTabMin, nbDesc);
  SetLength(FTabMax, nbDesc);
  SetLength(FTabEcarts, nbDesc);
  for d := Low(FTabMin) to High(FTabMin) do
  begin
    FTabMin[d] := +1.0e38;
    FTabMax[d] := -1.0e38;
  end;

  // reference examples
  FRefExamples := TExamples.Create(0);

  // neighbours and their distances
  FNeighbours := TExamples.Create(FNbNeighbour);
  SetLength(FTabDistNeighbour, Succ(FNbNeighbour));
  FTabFreqNeighbour := TTabFrequence.CreateFromAtt(Self.ClassAttribute, nil);
end;

{
 Releases everything allocated by createStructures.
 The owned objects are released with FreeAndNil so that the fields do not keep
 dangling references: calling this procedure a second time is then harmless
 instead of freeing the same objects twice.
}
procedure TCalcSpvKNN.destroyStructures;
begin
 inherited;
 setLength(FTabMin,0);
 setLength(FTabMax,0);
 setLength(FTabEcarts,0);
 FreeAndNil(FRefExamples);
 FreeAndNil(FNeighbours);
 setLength(FTabDistNeighbour,0);
 FreeAndNil(FTabFreqNeighbour);
end;

{
 Dissimilarity between examples e1 and e2: the sum over all descriptors of a
 per-descriptor term (no square root is applied to the sum).
   - continuous descriptor, Euclidian setting : squared difference of the values;
   - continuous descriptor, otherwise (HEOM)  : absolute difference divided by
     the descriptor range FTabEcarts[j]; the term is 0 when the range is not
     strictly positive (e.g. a constant descriptor), which avoids a division
     by zero;
   - any other descriptor (overlap)           : 0 if both dValue are equal, else 1.
 The distance setting only affects continuous descriptors.
}
function TCalcSpvKNN.distance(e1, e2: integer): double;
var j: integer;
    att: TAttribute;
    s,d: double;
    useEuclidian: boolean;
begin
 //the setting does not change during the loop : read it once
 useEuclidian:= ((self.OpPrmSpv as TOpPrmSpvKNN).DistNormalization = knnDistEuclidian);
 s:= 0.0;
 for j:= 0 to pred(descriptors.Count) do
  begin
   att:= descriptors.Attribute[j];
   if att.isCategory(caContinue)
    then
     begin
      if useEuclidian
       then d:= 1.0*SQR(att.cValue[e1]-att.cValue[e2])
       else
        if (FTabEcarts[j]>0)
         then d:= 1.0/FTabEcarts[j]*abs(att.cValue[e1]-att.cValue[e2])
         //null (or undefined) range : the descriptor cannot discriminate
         else d:= 0.0;
     end
    //overlap
    else d:= ord(att.dValue[e1]<>att.dValue[e2]);
   s:= s+d;
  end;
 result:= s;
end;

// Returns the slot (1..FNbNeighbour) of the neighbour with the largest
// recorded distance; on ties the lowest slot is returned.
function TCalcSpvKNN.getFarwest: integer;
var
  slot: integer;
  worst: double;
begin
  Result := 1;
  worst := FTabDistNeighbour[1];
  for slot := 2 to FNbNeighbour do
    if FTabDistNeighbour[slot] > worst then
    begin
      worst := FTabDistNeighbour[slot];
      Result := slot;
    end;
end;

// Seeds the neighbour table with the first FNbNeighbour reference examples
// and records the distance from the given example to each of them.
procedure TCalcSpvKNN.resetTabNeighbour(example: integer);
var
  slot: integer;
begin
  for slot := 1 to FNbNeighbour do
  begin
    FNeighbours.Number[slot] := FRefExamples.Number[slot];
    FTabDistNeighbour[slot] := distance(example, FNeighbours.Number[slot]);
  end;
end;

initialization
 //register the k-NN component generator class when the unit is initialized
 RegisterClass(TMLGCompKNN);
end.

