(*******************************************************************)
(* UCompSpvPrototypeNN.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(*******************************************************************)

{
@abstract(1-NN o l'ensemble de rfrence est une srie de prototypes dfinies par un cluster)
@author(Ricco)
@created(12/01/2004)
cf. ouvrage de Hastie, Tibshirani & Friedman pp.411
les clusters peuvent tre dfinies par une variable discrte, qui peut tre la classe par ailleurs,
on a un comportement proche du LDA ou QDA dans ce cas.
On peut rellement aller plus loin si on veut par exemple intgrer les matrices de variance covariance par groupes
dans le calcul des distances... mais est-ce vraiment utile ?
}
unit UCompSpvPrototypeNN;

interface

USES
        Forms, Contnrs,
        Classes,IniFiles,
        UCompDefinition,
        UCompSpvLDefinition,
        UOperatorDefinition,
        UDatasetDefinition,
        UDatasetExamples,
        UCalcDistribution,
        UCalcStatDes,
        Matrices,
        UCalcSpvStructScore;

TYPE
        {le gnrateur de composant Supervised}
        TMLGCompProtoNN = class(TMLGenCompSpvLearning)
                          protected
                          procedure   GenCompInitializations(); override;
                          public
                          function    GetClassMLComponent: TClassMLComponent; override;
                          end;

        {composant}
        TMLCompSpvProtoNN = class(TMLCompSpvLearning)
                            protected
                            function    getClassOperator: TClassOperator; override;
                            end;

        {l'oprateur}
        TOpSpvProtoNN = class(TOpSpvLearningContinuous)
                        protected
                        function    getClassParameter: TClassOperatorParameter; override;
                        function    getClassSpvLearning(): TClassCalcSpvLearning; override;
                        end;

        {paramtre de l'oprateur}
        TOpPrmProtoNN  = class(TOpPrmSpvLearning)
                         private
                         {attribut indiquant les clusters}
                         FAttClusterName: string;
                         {normalisation : 0 -> rien, 1 -> variance locale, 2 -> variance globale}
                         FNormalization: integer;
                         protected
                         procedure   SetDefaultParameters(); override;
                         function    CreateDlgParameters(): TForm; override;
                         public
                         function    getHTMLParameters(): string; override;
                         procedure   LoadFromStream(prmStream: TStream); override;
                         procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                         procedure   SaveToStream(prmStream: TStream); override;
                         procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                         property    AttClusterName: string read FAttClusterName write FAttClusterName;
                         property    Normalization: integer read FNormalization write FNormalization;
                         end;

        {tableau d'infos - e.g. moyennes locales, etc.}
        TTabInfosProto = array of double;

        {dclaration forward}
        TCalcSpvProtoNN = class;

        {un prototype}
        TPrototypeNN = class(TObject)
                       private
                       {calculateur associ}
                       FCalcProto: TCalcSpvProtoNN;
                       {distribution de la classe}
                       FClassDist: TTabFrequence;
                       {Stats du prototype}
                       FStats: TLstCalcStatDesContinuous;
                       {la conclusion associe au prototype}
                       FClassValue: TTypeDiscrete;
                       {variance utilise pour la normalisation}
                       FVariances: TTabInfosProto;
                       {rcuprer les stats pour l'attribut j}
                       function    getStat(j: integer): TCalcStatDesContinuous;
                       public
                       constructor Create(prmCalcProto: TCalcSpvProtoNN);
                       destructor  Destroy; override;
                       procedure   populatePrototype(examples: TExamples);
                       {calculer la distance au prototype}
                       function    distance(example: integer): double;
                       property    ClassValue: TTypeDiscrete read FClassValue;
                       property    Stats: TLstCalcStatDesContinuous read FStats;
                       property    Stat[j: integer]: TCalcStatDesContinuous read getStat;
                       end;

        {un ensemble de prototypes}
        TSetPrototypeNN = class(TObject)
                          private
                          {liste des prototypes}
                          FLstProtos: TObjectList;
                          {calculateur}
                          FCalcProto: TCalcSpvProtoNN;
                          {renvoyer un prototype}
                          function    getProto(k: integer): TPrototypeNN;
                          {renvoyer le nombre de protoypes}
                          function    getCount(): integer;
                          public
                          constructor create(prmCalcProto: TCalcSpvProtoNN);
                          destructor  destroy; override;
                          procedure   populatePrototypes(allExamples: TObjectList);
                          {renvoyer le prototype le plus proche de l'exemple  classer}
                          function    getNearestProto(example: integer): TPrototypeNN;
                          property    proto[k: integer]: TPrototypeNN read getProto;
                          property    protoCount: integer read getCount;
                          end;

        {classe de calcul }
        TCalcSpvProtoNN = class(TCalcSpvLearning)
                          private      
                          {attribut de clustering}
                          FAttCluster: TAttribute;
                          {liste des prototypes}
                          FProtos: TSetPrototypeNN;
                          {inverse la matrice de var-covar pour le calcul des distances}
                          //FInvVCV: PMatrix;
                          {statistiques globales sur les attributs continus}
                          FStats: TLstCalcStatDesContinuous;
                          protected
                          procedure   createStructures(); override;
                          procedure   destroyStructures(); override;
                          function    beforeLearning(examples: TExamples): boolean; override;
                          function    coreLearning(examples: TExamples): boolean; override;
                          public
                          procedure   getScore(example: integer; var postProba: TTabScore); override;
                          // garder car on utilise directement l'info sur la conclusion sans passer par les distributions
                          procedure   classification(example: integer; var response: TTypeDiscrete); override;
                          property    AttCluster: TAttribute read FAttCluster;
                          property    Stats: TLstCalcStatDesContinuous read FStats;
                          //property    InvVCV: PMatrix read FInvVCV;
                          end;


implementation

uses
        Sysutils,
        UStringsResources, UConstConfiguration,
        UDlgOpPrmSpvPrototypeNN,
        UCalcMatrixToAttributes;

{ TMLGCompProtoNN }

procedure TMLGCompProtoNN.GenCompInitializations;
begin
 FMLComp:= mlcSpvLearning;
 //FMLNumIcon:= 36;
 //FMLCompName:= str_comp_name_spvl_pnn;
 //FMLBitmapFileName:= 'MLSpvPrototypeNN.bmp';
end;

function TMLGCompProtoNN.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompSpvProtoNN;
end;

{ TMLCompSpvProtoNN }

function TMLCompSpvProtoNN.getClassOperator: TClassOperator;
begin
 result:= TOpSpvProtoNN;
end;

{ TOpSpvProtoNN }

function TOpSpvProtoNN.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmProtoNN;
end;

function TOpSpvProtoNN.getClassSpvLearning: TClassCalcSpvLearning;
begin
 result:= TCalcSpvProtoNN;
end;

{ TOpPrmProtoNN }

function TOpPrmProtoNN.CreateDlgParameters: TForm;
begin
 result:= TDlgOpPrmSpvPrototypeNN.CreateFromOpPrm(self);
end;

function TOpPrmProtoNN.getHTMLParameters: string;
var s: string;
    sTmp: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>Prototype-NN parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Cluster attribute</TD><TD align=right>%s</TD></TR>',[FAttClusterName]);
 case self.Normalization of
  1: sTmp:= 'Local variance';
  2: sTmp:= 'Global variance'
  else sTmp:= 'none';
 end;
 s:=s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Distance normalization</TD><TD align=right>%s</TD></TR>',[sTmp]);
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmProtoNN.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 FNormalization:= prmINI.ReadInteger(prmSection,'normalization',FNormalization);
 FAttClusterName:= prmINI.ReadString(prmSection,'cluster_name',FAttClusterName);
end;

procedure TOpPrmProtoNN.LoadFromStream(prmStream: TStream);
var l: integer;
begin
 prmStream.ReadBuffer(FNormalization,sizeof(FNormalization));
 prmStream.ReadBuffer(l,sizeof(l));
 if (l>0)
  then
   begin
    setLength(FAttClusterName,l);
    prmStream.ReadBuffer(FAttClusterName[1],l);
   end;
end;

procedure TOpPrmProtoNN.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
 prmINI.WriteInteger(prmSection,'normalization',FNormalization);
 prmINI.WriteString(prmSection,'cluster_name',FAttClusterName);
end;

procedure TOpPrmProtoNN.SaveToStream(prmStream: TStream);
var l: integer;
begin
 prmStream.WriteBuffer(FNormalization,sizeof(FNormalization));
 l:= length(FAttClusterName);
 prmStream.WriteBuffer(l,sizeof(l));
 if (l>0)
  then prmStream.WriteBuffer(FAttClusterName[1],l);
end;

procedure TOpPrmProtoNN.SetDefaultParameters;
begin
 FAttClusterName:= '';
 FNormalization:= 1;//variance locale
end;

{ TPrototypeNN }

constructor TPrototypeNN.Create(prmCalcProto: TCalcSpvProtoNN);
begin
 inherited Create();
 FCalcProto:= prmCalcProto;
 FClassDist:= TTabFrequence.CreateFromAtt(prmCalcProto.ClassAttribute,NIL);
 FClassValue:= 0;
 setLength(FVariances,prmCalcProto.Descriptors.Count); 
end;

destructor TPrototypeNN.Destroy;
begin
 setLength(FVariances,0);
 if assigned(FStats)
  then FreeAndNil(FStats);
 FClassDist.Free;
 inherited destroy();
end;

function TPrototypeNN.distance(example: integer): double;
var sTotal,v: double;
    j: integer;
    att: TAttribute;
    stat: TCalcStatDesContinuous;
begin
 (*cas de l'inverse de la matrice var-covar comme pondration (mahalanobis)
 sTotal:= 0.0;
 for i:= 1 to FCalcProto.Descriptors.Count do
  begin
   sIntermed:= 0.0;
   for j:= 1 to FCalcProto.Descriptors.Count do
    begin
     att:= FCalcProto.Descriptors.Attribute[pred(j)];
     v:= att.cValue[example]-self.Stat[pred(j)].Average;
     sIntermed:= sIntermed+v*FCalcProto.InvVCV^[j]^[i];
    end;
   att:= FCalcProto.Descriptors.Attribute[pred(i)];
   v:= att.cValue[example]-self.Stat[pred(i)].Average;
   sTotal:= sTotal+sIntermed*v;
   //correction par les probas a priori ? de la classe courante
   //sTotal:= sTotal-2.0*ln(FCalcProto.StatClassAtt.TabFreq.Frequence[FClassValue]);
  end;
 *)
 sTotal:= 0.0;
 for j:= 0 to pred(FCalcProto.Descriptors.Count) do
  begin
   att:= FCalcProto.Descriptors.Attribute[j];
   stat:= self.Stat[j];
   v:= 1.0/FVariances[j]*SQR(att.cValue[example]-stat.Average);
   sTotal:= sTotal+v;
  end;
 result:= sTotal;
end;

function TPrototypeNN.getStat(j: integer): TCalcStatDesContinuous;
begin
 result:= TCalcStatDesContinuous(FStats.Stat(j));
end;

procedure TPrototypeNN.populatePrototype(examples: TExamples);
var prm: TOpPrmProtoNN;
    j: integer;
    v: double;
begin
 //distribution de la classe
 //inutile par la suite, sauf si on dcide de faire voter plusieurs prototypes,  voir...
 FClassDist.Refresh(examples);
 //conslusion associe
 FClassValue:= FClassDist.getIndexMaxValue();
 //calculer la coordonne du prototype
 FStats:= TLstCalcStatDesContinuous.Create(FCalcProto.Descriptors,examples);
 //dcider de la pondration  utiliser
 prm:= FCalcProto.OpPrmSpv as TOpPrmProtoNN;
 case prm.Normalization of
  //variance locale
  1: begin
      for j:= 0 to pred(FCalcProto.Descriptors.Count) do
       begin
        v:= TCalcStatDesContinuous(FStats.Stat(j)).Variance;
        if (v>0)
         then FVariances[j]:= v
         //variance globale dans les cas contraire
         else FVariances[j]:= TCalcStatDesContinuous(FCalcProto.Stats.Stat(j)).Variance;
       end;
     end;
  //variance globale
  2: begin
      for j:= 0 to pred(FCalcProto.Descriptors.Count) do
       FVariances[j]:= TCalcStatDesContinuous(FCalcProto.Stats.Stat(j)).Variance;
     end
  else
   begin
    for j:= 0 to pred(FCalcProto.Descriptors.Count) do
     FVariances[j]:= 1.0;
   end;
 end;
end;

{ TSetPrototypeNN }

constructor TSetPrototypeNN.create(prmCalcProto: TCalcSpvProtoNN);
begin
 inherited Create();
 FCalcProto:= prmCalcProto;
 FLstProtos:= TObjectList.Create(TRUE);
end;

destructor TSetPrototypeNN.destroy;
begin
 FLstProtos.Free;
 inherited destroy();
end;

function TSetPrototypeNN.getCount: integer;
begin
 result:= FLstProtos.Count;
end;

function TSetPrototypeNN.getNearestProto(example: integer): TPrototypeNN;
var v,vMin: double;
    i: integer;
    proto,protoMIN: TPrototypeNN;
begin
 protoMIN:= NIL;
 vMin:= +1.0e308;
 for i:= 0 to pred(FLstProtos.Count) do
  begin
   proto:= FlstProtos.Items[i] as TPrototypeNN;
   v:= proto.distance(example);
   if (v<vMin)
    then
     begin
      vMin:= v;
      protoMin:= proto;
     end;
  end;
 result:= protoMin;
end;

function TSetPrototypeNN.getProto(k: integer): TPrototypeNN;
begin
 result:= FLstProtos.Items[k] as TPrototypeNN;
end;

procedure TSetPrototypeNN.populatePrototypes(allExamples: TObjectList);
var proto: TPrototypeNN;
    examples: TExamples;
    k: integer;
begin
 for k:= 0 to pred(allExamples.Count) do
  begin
   examples:= allExamples.Items[k] as TExamples;
   proto:= TPrototypeNN.Create(FCalcProto);
   proto.populatePrototype(examples);
   FLstProtos.Add(proto); 
  end;
end;

{ TCalcSpvProtoNN }

function TCalcSpvProtoNN.beforeLearning(examples: TExamples): boolean;
begin
 result:= assigned(FAttCluster) and inherited BeforeLearning(examples);
end;

procedure TCalcSpvProtoNN.getScore(example: integer;
  var postProba: TTabScore);
var protoNearest: TPrototypeNN;
begin
 protoNearest:= FProtos.getNearestProto(example);
 postProba.recupFromTabFrequence(protoNearest.FClassDist);
end;

procedure TCalcSpvProtoNN.classification(example: integer;
  var response: TTypeDiscrete);
var protoNearest: TPrototypeNN;
begin
 protoNearest:= FProtos.getNearestProto(example);
 response:= protoNearest.ClassValue;
end;

function TCalcSpvProtoNN.coreLearning(examples: TExamples): boolean;
var lstExamples: TObjectList;
    //matIntraVCV,mvcv: PMatrix;
    //k,i,j: integer;
    //ex: TExamples;
    //v: double;
    //err: integer;
begin
 result:= true;
 TRY
 //stats descriptives sur tous les individus
 FStats:= TLstCalcStatDesContinuous.Create(descriptors,examples);
 //crer les prototypes
 lstExamples:= examples.DispatchExamples(FAttCluster);
 FProtos.populatePrototypes(lstExamples);
 lstExamples.Free;
 (*
 //calculer l'inverse de la matrice de var-covar
 //la matrice de variance covariance intra
 dimMatrix(matIntraVCV,Descriptors.Count,Descriptors.Count);
 //pour chaque modalit de l'attribut classe
 for k:= 1 to lstExamples.Count do
  begin
   ex:= lstExamples.Items[pred(k)] as TExamples;
   stats:= FProtos.proto[pred(k)].Stats;
   mvcv:= BuildMatVCV(ex,Descriptors,vcvNormCentered,stats);
   //addition
   v:= 1.0*ex.Size;
   for i:= 1 to Descriptors.Count do
    for j:= 1 to Descriptors.Count do
     matIntraVCV^[i]^[j]:= matIntraVCV^[i]^[j]+v*mvcv^[i]^[j];
   // optimiser plus tard, c'est dramatique toutes ces allocations/dsallocations inutiles
   delMatrix(mvcv,Descriptors.Count,Descriptors.Count);
  end;
 //calcul final, introduction du scalaire de correction
 v:= 1.0/(1.0*(examples.Size-self.AttCluster.nbValues));
 for i:= 1 to Descriptors.Count do
  for j:= 1 to Descriptors.Count do
   matIntraVCV^[i]^[j]:= matIntraVCV^[i]^[j]*v;
 //inversion
 dimMatrix(FInvVCV,Descriptors.Count,Descriptors.Count);
 err:= invMat(matIntraVCV,1,Descriptors.Count,FInvVCV);
 //dtruire
 delMatrix(matIntraVCV,descriptors.Count,descriptors.Count);
 lstExamples.Free;
 //vrifier l'inversion
 if (err=MAT_SINGUL)
  then raise Exception.Create('singular matrix');
 *)
 EXCEPT
 result:= false;
 END;
end;

procedure TCalcSpvProtoNN.createStructures;
begin
 FAttCluster:= FAllAttributes.GetFromName((OpPrmSpv as TOpPrmProtoNN).AttClusterName);
 if assigned(FAttCluster)
  then
   begin
    FProtos:= TSetPrototypeNN.create(self);
   end;
end;

procedure TCalcSpvProtoNN.destroyStructures;
begin
 if assigned(FProtos)
  then FreeAndNil(FProtos);
 //delMatrix(FInvVCV,descriptors.Count,descriptors.Count);
end;

initialization
 RegisterClass(TMLGCompProtoNN);
end.
