(**************************************************************************)
(* UCompFCRadialBasisFunction.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(**************************************************************************)

{
@abstract(Fonction  base radiale)
@author(Ricco)
@created(14/05/2005)

A partir d'une srie de clusters (gnrs pas K-Means par ex.), calculer
l'appartenance de chaque individu en utilisant un noyau gaussien.

L'ide est de gnraliser le RBF du rseau de neurones, on pourra par ex.
appliquer une rgression logistique par la suite, etc. --> la dimension
de Vapnik est infinie dans ce cas... (cf. les articles sur les SVM)

}

unit UCompFCRadialBasisFunction;

interface

USES
        Forms, Classes, IniFiles,
        UCompDefinition,
        UCompFCDefinition,
        UOperatorDefinition,
        UCalcStatDes, UDatasetDefinition, UDatasetImplementation,
        UCalcStatDesConditionnalDesc;

TYPE
   //gnrateur
   TGenFCRbf = class(TMLGenComp)
               protected
               procedure   GenCompInitializations(); override;
               public
               function    GetClassMLComponent: TClassMLComponent; override;
               end;

   //composant
   TMLCompFCRbf = class(TMLCompFC)
                  protected
                  function    getClassOperator: TClassOperator; override; 
                  end;

   //tableau de proprits des clusters
   TTabClustersProperties = array of array of double;

   //oprateur
   TOpFCRbf = class(TOperatorFC)
              private
              //cluster de rfrence
              FClusterer: TAttDiscrete;
              //statistiques globales
              FStatsGlobal: TLstCalcStatDesContinuous;
              //statistiques locales
              FStatsLocal: TLstStatDesCondANOVA;
              //stats sur les axes gnrs
              FStatsNewAtts: TLstCalcStatDesContinuous;
              //calculer les statistiques
              procedure computeDescriptorsStats();
              //calculer les paramtres des clusters
              procedure computeClustersProperties(avg,std: TTabClustersProperties);
              protected
              function  CheckAttributes(): boolean; override;
              function  getClassParameter: TClassOperatorParameter; override;
              function  CoreExecute(): boolean; override;
              public
              destructor destroy(); override;
              function  getHTMLResultsSummary(): string; override;
              end;

   //paramtrage
   TPrmOpFCRbf = class(TOperatorPrmFC)
                 private
                 //type d'estimation de la variance :: locale (0) ou globale (1)
                 FStdDevEstimation: integer;
                 //utiliser la renormalization :: vrai ou faux
                 FUseRenormalization: boolean;
                 protected
                 function    CreateDlgParameters(): TForm; override;
                 procedure   SetDefaultParameters(); override;
                 public
                 function    getHTMLParameters(): string; override;
                 //
                 procedure   LoadFromStream(prmStream: TStream); override;
                 procedure   SaveToStream(prmStream: TStream); override;
                 procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                 procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                 //
                 property    StdDevEstimation: integer read FStdDevEstimation write FStdDevEstimation;
                 property    UseRenormalization: boolean read FUseRenormalization write FUseRenormalization; 
                 end;

implementation

USES
   Math, Sysutils, UConstConfiguration, UDlgOpPrmFCRadialBasisFunction;

CONST
   //seuil pour l'cart-type
   MIN_VALUE_STD_DEV : double = 1.0e-6;

   //texte estimation de la variance
   STR_STD_DEV_ESTIMATION : array[0..1] of string = ('Local','Global');

{ TGenFCRbf }

procedure TGenFCRbf.GenCompInitializations;
begin
 FMLComp:= mlcFeatureConstruction;
end;

function TGenFCRbf.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompFCRbf;
end;

{ TMLCompFCRbf }

function TMLCompFCRbf.getClassOperator: TClassOperator;
begin
 result:= TOpFCRbf;
end;

{ TOpFCRbf }

function TOpFCRbf.CheckAttributes: boolean;
var ok: boolean;
begin
 //1 target discret
 ok:= (self.WorkData.LstAtts[asTarget].Count = 1) and self.WorkData.LstAtts[asTarget].isAllCategory(caDiscrete);
 //>0 input continus ou binaires
 ok:= ok and ((self.WorkData.LstAtts[asInput].Count > 0) and self.WorkData.LstAtts[asInput].isAllCategory(caQuasiContinue));
 //
 result:= ok;
end;

procedure TOpFCRbf.computeClustersProperties(avg,
  std: TTabClustersProperties);
var j,k: integer;
    stat: TCalcSDCondDesc;
    statRBF: TCalcStatDesContinuous;
    stdDev: double;
begin
 //pour chaque noyau
 for k:= 0 to pred(FClusterer.nbValues) do
  begin
   //pour chaque descripteur
   for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
    begin
     //pour l'attribut courant
     stat:= TCalcSDCondDesc(FStatsLocal.Stat(j));
     //pour le noyau courant
     statRBF:= stat.StatCond[k];
     //rcuprer la moyenne
     avg[k,j]:= statRBF.Average;
     //l'cart-type est un peu plus compliqu
     case (self.PrmOp as TPrmOpFCRbf).FStdDevEstimation of
      //globale
      1: stdDev:= (FStatsGlobal.Stat(j) as TCalcStatDesContinuous).StdDev
      //locale
      else
       begin
        if (statRBF.StdDev > 0)
         then stdDev:= statRBF.StdDev
         //on revient sur le global
         else stdDev:= (FStatsGlobal.Stat(j) as TCalcStatDesContinuous).StdDev;
       end;
     end;
    //tester quand mme
    if (stdDev < MIN_VALUE_STD_DEV) then stdDev:= 1.0;
    //affecter
    std[k,j]:= stdDev;
    end;
  end;
end;

procedure TOpFCRbf.computeDescriptorsStats;
var j: integer;
    att: TAttribute;
    stat: TCalcSDCondDescANOVA;
begin
 //calculer les statistiques globales
 if assigned(FStatsGlobal) then FStatsGlobal.Free();
 FStatsGlobal:= TLstCalcStatDesContinuous.Create(self.WorkData.LstAtts[asInput],self.WorkData.Examples);
 //et locales aux clusters
 if assigned(FStatsLocal) then FStatsLocal.Free();
 FStatsLocal:= TLstStatDesCondANOVA.Create(nil,nil);
 for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
  begin
   att:= self.WorkData.LstAtts[asInput].Attribute[j];
   stat:= TCalcSDCondDescANOVA.Create(att,FClusterer,self.WorkData.Examples);
   FStatsLocal.AddStat(stat);
  end;
end;

function TOpFCRbf.CoreExecute: boolean;
var i,j,k: integer;
    tabAvg,tabStdDev: TTabClustersProperties;
    newAtt: TAttContinue;
    tabValues: array of double;
    distance,value,sumValue: extended;
    useNormalization: boolean;
begin
 TRY
 //***********************************************************************
 //**** prparer les paramtres et calculer les stats
 //***********************************************************************

 //rcuprer le clusterer
 FClusterer:= self.workdata.LstAtts[asTarget].Attribute[0] as TAttDiscrete;
 //caluler les stats globales et locales aux noyaux
 self.computeDescriptorsStats();
 //appliquer les paramtres d'cart-type pour chaque noyau
 setLength(tabAvg,FClusterer.nbValues,self.WorkData.LstAtts[asInput].Count);
 setLength(tabStdDev,FClusterer.nbValues,self.WorkData.LstAtts[asInput].Count);
 self.computeClustersProperties(tabAvg,tabStdDev);

 //***********************************************************************
 //**** encoder les nouvelles variables
 //***********************************************************************

 //vider la liste des variables
 GenAtts.Clear();
 //crer une variable par cluster
 for k:= 1 to FClusterer.nbValues do
  begin
   newAtt:= TAttContinue.Create('Rbf_'+IntToStr(k)+'_'+IntToStr((self.MLOwner as TMLCompFCRbf).Number),FClusterer.Size);
   genAtts.Add(newAtt);
  end;

 //normalisation ?
 useNormalization:= (self.PrmOp as TPrmOpFCRbf).FUseRenormalization;

 //encoder chaque individu de la base
 setLength(tabValues,FClusterer.nbValues);
 //pour chaque individu
 for i:= 1 to FClusterer.Size do
  begin
   //travailler sur le noyau nk
   for k:= 0 to pred(FClusterer.nbValues) do
    begin
     distance:= 0.0;
     //et le descripteur j
     for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
      begin
       value:= self.WorkData.LstAtts[asInput].Attribute[j].cValue[i];
       //distance euclidienne pondre
       distance:= distance + SQR((value-tabAvg[k,j])/tabStdDev[k,j]);
      end;
     //diviser par le nombre de descripteurs -- a limite la "taille" des valeurs
     distance:= distance/(1.0*self.WorkData.LstAtts[asInput].Count);
     //passage  l'exponentiel -- dlicat si dbordement (!) d'o cette petite gymnastique
     value:= EXP(-0.5*distance);
     if (value < Math.MinDouble) then value:= 0.0;
     tabValues[k]:= value;
    end;

   //normalisation ?
   if useNormalization
    then
     begin
      sumValue:= Math.Sum(tabValues);
      if (sumValue > 0.0)
       then for k:= 0 to pred(FClusterer.nbValues) do
            tabValues[k]:= tabValues[k] / sumValue;
     end;

    //transmettre les valeurs aux variables -- enfin...
    for k:= 0 to pred(FClusterer.nbValues) do
     genAtts.Attribute[k].cValue[i]:= tabValues[k];
  end;

 //vider les var. temporaires
 Finalize(tabValues);
 Finalize(tabAvg);
 Finalize(tabStdDev);

 //calculer les stats sur les variables gnres
 if assigned(FStatsNewAtts) then FStatsNewAtts.Free();
 FStatsNewAtts:= TLstCalcStatDesContinuous.Create(GenAtts,self.WorkData.Examples);

 //and then...
 result:= true;
 EXCEPT
 result:= false;
 END;
end;

destructor TOpFCRbf.destroy;
begin
 if assigned(FStatsNewAtts) then FStatsNewAtts.Free();
 if assigned(FStatsGlobal) then FStatsGlobal.Free();
 if assigned(FStatsLocal) then FStatsLocal.Free();
 inherited;
end;

function TOpFCRbf.getClassParameter: TClassOperatorParameter;
begin
 result:= TPrmOpFCRbf;
end;

function TOpFCRbf.getHTMLResultsSummary: string;
var s: string;
begin
 s:= '<h3>RBF attribute generation</h3>';
 s:= s+FStatsNewAtts.getHTMLResults();
 result:= s;
end;

{ TPrmOpFCRbf }

function TPrmOpFCRbf.CreateDlgParameters: TForm;
begin
 result:= TdlgOpPrmFCRadialBasisFunction.CreateFromOpPrm(self);
end;

function TPrmOpFCRbf.getHTMLParameters: string;
var s: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<th colspan=2>Parameters</th></tr>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<td>Std-dev esimation</td><td>%s</td></tr>',[STR_STD_DEV_ESTIMATION[FStdDevEstimation]]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<td>Use renormalization</td><td>%d</td></tr>',[ord(FUseRenormalization)]);
 s:= s+'</table>';
 result:= s;
end;

procedure TPrmOpFCRbf.LoadFromINI(prmSection: string; prmINI: TMemIniFile);
begin
 FStdDevEstimation:= prmINI.ReadInteger(prmSection,'std_dev_estimation',FStdDevEstimation);
 FUseRenormalization:= prmINI.ReadBool(prmSection,'renormalization',FUseRenormalization);
end;

procedure TPrmOpFCRbf.LoadFromStream(prmStream: TStream);
begin
 prmStream.ReadBuffer(FStdDevEstimation,sizeof(FStdDevEstimation));
 prmStream.ReadBuffer(FUseRenormalization,sizeof(FUseRenormalization));
end;

procedure TPrmOpFCRbf.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
 prmINI.WriteInteger(prmSection,'std_dev_estimation',FStdDevEstimation);
 prmINI.WriteBool(prmSection,'renormalization',FUseRenormalization);
end;

procedure TPrmOpFCRbf.SaveToStream(prmStream: TStream);
begin
 prmStream.WriteBuffer(FStdDevEstimation,sizeof(FStdDevEstimation));
 prmStream.WriteBuffer(FUseRenormalization,sizeof(FUseRenormalization));
end;

procedure TPrmOpFCRbf.SetDefaultParameters;
begin
 //type d'estimation de la variance :: locale (0) ou globale (1)
 FStdDevEstimation:= 0;
 //utiliser la renormalization :: vrai ou faux
 FUseRenormalization:= true;
end;

initialization
 RegisterClass(TGenFCRbf);
end.
