(******************************************************************)
(* UCompFCStandardize.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(******************************************************************)

{
@abstract(Data standardization)
@author(Ricco)
@created(16/04/2004)
Scale or center the data; min-max scaling is also available.
(0) xs = (x-x_min)/(x_max-x_min)
(1) xs = (x-x_avg)
(2) xs = x/x_stddev
(3) xs = (x-x_avg)/x_stddev
}
unit UCompFCStandardize;

interface

USES
        Forms, Classes, IniFiles,
        UCompDefinition,
        UCompFCDefinition,
        UOperatorDefinition,
        UCalcStatDes, UDatasetDefinition, UDatasetImplementation;

TYPE
        {standardization generator: its implementation sets FMLComp to
         mlcFeatureConstruction and exposes TMLCompFCStandardize as the
         component class}
        TGenFCStandardize = class(TMLGenComp)
                            protected
                            {assigns FMLComp}
                            procedure   GenCompInitializations(); override;
                            public
                            {returns the component class tied to this generator}
                            function    GetClassMLComponent: TClassMLComponent; override;
                            end;

        {standardization component}
        TMLCompFCStandardize = class(TMLCompFC)
                               protected
                               {returns the operator class (TOpFCStandardize)}
                               function    getClassOperator: TClassOperator; override;
                               {one-line text built from LocalDataset.Count}
                               function    GetLogResultDescription(): string; override; 
                               end;

        {standardization operator}
        TOpFCStandardize = class(TOperatorFC)
                           private
                           {descriptive statistics, rebuilt at each execution}
                           FStats: TLstCalcStatDesContinuous;
                           {fills new_att from cur_att and cur_stat, according to the PrmStd parameter}
                           procedure   Compute(cur_att,new_att: TAttContinue; cur_stat: TCalcStatDesContinuous); 
                           protected
                           {returns the parameter class (TPrmOpFCStandardize)}
                           function    getClassParameter: TClassOperatorParameter; override;
                           {at the start there are only inputs, and they are all continuous}
                           function  CheckAttributes(): boolean; override;
                           {compute the statistics on the right examples and variables, then normalize}
                           function    CoreExecute(): boolean; override;
                           public
                           {prepare the list of descriptive statistics used for the normalization}
                           constructor Create(AOwner: TObject); override;
                           {destroy the list of descriptive statistics}
                           destructor  Destroy(); override;
                           {->> HTML}
                           function    getHTMLResultsSummary(): string; override;
                           end;

        {parameter of the operator}
        TPrmOpFCStandardize = class(TOperatorPrmFC)
                              private
                              //see the specifications at the top of the unit
                              //0 = min-max, 1 = centering, 2 = scaling by std-dev,
                              //any other value = centering + scaling (default is 3)
                              FPrmStd: integer;
                              protected
                              {creates the TDlgOpPrmFCStandardize dialog}
                              function    CreateDlgParameters(): TForm; override;
                              {sets FPrmStd to 3}
                              procedure   SetDefaultParameters(); override;
                              public
                              {HTML text showing the selected formula}
                              function    getHTMLParameters(): string; override;
                              {binary persistence of FPrmStd}
                              procedure   LoadFromStream(prmStream: TStream); override;
                              procedure   SaveToStream(prmStream: TStream); override;
                              {INI persistence of FPrmStd under the key 'prm_std'}
                              procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                              procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                              {selected standardization formula}
                              property    PrmStd: integer read FPrmStd write FPrmStd;
                              end;

implementation

USES
        Sysutils, Windows, ULogFile, UDlgOpPrmFCStandardize,
        UConstConfiguration;

{ TGenFCStandardize }

{Sets the FMLComp field of the generator to mlcFeatureConstruction.}
procedure TGenFCStandardize.GenCompInitializations;
begin
 Self.FMLComp := mlcFeatureConstruction;
end;


{Returns the component class created by this generator: TMLCompFCStandardize.}
function TGenFCStandardize.GetClassMLComponent: TClassMLComponent;
begin
 Result := TMLCompFCStandardize;
end;

{ TMLCompFCStandardize }

{Returns the operator class used by this component: TOpFCStandardize.}
function TMLCompFCStandardize.getClassOperator: TClassOperator;
begin
 Result := TOpFCStandardize;
end;

{Builds the one-line log description from LocalDataset.Count.}
function TMLCompFCStandardize.GetLogResultDescription: string;
const
 LOG_PATTERN = '%d continuous attributes generated';
begin
 Result := Format(LOG_PATTERN, [Self.LocalDataset.Count]);
end;

{ TOpFCStandardize }

{Accepts the selection only when there is at least one input attribute and
 every input attribute is of the caContinue category.}
function TOpFCStandardize.CheckAttributes: boolean;
begin
 Result := FALSE;
 //the category test is only reached when the input list is not empty
 if Self.WorkData.LstAtts[asInput].Count > 0 then
  Result := Self.WorkData.LstAtts[asInput].isAllCategory(caContinue);
end;

{Fills new_att with the standardized values of cur_att, using the statistics
 held in cur_stat. The formula depends on the PrmStd parameter:
   0               -> (x - min) / (max - min)
   1               -> x - average
   2               -> x / std-dev
   any other value -> (x - average) / std-dev
 When the statistic needed by the formula is unusable (range not strictly
 positive, result equal to STAT_DES_MISSING_RESULT, or |std-dev| not above
 EPSILON_VALUE), every value of new_att is set to CONTINUE_MISSING_VALUE.}
procedure TOpFCStandardize.Compute(cur_att, new_att: TAttContinue;
  cur_stat: TCalcStatDesContinuous);
var shift,scale: single;
    usable: boolean;
    k: integer;
begin
   //neutral elements: subtracting 0 and dividing by 1 leave a value unchanged,
   //so each of the four formulas can be written as (x - shift) / scale
   shift:= 0;
   scale:= 1;
   case (self.PrmOp as TPrmOpFCStandardize).PrmStd of
    //min-max normalization
    0: begin
        scale:= cur_stat.Max - cur_stat.Min;
        usable:= (scale > 0);
        if usable then shift:= cur_stat.Min;
       end;
    //centering
    1: begin
        shift:= cur_stat.Average;
        usable:= (shift <> STAT_DES_MISSING_RESULT);
       end;
    //scaling
    2: begin
        scale:= cur_stat.StdDev;
        usable:= (scale <> STAT_DES_MISSING_RESULT) and (abs(scale) > EPSILON_VALUE);
       end
    //centering and scaling
    else
       begin
        shift:= cur_stat.Average;
        scale:= cur_stat.StdDev;
        usable:= (shift <> STAT_DES_MISSING_RESULT)
                 and (scale <> STAT_DES_MISSING_RESULT)
                 and (abs(scale) > EPSILON_VALUE);
       end;
   end;
   //fill the new attribute
   if usable
    then
     begin
      for k:= 1 to new_att.Size do
       new_att.cValue[k]:= (cur_att.cValue[k]-shift)/scale;
     end
    else
     begin
      for k:= 1 to new_att.Size do
       new_att.cValue[k]:= CONTINUE_MISSING_VALUE;
     end;
end;

{Rebuilds the descriptive statistics of the input attributes, then creates one
 standardized attribute per input attribute and adds it to GenAtts.
 The duration of both steps is written to the trace log.
 Returns TRUE on success and FALSE when any exception is raised. The class and
 message of the exception are written to the trace log, so that a failed run
 can be diagnosed instead of being silently discarded.}
function TOpFCStandardize.CoreExecute: boolean;
var j: integer;
    cur_stat: TCalcStatDesContinuous;
    cur_att,new_att: TAttContinue;
    tps: cardinal;
begin
 Result:= TRUE;
 TRY
 //empty the previous results
 GenAtts.Clear();
 FStats.FreeAll();
 //recompute the statistics
 tps:= GetTickCount();
 FStats.RebuildStatDes(self.WorkData.LstAtts[asInput],self.WorkData.Examples);
 tps:= GetTickCount()-tps;
 TraceLog.WriteToLogFile(format('STANDARDIZE -- calc stats = %d ms',[tps]));
 tps:= GetTickCount();
 //transform the attributes
 for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
  begin
   //fetch the source attribute and its statistics (same index j in both lists)
   cur_att:= self.WorkData.LstAtts[asInput].Attribute[j] as TAttContinue;
   cur_stat:= FStats.Stat(j) as TCalcStatDesContinuous;
   //create and add: the name is 'std_' + source name + '_' + component number
   new_att:= TAttContinue.Create('std_'+cur_att.Name+'_'+IntToStr((self.MLOwner as TMLCompFCStandardize).Number),cur_att.Size);
   GenAtts.Add(new_att);
   //compute the values
   self.Compute(cur_att,new_att,cur_stat);
  end;
 tps:= GetTickCount()-tps;
 TraceLog.WriteToLogFile(format('STANDARDIZE -- codage new_att(s) = %d ms',[tps]));
 EXCEPT
  on E: Exception do
   begin
    Result:= FALSE;
    //logging is best-effort: a failure of the log itself must not replace
    //the FALSE result by an exception
    try
     TraceLog.WriteToLogFile(format('STANDARDIZE -- failed (%s): %s',[E.ClassName,E.Message]));
    except
    end;
   end;
  else
   //objects not derived from Exception: same outcome as before
   Result:= FALSE;
 END;
end;

{Calls the inherited constructor, then creates the statistics list FStats.}
constructor TOpFCStandardize.Create(AOwner: TObject);
begin
 inherited Create(AOwner);
 //neither the active attributes nor the active examples are known yet,
 //hence the two NIL arguments
 Self.FStats := TLstCalcStatDesContinuous.Create(NIL, NIL);
end;

{Frees the statistics list, then calls the inherited destructor.}
destructor TOpFCStandardize.Destroy;
begin
 //NOTE(review): the original comment suggests the list owns its items - confirm
 Self.FStats.Free;
 inherited Destroy;
end;

{Returns the parameter class of this operator: TPrmOpFCStandardize.}
function TOpFCStandardize.getClassParameter: TClassOperatorParameter;
begin
 Result := TPrmOpFCStandardize;
end;

{Builds an HTML table with one row per generated attribute: the name of the
 input attribute at the same index, then the name of the new attribute.}
function TOpFCStandardize.getHTMLResultsSummary: string;
var idx: integer;
    row: string;
begin
 //table opening and the two header rows
 Result:= HTML_HEADER_TABLE_RESULT
        + HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan="2">Attribute standardization</TH></TR>'
        + HTML_TABLE_COLOR_HEADER_GRAY+'<TH>Src att</TH><TH>New att</TH></TR>';
 //one data row per generated attribute
 for idx:= 0 to self.GenAtts.Count-1 do
  begin
   row:= HTML_TABLE_COLOR_DATA_GRAY
       + format('<TD>%s</TD>',[self.WorkData.LstAtts[asInput].Attribute[idx].Name])
       + format('<TD>%s</TD>',[self.GenAtts.Attribute[idx].Name])
       + '</TR>';
   Result:= Result+row;
  end;
 Result:= Result+'</table>';
end;

{ TPrmOpFCStandardize }

{Creates the parameter dialog (TDlgOpPrmFCStandardize) bound to this
 parameter object.}
function TPrmOpFCStandardize.CreateDlgParameters: TForm;
begin
 Result := TDlgOpPrmFCStandardize.CreateFromOpPrm(Self);
end;

{Returns an HTML fragment showing the formula selected by PrmStd.
 Any value other than 0, 1 or 2 shows the centering + scaling formula.}
function TPrmOpFCStandardize.getHTMLParameters: string;
var mode: integer;
    formula: string;
begin
 mode:= self.PrmStd;
 if mode = 0 then
  formula:= '(x - x_min) / (x_max - x_min)'
 else if mode = 1 then
  formula:= '(x - x_avg)'
 else if mode = 2 then
  formula:= '(x / x_std_dev)'
 else
  formula:= '(x - x_avg )/ x_std_dev';
 Result:= '<P><B>Formula : </B>'+formula;
end;

{Reads FPrmStd from the key 'prm_std' of section prmSection; the current value
 of FPrmStd is passed as the default of ReadInteger.}
procedure TPrmOpFCStandardize.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
var stored: integer;
begin
 stored:= prmINI.ReadInteger(prmSection,'prm_std',self.FPrmStd);
 self.FPrmStd:= stored;
end;

{Reads the raw bytes of FPrmStd from prmStream.}
procedure TPrmOpFCStandardize.LoadFromStream(prmStream: TStream);
begin
 prmStream.ReadBuffer(Self.FPrmStd, SizeOf(Self.FPrmStd));
end;

{Writes FPrmStd under the key 'prm_std' of section prmSection.}
procedure TPrmOpFCStandardize.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 prmINI.WriteInteger(prmSection, 'prm_std', Self.FPrmStd);
end;

{Writes the raw bytes of FPrmStd to prmStream.}
procedure TPrmOpFCStandardize.SaveToStream(prmStream: TStream);
begin
 prmStream.WriteBuffer(Self.FPrmStd, SizeOf(Self.FPrmStd));
end;

{Selects formula 3, i.e. (x - x_avg) / x_stddev, as the default.}
procedure TPrmOpFCStandardize.SetDefaultParameters;
const
 DEFAULT_STD_FORMULA = 3;
begin
 Self.FPrmStd := DEFAULT_STD_FORMULA;
end;

initialization
 //register the generator class through Classes.RegisterClass when the unit is initialized
 Classes.RegisterClass(TGenFCStandardize);
end.
