(******************************************************************)
(* UCalcFSCorrelation.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(******************************************************************)

{

@abstract(Structure de calcul des corrlations)
@author(Ricco)
@created(18/05/2004)

@lastmod(14/09/2004)

Regrouper dans une unit les structures de calcul des corrlations

Nouveaut 14/09/2004 -- introduction d'une option de compilation afin de prserver la mmoire
en vitant le calcul pralable de la corrlation croise entre les X.

--> !!! DANS CE CAS, LA METHODE MODTREE N'EST PLUS OPERATIONNELLE !!!
}

unit UCalcFSCorrelation;

interface

{$DEFINE FS_CORR_MEMORY_SAFE}

USES
        UOperatorDefinition,
        UDatasetDefinition,
        UDatasetExamples,
        UCompFSInputSelection,
        UCalcCrossTab,
        UDatasetImplementation;

TYPE
        {tableau de corrlation}
        TTabCorr = array of double;

        {tableau de corrlation  2 dimensions}
        TTabTabCorr = array of array of double;

        {fonction de corrlation}
        TComputeCorrelation = function (prmTab: TCrossTab): double of object;

        {correlation de Y avec les Xj}
        TYCorr = class
                 private
                 FYCorr: TTabCorr;
                 FNbX: integer;
                 procedure   initialize(prmX: TLstAttributes); overload;
                 procedure   initialize(dimX: integer); overload;
                 public
                 destructor  destroy(); override;
                 function    getCorr(j: integer): double;
                 procedure   setCorr(j: integer; value: double);
                 procedure   compute(prmY: TAttribute; prmX: TLstAttributes; prmExamples: TExamples; prmCompute: TComputeCorrelation);
                 function    duplicate(): TYCorr;
                 property    dim: integer read FNbX;
                 end;

        {correlation croise entre les Xj}
        TXCorr = class
                 protected
                 FNbX: integer;
                 FXCorr: TTabTabCorr;
                 {$IFDEF FS_CORR_MEMORY_SAFE}
                 FLstAtt: TLstAttributes;
                 FFuncCompute: TComputeCorrelation;
                 FExamples: TExamples;
                 {$ENDIF}
                 procedure initialize(prmX: TLstAttributes); overload;
                 procedure initialize(dimX: integer); overload;
                 procedure deInitialize(); virtual;
                 procedure setTabSize(prmSize: integer); virtual;
                 public
                 {renvoie une instance de la classe courante}
                 function   getInstance(): TXCorr; virtual;
                 destructor destroy(); override;
                 procedure  compute(prmX: TLstAttributes; prmExamples: TExamples; prmCompute: TComputeCorrelation); virtual;
                 function   getCorr(i,j: integer): double; virtual;
                 procedure  setCorr(i,j: integer; value: double); virtual;
                 {comme au scrabble}
                 function   duplicate(): TXCorr;
                 property   dim: integer read FNbX;
                 end;

        {la mthode de slection bas sur les corrlations}
        TSelCorr =  class
                    protected
                    {effectif}
                    FNbExamples: double;
                    {initialiser}
                    procedure   initialize(prmData: TMLDataset); virtual;
                    {le tableau crois de corrlations, cr rellement chez MODTree par exemple...}
                    procedure   initializeXCorr(); virtual;
                    public
                    {nombre de variables dans la slection courante}
                    FNbCurSel: integer;
                    {la slection courante}
                    FCurSel: array of integer;
                    {la qualit de l'ensemble courant}
                    FCurInfo: array of double;
                    {les X elligibles}
                    FXToEnter: array of boolean;
                    {corrlation de Y avec X}
                    FYCorrelation: TYCorr;
                    {corrlation croise des X}
                    FXCorrelation: TXCorr;                          
                    destructor  destroy(); override;
                    procedure   compute_correlation(prmY: TAttribute; prmX: TLstAttributes; prmExamples: TExamples; prm: TOperatorParameter); virtual;
                    // mettre  jour chez les descendants...
                    function    accept_new_attribute(prm: TOperatorParameter): boolean; virtual; abstract;
                    function    correlation_formula(prmTab: TCrossTab): double; virtual; abstract;
                    end;

        {mta-classe de slection par corrlation}
        TClassSelCorr = class of TSelCorr;

        {oprateur gnrique de slection par avant fond sur les corrlations}
        TOpFSSelCorr =  class(TOpFSInputSelSpvInputDiscrete)
                        protected
                        FSelCorr: TSelCorr;
                        procedure   InitializeComputation();
                        procedure   Loop(); virtual;
                        procedure   RefreshSelection();
                        function    CoreExecute(): boolean; override;
                        // mettre  jour chez les descendants
                        function    getClassSelection(): TClassSelCorr; virtual; abstract;
                        public
                        property    SelCorr: TSelCorr read FSelCorr;
                        end;  


implementation

uses
        ULogFile, Sysutils, Windows;

{ TYCorr }

procedure TYCorr.compute(prmY: TAttribute; prmX: TLstAttributes;
  prmExamples: TExamples; prmCompute: TComputeCorrelation);
var tab: TCrossTab;
    j: integer;
    attX: TAttribute;
begin
 self.initialize(prmX);
 //rien de plus simple...
 for j:= 0 to pred(FNbX) do
  begin
   attX:= prmX.Attribute[j];
   tab:= TCrossTab.create(prmY,attX);
   //Tracelog.WriteToLogFile('>>> '+attX.Name);
   tab.Refresh(prmExamples);
   //Tracelog.WriteToLogFile('>>> refresh -- '+attX.Name);
   TRY
   FYCorr[j]:= prmCompute(tab);
   EXCEPT
   FYCorr[j]:= 0.0;
   //TraceLog.WriteToLogFile(format('YCORR (Y,X) computing -- error (%s,%s)',[prmY.Name,attX.Name]));
   END;
   tab.Free;
  end;
end;

destructor TYCorr.destroy;
begin
 setLength(FYCorr,0);
 inherited destroy();
end;

function TYCorr.duplicate: TYCorr;
var dup: TYCorr;
    j: integer;
begin
 dup:= TYCorr.Create();
 dup.initialize(self.dim);
 //copie toute simple
 for j:= 0 to pred(self.dim) do
  dup.FYCorr[j]:= self.getCorr(j);
 result:= dup;
end;

function TYCorr.getCorr(j: integer): double;
begin
 result:= FYCorr[j];
end;

procedure TYCorr.initialize(prmX: TLstAttributes);
begin
 FNbX:= prmX.Count;
 setLength(FYCorr,FNbX);
end;

procedure TYCorr.initialize(dimX: integer);
begin
 FNbX:= dimX;
 setLength(FYCorr,FNbX);
end;

procedure TYCorr.setCorr(j: integer; value: double);
begin
 FYCorr[j]:= value;
end;

{ TXCorr }

procedure TXCorr.compute(prmX: TLstAttributes; prmExamples: TExamples; prmCompute: TComputeCorrelation);
{$IFNDEF FS_CORR_MEMORY_SAFE}
var i,j: integer;
    attI,attJ: TAttribute;
    tab: TCrossTab;
{$ENDIF}
begin

 //passer les paramtres de dimension
 //et, ventuellement, crer les tableaux
 initialize(prmX);

 {$IFDEF FS_CORR_MEMORY_SAFE}

 TraceLog.WriteToLogFile('FS_CORR_MEMORY_SAFE option activated');

 //TEMPO -- 14/09/2004 -- memory safe strategy
 FLstAtt:= prmX;
 FFuncCompute:= prmCompute;
 FExamples:= prmExamples;
 //--

 {$ELSE}

 TraceLog.WriteToLogFile('FS_CORR_MEMORY_SAFE option NOT activated');

 for i:= 1 to pred(FNbX) do
  begin
   attI:= prmX.Attribute[i];
   for j:= 0 to pred(i) do
    begin
     attJ:= prmX.Attribute[j];
     tab:= TCrossTab.create(attI,attJ);
     TRY
       TRY
       tab.Refresh(prmExamples);
       FXCorr[i][j]:= prmCompute(tab);
       EXCEPT
       FXCorr[i][j]:= 0.0;
       END;
     FINALLY
       tab.Free;
     END;
    end;
  end;

 {$ENDIF}
end;

procedure TXCorr.deInitialize;
{$IFNDEF FS_CORR_MEMORY_SAFE}
var j: integer;
{$ENDIF}
begin

 {$IFNDEF FS_CORR_MEMORY_SAFE}
 for j:= 0 to pred(FNbX) do
  setLength(FXCorr[j],0);
 setLength(FXCorr,0);
 {$ENDIF}
 
end;

destructor TXCorr.destroy;
begin
 deInitialize();
 inherited destroy();
end;

function TXCorr.getInstance: TXCorr;
begin
 result:= TXCorr.Create();
end;

function TXCorr.duplicate: TXCorr;
var dup: TXCorr;
    i,j: integer;
begin
 //dup:= TXCorr.Create();
 //new -- 15/09/2004 -- pour que la bonne classe soit instancie
 dup:= self.getInstance();
 dup.initialize(self.dim);
 //copie directe des valeurs, sans se poser de questions...
 for i:= 0 to pred(self.dim) do
  for j:= 0 to pred(i) do
   dup.FXCorr[i][j]:= self.getCorr(i,j);
 result:= dup;
end;

function TXCorr.getCorr(i, j: integer): double;
//TEMPO
var tab: TCrossTab;
//--
begin

 {$IFDEF FS_CORR_MEMORY_SAFE}       

 //TEMPO -- memory safe strategy
 if (i=j)
  then result:= 1.0
  else
   begin
    tab:= TCrossTab.create(FLstAtt.Attribute[i],FLstAtt.Attribute[j]);
    tab.Refresh(FExamples);
    result:= FFuncCompute(tab);
    tab.Free();
   end;

 {$ELSE}

 if (j<i)
  then result:= FXCorr[i][j]
  else
   begin
    if (i=j)
     then result:= 1.0
     else result:= FXCorr[j][i];
   end;

 {$ENDIF}

end;

procedure TXCorr.initialize(prmX: TLstAttributes);
begin
 FNbX:= prmX.Count;
 self.setTabSize(FNbX);
end;

procedure TXCorr.initialize(dimX: integer);
begin
 FNbX:= dimX;
 self.setTabSize(FNbX);
end;

procedure TXCorr.setTabSize(prmSize: integer);
{$IFNDEF FS_CORR_MEMORY_SAFE}
var j: integer;
{$ENDIF}
begin
 {$IFNDEF FS_CORR_MEMORY_SAFE}
 setLength(FXCorr,prmSize);
 for j:= 0 to pred(prmSize) do
  setLength(FXCorr[j],j);
 {$ENDIF}
end;


procedure TXCorr.setCorr(i, j: integer; value: double);
begin
 //ne devrait jamais arriver ici...
end;

{ TSelMifs }

procedure TSelCorr.compute_correlation(prmY: TAttribute; prmX: TLstAttributes;
  prmExamples: TExamples; prm: TOperatorParameter);
var tps: cardinal;
begin
 //calculer les corrlations internes
 //Tracelog.WriteToLogFile('[TSELCORR -- compute_correlation Y,X] -> begin');
 tps:= GetTickCount();
 FYCorrelation.compute(prmY,prmX,prmExamples,correlation_formula);
 tps:= GetTickCount()-tps;
 Tracelog.WriteToLogFile(format('[TSELCORR -- compute_correlation Y,X] -> end = %d ms.',[tps]));
 //Tracelog.WriteToLogFile('[TSELCORR -- compute_correlation X,X] -> begin');
 tps:= GetTickCount();
 FXCorrelation.compute(prmX,prmExamples,correlation_formula);
 tps:= GetTickCount()-tps;
 Tracelog.WriteToLogFile(format('[TSELCORR -- compute_correlation X,X] -> end = %d ms.',[tps]));
end;

destructor TSelCorr.destroy;
begin
 setLength(FCurSel,0);
 FYCorrelation.Free();
 FXCorrelation.Free();
 inherited;
end;

procedure TSelCorr.initialize(prmData: TMLDataset);
var j: integer;
    dimX: integer;
begin
 FNbCurSel:= 0;
 FNbExamples:= prmData.Examples.Size;
 dimX:= prmData.LstAtts[asInput].Count;
 //slection vide mais prte
 setLength(FCurSel,dimX);
 setLength(FCurInfo,dimX);
 //tableau des elligibles
 setLength(FXToEnter,dimX);
 for j:= 0 to pred(dimX) do
  FXToEnter[j]:= TRUE;
 //tableau des corrlations Y,X
 FYCorrelation:= TYCorr.Create();
 //tableau des corrlations X,X (pas calcul dans l'anctre, uniquement pour MODTree)
 self.initializeXCorr();
end;

procedure TSelCorr.initializeXCorr;
begin
 FXCorrelation:= TXCorr.Create();
end;

{ TOpFSSelCorr }

function TOpFSSelCorr.CoreExecute(): boolean;
begin
 result:= TRUE;
 TRY
 self.InitializeComputation();
 self.Loop();
 self.RefreshSelection();
 EXCEPT
 result:= FALSE;
 END;
end;

procedure TOpFSSelCorr.InitializeComputation();
begin
 //TraceLog.WriteToLogFile('TOpFSSelCorr.InitializeComputation executed');
 if assigned(FSelCorr) then FSelCorr.Free();
 //initialiser
 FSelCorr:= self.getClassSelection().Create();
 FSelCorr.initialize(self.WorkData);
 FSelCorr.compute_correlation(self.WorkData.LstAtts[asTarget].Attribute[0],self.WorkData.LstAtts[asInput],self.WorkData.Examples, self.PrmOp);
end;

procedure TOpFSSelCorr.Loop();
begin
 //boucler jusqu' ce qu'aucune variable ne soit ajoute
 while FSelCorr.accept_new_attribute(self.PrmOp) do;
end;

procedure TOpFSSelCorr.RefreshSelection();
var j: integer;
    att: TAttribute;
begin
 //mj la slection
 for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
  begin
   att:= self.WorkData.LstAtts[asInput].Attribute[j];
   //est-il dj ajout ?
   if not(FSelCorr.FXToEnter[j])
    then self.OutputData.LstAtts[asInput].Add(att)
    else self.RemovedFromInput.AddObject(att.Name,att);
  end;
end;

end.
