(**************************************************************)
(* UCompFSMODTree.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(**************************************************************)

{

@abstract(Mthode issu des travaux conjoints sur les corrlations partielles avec S. Lallich -- 1999  2002)
@author(Ricco)
@created(19/05/2004)

2 originalits:
(1) utilisation des paires pour calculer les corrlations
(2) mj de proche en proche des corrlations partielles

Paramtrage:
(a) rgle d'arrt (R ajust ou AKAIKE)
}

unit UCompFSMODTree;

interface


USES
        Forms, Classes, IniFiles,
        UCompDefinition,
        UCompFSDefinition,
        UCompFSInputSelection,
        UOperatorDefinition,
        UCalcStatDesCrossTab,
        UDatasetDefinition,
        UDatasetExamples,
        UCalcFSCorrelation,
        UCalcCrossTab,
        UDatasetImplementation;
        
TYPE
        {gnrateur}
        TMLGenFSMODTree = class(TMLGenFS)
                       public
                       function    GetClassMLComponent: TClassMLComponent; override;
                       end;

        {composant}
        TMLCompFSMODTree = class(TMLCompFSInputSelection)
                        protected
                        function    getClassOperator: TClassOperator; override;
                        end;

        {Coorlation croise de MODTree -- le tableau crois est rellement instanci}
        TXCorrMODTree = class(TXCorr)
                        protected
                        procedure deInitialize(); override;
                        procedure setTabSize(prmSize: integer); override;
                        public
                        function   getInstance(): TXCorr; override;
                        procedure  compute(prmX: TLstAttributes; prmExamples: TExamples; prmCompute: TComputeCorrelation); override;
                        function   getCorr(i,j: integer): double; override;
                        procedure  setCorr(i,j: integer; value: double); override;
                        end;        

        {computation rellement}
        TSelMODTree = class(TSelCorr)
                   private
                   {tableau des R2 et tableau des R2 ajust}
                   FCurR2,FCurR2Adj: array of double;
                   {mise  ajour des corrlations relativement  la variable introduite}
                   procedure   majCorrelations(kMax: integer);
                   protected
                   procedure   initialize(prmData: TMLDataset); override;
                   procedure   initializeXCorr(); override;
                   public
                   destructor  destroy(); override;
                   {overral feature selection -- stop ou encore ?}
                   function    accept_new_attribute(prm: TOperatorParameter): boolean; override;
                   {default -- pairwise correlation}
                   function    correlation_formula(prmTab: TCrossTab): double; override;
                   end;

        {oprateur}
        TOpFSMODTree = class(TOpFSSelCorr)
                    protected
                    function    getClassParameter: TClassOperatorParameter; override;
                    function    getClassSelection(): TClassSelCorr; override;
                    public
                    function    getHTMLResultsSummary(): string; override;
                    end;

        {paramtrage}
        TOpPrmFSMODTree = class(TOpPrmFSInputSelection)
                       protected
                       procedure   SetDefaultParameters(); override;
                       function    CreateDlgParameters(): TForm; override;
                       end;

        
implementation

uses
        Sysutils,
        UConstConfiguration, ULogFile;

{ TMLGenFSMODTree }

function TMLGenFSMODTree.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompFSMODTree;
end;

{ TMLCompFSMODTree }

function TMLCompFSMODTree.getClassOperator: TClassOperator;
begin
 result:= TOpFSMODTree;
end;

{ TSelMODTree }

function TSelMODTree.accept_new_attribute(
  prm: TOperatorParameter): boolean;
var k,kMax: integer;
    r,rMax: double;
    stopValue: double;
    nbSel: double;
begin
 result:= FALSE;
 //slection complte ?
 if (FNbCurSel < FXCorrelation.dim)
  then
   begin
    rMax:= -1.0e308;
    kMax:= -1;
    //TraceLog.WriteToLogFile('------ MODTree begin selection');
    //chercher la corrlation max. sur Y
     for k:= 0 to pred(FXCorrelation.dim) do
      begin
       if FXToEnter[k] then
        begin
         r:= self.FYCorrelation.getCorr(k);
         //TraceLog.WriteToLogFile(format('att %d --> r = %.4f',[k,r]));
         if (r>rMax)
          then
           begin
            rMax:= r;
            kMax:= k;
           end;
        end;
      end;
    //TraceLog.WriteToLogFile('------ MODTree end selection');
    //passe-t-elle la rgle d'arrt ?
    nbSel:= 1.0+self.FNbCurSel;
    if (FNbExamples>nbSel)
     then stopValue:= 1.0/SQRT(FNbExamples-nbSel)
     else stopValue:= +1.0e308;//on bloque la slection supplmentaire -- il n'y aura pas plus de variables que d'individus dans l'ensemble slectionn
    //TraceLog.WriteToLogFile(format('[MODTree -- compare for stopping rule] :: %.8f with %.8f',[rMax,stopValue]));
    //tester maintenant
    if (rMax>stopValue)
     then
      begin
       //retirer des elligibles
       FXToEnter[kMax]:= FALSE;
       //ajouter dans la slection
       FCurInfo[FNbCurSel]:= rMax;
       FCurSel[FNbCurSel]:= kMax;
       //m des indicateurs de R2
       if (FNbCurSel=0)
        then
         begin
          //initalisation
          FCurR2[FNbCurSel]:= rMax*rMax;
          FCurR2Adj[FNbCurSel]:= FCurR2[FNbCurSel];
         end
        else
         begin
          //formule rcursive
          FCurR2[FNbCurSel]:= 1.0-(1.0-rMax*rMax)*(1.0-FCurR2[pred(FNbCurSel)]);
          FCurR2Adj[FNbCurSel]:= 1.0-(FNbExamples-1.0)/(FNbExamples-nbSel-1.0)*(1.0-FCurR2[FNbCurSel]);
         end;
       //passage au coup suivant
       inc(FNbCurSel);
       //**************************************************************
       //mj des corrlations relativement  celui qui a t introduit
       //**************************************************************
       self.majCorrelations(kMax);
       //and then...
       result:= TRUE;
      end;
   end
end;

function TSelMODTree.correlation_formula(prmTab: TCrossTab): double;
var g11,g12,g21,g22,n: Double;
    k,l: Integer;
    value,denom: double;
Begin
 n:= prmTab.Value[0,0];
 //g:= n*(n-1);
 //le tableau n'est plus 2x2 !!!
 g11:= 0; g12:=0; g21:= 0; g22:= 0;
 For k:= 1 To prmTab.RowCount Do
  For l:= 1 To prmTab.ColCount Do
   Begin
    //g11:= g11+Tableau[k,l]*(Tableau[k,l]-1.0);
    //modif. tirage avec remise pour r(Y;X)=0 <=> r(Iy,Ix)=0
    //20/05/2004 -- on divise par n pour viter le pbm des gros effectifs
    g11:= g11+(prmTab.Value[k,l]/n)*(prmTab.Value[k,l]/n);
    g22:= g22+(prmTab.Value[k,l]/n)*((n-prmTab.Value[k,0]-prmTab.Value[0,l]+prmTab.Value[k,l]))/n;
    g12:= g12+(prmTab.Value[k,l]/n)*((prmTab.Value[k,0]-prmTab.Value[k,l]))/n;
    g21:= g21+(prmTab.Value[k,l]/n)*((prmTab.Value[0,l]-prmTab.Value[k,l]))/n;
   End;
 //rsultat
 value:= (g11*g22-g12*g21);
 denom:= (g11+g12)*(g21+g22)*(g11+g21)*(g12+g22);
 if (denom>EPSILON_VALUE)
  then result:= value/SQRT(denom)
  else result:= 0.0;
end;

destructor TSelMODTree.destroy;
begin
 setLength(FCurR2,0);
 setLength(FCurR2Adj,0);
 inherited Destroy();
end;

procedure TSelMODTree.initialize(prmData: TMLDataset);
begin
 inherited initialize(prmData);
 setLength(FCurR2,prmData.LstAtts[asInput].Count);
 setLength(FCurR2Adj,prmData.LstAtts[asInput].Count);
end;

procedure TSelMODTree.initializeXCorr;
begin
 FXCorrelation:= TXCorrMODTree.Create();
end;

procedure TSelMODTree.majCorrelations(kMax: integer);
var i,j: integer;
    num_value,denom_value: double;
    dupY: TYCorr;
    dupX: TXCorr;
begin
 //les dupliqus
 dupY:= self.FYCorrelation.duplicate();
 dupX:= self.FXCorrelation.duplicate();
 //les Y
 for j:= 0 to pred(self.FYCorrelation.dim) do
  begin
   //numrateur
   num_value:= dupY.getCorr(j);
   num_value:= num_value-dupY.getCorr(kMax)*dupX.getCorr(j,kMax);
   //dnominateur
   denom_value:= 1.0-SQR(dupY.getCorr(kMax));
   denom_value:= denom_value*(1.0-SQR(dupX.getCorr(j,kMax)));
   denom_value:= SQRT(denom_value);
   //et affectation
   if (denom_value>EPSILON_VALUE)
    then self.FYCorrelation.setCorr(j,num_value/denom_value)
    else self.FYCorrelation.setCorr(j,0.0);
  end;
 //les X
 for i:= 0 to pred(self.FXCorrelation.dim) do
  begin
   for j:= 0 to pred(i) do
    begin
     num_value:= dupX.getCorr(i,j);
     num_value:= num_value-dupX.getCorr(i,kMax)*dupX.getCorr(j,kMax);
     denom_value:= 1.0-SQR(dupX.getCorr(i,kMax));
     denom_value:= denom_value*(1.0-SQR(dupX.getCorr(j,kMax)));
     denom_value:= SQRT(denom_value);
     if (denom_value>EPSILON_VALUE)
      then self.FXCorrelation.setCorr(i,j,num_value/denom_value)
      else self.FXCorrelation.setCorr(i,j,0.0);
    end;
  end;
 //librer
 dupY.Free();
 dupX.Free();
end;

{ TOpFSMODTree }

function TOpFSMODTree.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmFSMODTree; 
end;

function TOpFSMODTree.getClassSelection: TClassSelCorr;
begin
 result:= TSelMODTree; 
end;

function TOpFSMODTree.getHTMLResultsSummary: string;
var s: string;
    j: integer;
    att: TAttribute;    
begin
 s:= inherited getHTMLResultsSummary();
 //ajouter le dtail des calculs
 s:= s+'<H3>Calculations details</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY;
 s:= s+'<TH>Selected attribute</TH><TH width="70">r (Y,X/S)</TH><TH width="50">R2</TH><TH width="50">Adj R2</TH>';
 s:= s+'</TR>';
 for j:= 0 to pred(self.SelCorr.FNbCurSel) do
  begin
   att:= self.WorkData.LstAtts[asInput].Attribute[self.SelCorr.FCurSel[j]];
   s:= s+HTML_TABLE_COLOR_DATA_GRAY+
         format('<TD>%s</TD><TD align="right">%.6f</TD><TD align="right">%.4f</TD><TD align="right">%.4f</TD>',
                [att.Name,self.SelCorr.FCurInfo[j],(self.SelCorr as TSelMODTree).FCurR2[j],(self.SelCorr as TSelMODTree).FCurR2Adj[j]]);
   s:= s+'</TR>';
  end;
 s:= s+'</TR>';
 s:= s+'</table>';
 //suite
 result:= s;
end;

{ TOpPrmFSMODTree }

function TOpPrmFSMODTree.CreateDlgParameters: TForm;
begin
 result:= NIL;
end;

procedure TOpPrmFSMODTree.SetDefaultParameters;
begin
 //none
end;

{ TXCorrMODTree }

procedure TXCorrMODTree.compute(prmX: TLstAttributes;
  prmExamples: TExamples; prmCompute: TComputeCorrelation);
var i,j: integer;
    attI,attJ: TAttribute;
    tab: TCrossTab;
begin

 //passer les paramtres de dimension
 //et, ventuellement, crer les tableaux
 initialize(prmX);

 TraceLog.WriteToLogFile('FS_CORR_MEMORY_SAFE option NOT activated');

 for i:= 1 to pred(FNbX) do
  begin
   attI:= prmX.Attribute[i];
   for j:= 0 to pred(i) do
    begin
     attJ:= prmX.Attribute[j];
     tab:= TCrossTab.create(attI,attJ);
     TRY
       TRY
       tab.Refresh(prmExamples);
       FXCorr[i][j]:= prmCompute(tab);
       EXCEPT
       FXCorr[i][j]:= 0.0;
       END;
     FINALLY
       tab.Free;
     END;
    end;
  end;

end;

procedure TXCorrMODTree.deInitialize;
var j: integer;
begin
 for j:= 0 to pred(FNbX) do
  setLength(FXCorr[j],0);
 setLength(FXCorr,0);
end;

function TXCorrMODTree.getCorr(i, j: integer): double;
begin

 if (j<i)
  then result:= FXCorr[i][j]
  else
   begin
    if (i=j)
     then result:= 1.0
     else result:= FXCorr[j][i];
   end;

end;

function TXCorrMODTree.getInstance: TXCorr;
begin
 result:= TXCorrMODTree.Create();
end;

procedure TXCorrMODTree.setCorr(i, j: integer; value: double);
begin
 if (j<i)
  then FXCorr[i][j]:= value;
end;

procedure TXCorrMODTree.setTabSize(prmSize: integer);
var j: integer;
begin
 setLength(FXCorr,prmSize);
 for j:= 0 to pred(prmSize) do
  setLength(FXCorr[j],j);
end;

initialization
 RegisterClass(TMLGenFSMODTree);
end.
