(***********************************************************)
(* UCompFSFcbf.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(***********************************************************)

{

@abstract(FCBF method based on the paper by Yu and Liu, ICML-2003)
@author(Ricco)
@created(20/05/2004)

Same idea as always: select the variables that are the most
correlated with the class and the least correlated with each other.

Algorithm without parameters.

}
unit UCompFSFcbf;

interface

USES
        Forms, Classes, IniFiles,
        UCompDefinition,
        UCompFSDefinition,
        UCompFSInputSelection,
        UOperatorDefinition,
        UCalcStatDesCrossTab,
        UDatasetDefinition,
        UDatasetExamples,
        UCalcFSCorrelation,
        UCalcCrossTab;

TYPE
        {generator: its GetClassMLComponent override returns TMLCompFSFcbf}
        TMLGenFSFcbf = class(TMLGenFS)
                       public
                       function    GetClassMLComponent: TClassMLComponent; override;
                       end;

        {component: its getClassOperator override returns TOpFSFcbf}
        TMLCompFSFcbf = class(TMLCompFSInputSelection)
                        protected
                        function    getClassOperator: TClassOperator; override;
                        end;

        {actual computation of the FCBF selection}
        TSelFcbf = class(TSelCorr)
                   private
                   public
                   {runs the whole FCBF selection loop in one call and always returns FALSE}
                   function    accept_new_attribute(prm: TOperatorParameter): boolean; override;
                   {symmetrical uncertainty (SU) computed from a cross-tabulation}
                   function    correlation_formula(prmTab: TCrossTab): double; override;
                   end;

        {operator: binds the parameter class TOpPrmFSFcbf and the selection class TSelFcbf}
        TOpFSFcbf = class(TOpFSSelCorr)
                    protected
                    function    getClassParameter: TClassOperatorParameter; override;
                    function    getClassSelection(): TClassSelCorr; override;
                    public
                    {inherited summary followed by a table of the selected attributes and their SU(X,Y)}
                    function    getHTMLResultsSummary(): string; override;
                    end;

        {parameters of the operator: a single threshold, Delta}
        TOpPrmFSFcbf = class(TOpPrmFSInputSelection)
                       private
                       FDelta: double; {storage for the Delta property}
                       protected
                       {sets Delta to 0.0}
                       procedure   SetDefaultParameters(); override;
                       {creates the TDlgOpPrmFSFcbf settings dialog for this object}
                       function    CreateDlgParameters(): TForm; override;
                       public
                       {binary persistence: Delta is read/written as a raw double}
                       procedure   LoadFromStream(prmStream: TStream); override;
                       procedure   SaveToStream(prmStream: TStream); override;
                       {INI persistence: Delta is stored under the key 'delta'}
                       procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                       procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                       {HTML table showing the value of Delta}
                       function    getHTMLParameters(): string; override;
                       {threshold used by TSelFcbf.accept_new_attribute: only attributes whose
                        correlation with the class is strictly greater than Delta are candidates}
                       property    Delta: double read FDelta write FDelta;
                       end;

implementation

uses
        Sysutils,
        Math, UConstConfiguration, UDatasetImplementation, ULogFile,
  UDlgOpPrmFSFcbf;

{ TMLGenFSFcbf }

{ Returns the component class associated with this generator: TMLCompFSFcbf. }
function TMLGenFSFcbf.GetClassMLComponent: TClassMLComponent;
begin
  Result := TMLCompFSFcbf;
end;

{ TMLCompFSFcbf }

{ Returns the operator class associated with this component: TOpFSFcbf. }
function TMLCompFSFcbf.getClassOperator: TClassOperator;
begin
  Result := TOpFSFcbf;
end;

{ TSelFcbf }

{
  Performs the complete FCBF selection in one call.

  The method is deliberately used in a slightly different way from what its
  name suggests, in order to stay close to the algorithm of the paper
  (Figure 1, ICML-2003): it is meant to be called once and does all the work
  itself.

  prm    : operator parameters; must be a TOpPrmFSFcbf (its Delta is read).
  Result : always FALSE.

  Side effects: FXToEnter, FCurInfo, FCurSel and FNbCurSel are updated for
  every attribute that gets selected.
}
function TSelFcbf.accept_new_attribute(prm: TOperatorParameter): boolean;
var
  threshold: double;
  bestCorr: double;
  idx, bestIdx: integer;
  other: integer;
  stillCandidate: array of boolean;
begin
  // threshold on the attribute/class correlation
  threshold := (prm as TOpPrmFSFcbf).Delta;
  Result := False;

  // nothing to do when the selection is already complete
  if FNbCurSel >= FXCorrelation.dim then
    Exit;

  // preliminary filter: keep only attributes whose correlation with the
  // class is strictly above the threshold
  SetLength(stillCandidate, FYCorrelation.dim);
  for other := 0 to Pred(FYCorrelation.dim) do
    stillCandidate[other] := FYCorrelation.getCorr(other) > threshold;

  repeat
    // look for the remaining candidate that is most correlated with the class
    bestIdx := -1;
    bestCorr := -1.0e308;
    for idx := 0 to Pred(FYCorrelation.dim) do
      if stillCandidate[idx] and (FYCorrelation.getCorr(idx) > bestCorr) then
      begin
        bestCorr := FYCorrelation.getCorr(idx);
        bestIdx := idx;
      end;

    if bestIdx >= 0 then
    begin
      // it must not be picked a second time
      stillCandidate[bestIdx] := False;

      // record the newly selected attribute
      FXToEnter[bestIdx] := False;
      FCurInfo[FNbCurSel] := bestCorr;
      FCurSel[FNbCurSel] := bestIdx;
      Inc(FNbCurSel);

      // discard every remaining candidate that is at least as correlated
      // with the attribute just selected as it is with the class
      for other := 0 to Pred(FXCorrelation.dim) do
        if stillCandidate[other] then
          if FXCorrelation.getCorr(other, bestIdx) >= FYCorrelation.getCorr(other) then
            stillCandidate[other] := False;
    end;
  until bestIdx < 0; // stop once no candidate is left
end;

{
  Symmetrical uncertainty between the two variables of a cross-tabulation:

      SU = 2 * (H(X) - H(X|Y)) / (H(X) + H(Y))

  with entropies in bits (Log2).

  prmTab : cross-tab read through Value[row, col]. As used here, Value[0,0]
           is the grand total, Value[0,j] (j = 1..ColCount) the X margin and
           Value[i,0] (i = 1..RowCount) the Y margin.
  Result : SU, or 0 when the table is empty or when H(X) + H(Y) is not
           strictly positive.
}
function TSelFcbf.correlation_formula(prmTab: TCrossTab): double;
var
  row, col: integer;
  entX, entY, entXGivenY: double;
  rowEntropy, p: double;
begin
  // an empty table carries no information
  if prmTab.Value[0,0] = 0 then
  begin
    Result := 0;
    Exit;
  end;

  // H(X): entropy of the column margin
  entX := 0.0;
  for col := 1 to prmTab.ColCount do
  begin
    p := prmTab.Value[0,col]/prmTab.Value[0,0];
    if p > 0 then
      entX := entX-p*Log2(p);
  end;

  // H(Y): entropy of the row margin
  entY := 0.0;
  for row := 1 to prmTab.RowCount do
  begin
    p := prmTab.Value[row,0]/prmTab.Value[0,0];
    if p > 0 then
      entY := entY-p*Log2(p);
  end;

  // H(X|Y): entropy of X inside each row, weighted by the row frequency
  entXGivenY := 0.0;
  for row := 1 to prmTab.RowCount do
  begin
    rowEntropy := 0.0;
    // an empty row contributes nothing (and would divide by zero)
    if prmTab.Value[row,0] > 0 then
      for col := 1 to prmTab.ColCount do
      begin
        p := prmTab.Value[row,col]/prmTab.Value[row,0];
        if p > 0 then
          rowEntropy := rowEntropy-p*Log2(p);
      end;
    entXGivenY := entXGivenY+prmTab.Value[row,0]/prmTab.Value[0,0]*rowEntropy;
  end;

  // SU (symmetrical uncertainty)
  if (entX+entY) > 0 then
    Result := 2.0*(entX-entXGivenY)/(entX+entY)
  else
    Result := 0.0;
end;

{ TOpFSFcbf }

{ Returns the parameter class used by this operator: TOpPrmFSFcbf. }
function TOpFSFcbf.getClassParameter: TClassOperatorParameter;
begin
  Result := TOpPrmFSFcbf;
end;

{ Returns the selection class that does the computation: TSelFcbf. }
function TOpFSFcbf.getClassSelection: TClassSelCorr;
begin
  Result := TSelFcbf;
end;

{
  Builds the HTML results summary: the inherited summary followed by a
  "Calculations details" table with one row per selected attribute, giving
  its name and its SU(X,Y) value (FCurInfo) with six decimals.

  Fix: the original appended one extra '</TR>' after the loop although every
  row had already been closed, which produced unbalanced HTML.
}
function TOpFSFcbf.getHTMLResultsSummary: string;
var s: string;
    j: integer;
    att: TAttribute;
begin
 s:= inherited getHTMLResultsSummary();
 //append the details of the computation
 s:= s+'<H3>Calculations details</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 //header row
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY;
 s:= s+'<TH>Selected attribute</TH><TH>SU(X,Y)</TH>';
 s:= s+'</TR>';
 //one row per selected attribute, in selection order
 for j:= 0 to pred(self.SelCorr.FNbCurSel) do
  begin
   att:= self.WorkData.LstAtts[asInput].Attribute[self.SelCorr.FCurSel[j]];
   s:= s+HTML_TABLE_COLOR_DATA_GRAY+
         format('<TD>%s</TD><TD align="right">%.6f</TD>',[att.Name,self.SelCorr.FCurInfo[j]]);
   s:= s+'</TR>';
  end;
 //every row is already closed at this point: close the table only
 s:= s+'</table>';
 result:= s;
end;

{ TOpPrmFSFcbf }

{ Creates the parameter dialog (TDlgOpPrmFSFcbf) for this parameter object. }
function TOpPrmFSFcbf.CreateDlgParameters: TForm;
begin
  Result := TDlgOpPrmFSFcbf.CreateFromOpPrm(Self);
end;

{
  Returns an HTML table describing the parameters: a "FCBF parameters"
  header row followed by one row showing Delta with two decimals.
}
function TOpPrmFSFcbf.getHTMLParameters: string;
begin
  Result := HTML_HEADER_TABLE_RESULT
          + HTML_TABLE_COLOR_HEADER_GRAY + '<TH colspan="2">FCBF parameters</TH></TR>'
          + HTML_TABLE_COLOR_DATA_GRAY
          + '<TD>Delta</TD>' + Format('<TD align="right">%.2f</TD>', [FDelta]) + '</TR>'
          + '</table>';
end;

{
  Reads Delta from the key 'delta' of section prmSection in prmINI.
  The current value of Delta is used as the default when the key is missing.
  The inherited implementation is not called.
}
procedure TOpPrmFSFcbf.LoadFromINI(prmSection: string; prmINI: TMemIniFile);
var
  fallback: double;
begin
  fallback := FDelta;
  FDelta := prmINI.ReadFloat(prmSection, 'delta', fallback);
end;

{
  Reads Delta from prmStream as a raw double (SizeOf(FDelta) bytes).
  The inherited implementation is not called.
}
procedure TOpPrmFSFcbf.LoadFromStream(prmStream: TStream);
begin
  prmStream.ReadBuffer(FDelta, SizeOf(FDelta));
end;

{
  Writes Delta under the key 'delta' of section prmSection in prmINI.
  The inherited implementation is not called.
}
procedure TOpPrmFSFcbf.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
  prmINI.WriteFloat(prmSection, 'delta', FDelta);
end;

{
  Writes Delta to prmStream as a raw double (SizeOf(FDelta) bytes).
  The inherited implementation is not called.
}
procedure TOpPrmFSFcbf.SaveToStream(prmStream: TStream);
begin
  prmStream.WriteBuffer(FDelta, SizeOf(FDelta));
end;

{ Resets the parameters to their defaults: Delta = 0.0. }
procedure TOpPrmFSFcbf.SetDefaultParameters;
begin
  FDelta := 0.0;
end;

initialization
 //register the generator class through RegisterClass when the unit is initialized
 //NOTE(review): presumably needed so the generator can be found by class name - confirm
 RegisterClass(TMLGenFSFcbf);
end.
