(************************************************************)
(* UCompFSMerit.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(************************************************************)

{

@abstract(Mthode CFS fonde sur la mesure MERIT (HALL et al., 1997 et 2000))
@author(Ricco)
@created(18/05/2004)

Mthode trs simple, fond sur les corrlations (gain d'entropie
normalis et symtris). Seule l'option FORWARD SELECTION est
implmente ici, pas de "Best First Search", a me parat vain.

Algo sans paramtres.

//-- new --
>> 04/02/2005 << paramtre secret, il est possible de limiter le nombre de variables slectionns --
non modifiable dans une bote de dialogue, il faut passer directement par la manipulation du TDM.
Utilis pour les tests sur la reconnaissance de protines
//---------

}
unit UCompFSMerit;

interface

USES
        Forms, Classes, IniFiles,
        UCompDefinition,
        UCompFSDefinition,
        UCompFSInputSelection,
        UOperatorDefinition,
        UCalcStatDesCrossTab,
        UDatasetDefinition,
        UDatasetExamples,
        UCalcFSCorrelation,
        UCalcCrossTab;

TYPE
        {gnrateur}
        TMLGenFSMerit = class(TMLGenFS)
                       public
                       function    GetClassMLComponent: TClassMLComponent; override;
                       end;

        {composant}
        TMLCompFSMerit = class(TMLCompFSInputSelection)
                        protected
                        function    getClassOperator: TClassOperator; override;
                        end;

        {computation rellement}
        TSelMerit = class(TSelCorr)
                   private
                   public
                   function    accept_new_attribute(prm: TOperatorParameter): boolean; override;
                   function    correlation_formula(prmTab: TCrossTab): double; override;     
                   end;

        {oprateur}
        TOpFSMerit = class(TOpFSSelCorr)
                    protected
                    function    getClassParameter: TClassOperatorParameter; override;
                    function    getClassSelection(): TClassSelCorr; override;
                    public
                    function    getHTMLResultsSummary(): string; override;
                    end;

        {paramtrage}
        TOpPrmFSMerit = class(TOpPrmFSInputSelection)
                       private
                       {utiliser la rgle d'arrt sur le nombre d'lments :: 0 -- non, 1 -- oui}
                       FUseOtherStoppingRule: integer;
                       {Max. de variables  slectionner, >= 1 et <= nombre de variables}
                       FNbMaxSelDescriptors: integer;
                       protected
                       procedure   SetDefaultParameters(); override;
                       function    CreateDlgParameters(): TForm; override;
                       public
                       procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                       procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                       end;

implementation

uses
        Sysutils,
        Math, UConstConfiguration, UDatasetImplementation, ULogFile;

{ TMLGenFSMerit }

function TMLGenFSMerit.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompFSMerit;
end;

{ TMLCompFSMerit }

function TMLCompFSMerit.getClassOperator: TClassOperator;
begin
 result:= TOpFSMerit;
end;

{ TSelMerit }

function TSelMerit.accept_new_attribute(prm: TOperatorParameter): boolean;
var i,j: integer;
    scY,scX: double;
    sumcY,sumcX: double;
    maxMerit,merit,vK: double;
    k,kMax: integer;
    accept_enter: boolean;
    meritPrm: TOpPrmFSMerit;
begin
 meritPrm:= prm as TOpPrmFSMerit;
 //zoo...
 result:= FALSE;
 //slection complte ?
 if (FNbCurSel < FXCorrelation.dim)
  then
   begin
    //somme des corrlations dans les Y
    scY:= 0.0;
    for j:= 0 to pred(FNbCurSel) do
     scY:= scY+FYCorrelation.getCorr(FCurSel[j]);
    //somme des corrlations dans les X
    scX:= 0.0;
    for i:= 0 to pred(FNbCurSel) do
     for j:= succ(i) to pred(FNbCurSel) do
      scX:= scX+FXCorrelation.getCorr(FCurSel[i],FCurSel[j]);
    //tester maintenant un attribut candidat
    kMax:= -1;
    maxMerit:= -1.0e308;
    vK:= 1.0+FNbCurSel;
    for k:= 0 to pred(FXCorrelation.dim) do
     begin
      if FXToEnter[k]
       then
        begin
         //rajouter la corrlation de la variable candidate avec les corrlations de Y
         sumcY:= scY+FYCorrelation.getCorr(k);
         //rajouter la corrlation de la variable candidate avec les X prsents
         sumcX:= scX;
         for j:= 0 to pred(FNbCurSel) do
          sumcX:= sumcX+FXCorrelation.getCorr(k,FCurSel[j]);
         //les moyennes
         sumcY:= sumcY/vK;
         if (FNbCurSel>0)
          then sumcX:= sumcX/(vK*(vK-1.0)/2.0);
         //l'indicateur de merite -- formule de Ghiselli -- on aurait pu simplifier mais conformons-nous strictement au texte
         merit:= (vK*sumcY)/SQRT(vK+vK*(vK-1.0)*sumcX);
         //alors ?
         if (merit>maxMerit)
          then
           begin
            maxMerit:= merit;
            kMax:= k;
           end;
        end;
     end;
    accept_enter:= FALSE;
    //TraceLog.WriteToLogFile(format('[CFS -- Feature selection] -- K:%.0f, MERIT:%.4f for attribute number:%d',[vK,maxMerit,kMax]));
    //alors, alors ?
    if (FNbCurSel=0)
     //pas de slection courante, on teste si on fait mieux que zro
     then
      begin
       //new -- 04/02/2005 -- tester si on veut quand mme intgrer mme si on est mauvais
       if ((meritPrm.FUseOtherStoppingRule=0) and (maxMerit>EPSILON_VALUE)) OR ((meritPrm.FUseOtherStoppingRule=1) and (FNbCurSel < meritPrm.FNbMaxSelDescriptors) and (FNbCurSel < FXCorrelation.dim))
        then accept_enter:= TRUE;
      end
     //il y a une slection courante, l'objectif est d'amliorer
     else
      begin
       //new -- 04/02/2005 -- tester si on veut quand mme intgrer mme si on n'amliore pas
       if ((meritPrm.FUseOtherStoppingRule=0) and (maxMerit>FCurInfo[pred(FNbCurSel)])) OR ((meritPrm.FUseOtherStoppingRule=1) and (FNbCurSel < meritPrm.FNbMaxSelDescriptors) and (FNbCurSel < FXCorrelation.dim))
        then accept_enter:= TRUE;
      end;
     //si introduction donc
     if accept_enter
      then
       begin
        FXToEnter[kMax]:= FALSE;
        FCurInfo[FNbCurSel]:= maxMerit;
        FCurSel[FNbCurSel]:= kMax;
        inc(FNbCurSel);
        //et tout est bien...
        result:= TRUE;
       end;
   end;
end;

function TSelMerit.correlation_formula(prmTab: TCrossTab): double;
var i,j: integer;
    SU,sX,sY,sXY,value: double;
begin
 //>>> hypothse de base ici, le tableau ne peut pas tre vide
 if (prmTab.Value[0,0]=0)
  then result:= 0
  else
   begin
     //marge X (colonne)
     sX:= 0.0;
     for j:= 1 to prmTab.ColCount do
      begin
       value:= prmTab.Value[0,j]/prmTab.Value[0,0];
       if (value>0)
        then sX:= sX-value*log2(value);
      end;
     //marge Y
     sY:= 0.0;
     for i:= 1 to prmTab.RowCount do
      begin
       value:= prmTab.Value[i,0]/prmTab.Value[0,0];
       if (value>0)
        then sY:= sY-value*log2(value);
      end;
     //croise
     sXY:= 0.0;
     for i:= 1 to prmTab.RowCount do
      begin
       for j:= 1 to prmTab.ColCount do
        begin
         value:= prmTab.FullFreq[i,j];
         if (value>0) then sXY:= sXY-value*log2(value);
        end;
      end;
     //corrlation SU (Symmetrical uncertainty)
     if ((sX+sY)>0)
      then
       begin
        SU:= 2.0*(sX+sY-sXY)/(sX+sY);
        result:= SU;
        //TraceLog.WriteToLogFile(format('SU --> %.6f',[SU]));
       end
      else result:= 0.0;
   end;
end;

{ TOpFSMerit }

function TOpFSMerit.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmFSMerit;
end;

function TOpFSMerit.getClassSelection: TClassSelCorr;
begin
 result:= TSelMerit; 
end;

function TOpFSMerit.getHTMLResultsSummary: string;
var s: string;
    j: integer;
    att: TAttribute;
begin
 s:= inherited getHTMLResultsSummary();
 //ajouter le dtail des calculs
 s:= s+'<H3>Calculations details</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY;
 s:= s+'<TH>Selected attribute</TH><TH>MERIT(S)</TH>';
 s:= s+'</TR>';
 for j:= 0 to pred(self.SelCorr.FNbCurSel) do
  begin
   att:= self.WorkData.LstAtts[asInput].Attribute[self.SelCorr.FCurSel[j]];
   s:= s+HTML_TABLE_COLOR_DATA_GRAY+
         format('<TD>%s</TD><TD align="right">%.6f</TD>',[att.Name,self.SelCorr.FCurInfo[j]]);
   s:= s+'</TR>';
  end;
 s:= s+'</table>';
 result:= s;
end;

{ TOpPrmFSMerit }

function TOpPrmFSMerit.CreateDlgParameters: TForm;
begin
 result:= NIL;
end;

procedure TOpPrmFSMerit.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 FUseOtherStoppingRule:= prmINI.ReadInteger(prmSection,'other_stopping_rule',FUseOtherStoppingRule);
 FNbMaxSelDescriptors:= prmINI.ReadInteger(prmSection,'x_best',FNbMaxSelDescriptors);
end;

procedure TOpPrmFSMerit.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
 prmINI.WriteInteger(prmSection,'other_stopping_rule',FUseOtherStoppingRule);
 prmINI.WriteInteger(prmSection,'x_best',FNbMaxSelDescriptors);
end;

procedure TOpPrmFSMerit.SetDefaultParameters;
begin
 //par dfaut, on utilise la rgle standard de CFS
 FUseOtherStoppingRule:= 0;
 //en scurit...
 FNbMaxSelDescriptors:= 1;
end;

initialization
 RegisterClass(TMLGenFSMerit);
end.
