(*****************************************************************)
(* UCompFSUnivFisher.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(*****************************************************************)

{

@abstract(Slectionner les input avec un test univari de Fisher -- Il s'agit tout btement d'une ANOVA)
@author(Ricco)
@created(04/10/2004)

Test basique mais trs rapide : mthode supervise bien sr
(1 TARGET discret obligatoire) et tous les INPUT doivent tre continus.

Les degrs de liberts sont les mmes toujours, du coup la statistique est comparable d'une variable  l'autre,
on peut trier sur le F directement.

Attention, si la distribution est multimodale, cette mthode de vaut rien...

3 paramtrages possibles :
--------------------------
(0) seuil la p-value du F
(1) seuil sur la valeur du F
(2) les x meilleurs au sens du F de Fisher
(3) trouver le plus grand "saut" dans la srie des F

}
unit UCompFSUnivFisher;

interface

USES
        Forms, Classes, IniFiles,
        UCompDefinition,
        UCompFSDefinition,
        UCompFSInputSelection,
        UOperatorDefinition,
        UCalcStatDesConditionnalDesc;

TYPE
        {gnrateur}
        TMLGenFSUnivFisher = class(TMLGenFS)
                             public
                             function    GetClassMLComponent: TClassMLComponent; override;
                             end;

        {composant}
        TMLCompFSUnivFisher = class(TMLCompFSInputSelection)
                              protected
                              function    getClassOperator: TClassOperator; override;
                              end;

        {oprateur}
        TOpFSUnivFisher = class(TOpFSInputSelSpvInputContinuous)
                          private
                          {La liste des stats calcules}
                          FLstFisher: TLstStatDesCondANOVA;
                          {Filtrage selon la p-value}
                          procedure   filterWithPValue();
                          {filtrage selon les X-best}
                          procedure   filterWithXBest();
                          {filtrage selon le gap}
                          procedure   filterWithGap();
                          protected
                          function    getClassParameter: TClassOperatorParameter; override;
                          public
                          constructor Create(AOwner: TObject); override;
                          destructor  Destroy(); override;
                          function    getHTMLResultsSummary(): string; override;
                          function    CoreExecute(): boolean; override;
                          end;

        {paramtre d'oprateur}
        TOpPrmFSUnivFisher = class(TOpPrmFSInputSelection)
                             private
                             {paramtre actif : 0 -> p-value, 1 -> x-premiers, 2 -> gap}
                             FActivePrm: integer;
                             {seuil p-value}
                             FPValue: double;
                             {x-premiers}
                             FXBest: integer;
                             protected
                             procedure   SetDefaultParameters(); override;
                             function    CreateDlgParameters(): TForm; override;
                             public
                             procedure   LoadFromStream(prmStream: TStream); override;
                             procedure   SaveToStream(prmStream: TStream); override;
                             procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                             procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                             function    getHTMLParameters(): string; override;
                             //proprits
                             property    ActivePrm: integer read FActivePrm write FActivePrm;
                             property    PValue: double read FPValue write FPValue;
                             property    XBest: integer read FXBest write FXBest;
                             end;

implementation

uses
        sysutils,
        UDatasetDefinition, UCompManageDataset, UDatasetImplementation,
        UDlgOpFSUnivariateChi2, UConstConfiguration, UStringAddBuffered,
        UCalcStatDes, UDlgOpPrmFSUnivFisher;

{ TMLGenFSUnivFisher }

function TMLGenFSUnivFisher.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompFSUnivFisher;
end;

{ TMLCompFSUnivFisher }

function TMLCompFSUnivFisher.getClassOperator: TClassOperator;
begin
 result:= TOpFSUnivFisher;
end;

{ TOpFSUnivFisher }

function TOpFSUnivFisher.CoreExecute: boolean;
var attTarget, attInput: TAttribute;
    j: integer;    
begin
 result:= TRUE;
 TRY
  //vider en dtruisant les objets
  FLstFisher.FreeAll();
  //relancer tous les calculs
  attTarget:= self.WorkData.LstAtts[asTarget].Attribute[0];
  for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
   begin
    attInput:= self.WorkData.LstAtts[asInput].Attribute[j];
    FLstFisher.AddStat(TCalcSDCondDescANOVA.Create(attInput,attTarget,self.WorkData.Examples));
   end;
  //trier sur la valeur du F
  FLstFisher.CompareMode:= 2;
  FLstFisher.SortStats();
  //Filtrage...
  case (self.PrmOp as TOpPrmFSUnivFisher).ActivePrm of
   1: self.filterWithXBest();
   2: self.filterWithGap();
   //par dfaut, on utilise la p-value
   else self.filterWithPValue();
  end;
 EXCEPT
 result:= FALSE;
 END;
end;

constructor TOpFSUnivFisher.Create(AOwner: TObject);
begin
 inherited Create(AOwner);
 FLstFisher:= TLstStatDesCondANOVA.Create(NIL,NIL);
end;

destructor TOpFSUnivFisher.Destroy;
begin
 if assigned(FLstFisher) then FLstFisher.Free();
 inherited Destroy();
end;

procedure TOpFSUnivFisher.filterWithGap;
var j,jMax: integer;
    delta,maxDelta: double;
begin
 jMax:= -1;
 maxDelta:= -1.0e308;
 for j:= 0 to self.FLstFisher.Count-2 do
  begin
   delta:= (self.FLstFisher.Stat(j) as TCalcSDCondDescANOVA).Fisher - (self.FLstFisher.Stat(succ(j)) as TCalcSDCondDescANOVA).Fisher;
   if (delta>maxDelta)
    then
     begin
      maxDelta:= delta;
      jMax:= j;
     end;
  end;
 //puis subdiviser en deux parties
 //les slectionns
 for j:= 0 to jMax do
  self.OutputData.LstAtts[asInput].Add((self.FLstFisher.Stat(j) as TCalcSDCondDesc).Attribute);
 //les exclus
 for j:= succ(jMax) to pred(self.FLstFisher.Count) do
  self.RemovedFromInput.AddObject((self.FLstFisher.Stat(j) as TCalcSDCondDesc).Attribute.Name,(self.FLstFisher.Stat(j) as TCalcSDCondDesc).Attribute);
end;

procedure TOpFSUnivFisher.filterWithPValue;
var j: integer;
    stat: TCalcSDCondDescANOVA;
    thresold: double;
begin
 thresold:= (self.PrmOp as TOpPrmFSUnivFisher).PValue;
 for j:= 0 to pred(self.FLstFisher.Count) do
  begin
   stat:= self.FLstFisher.Stat(j) as TCalcSDCondDescANOVA;
   if (stat.ProbaFisher<=thresold)
    then self.OutputData.LstAtts[asInput].Add(stat.Attribute)
    else self.RemovedFromInput.AddObject(stat.Attribute.Name,stat.Attribute);
  end;
end;

procedure TOpFSUnivFisher.filterWithXBest;
var nb,j: integer;
begin
 nb:= (self.PrmOp as TOpPrmFSUnivFisher).XBest;
 //vitons les surprises inutiles
 if (nb>self.FLstFisher.Count)
  then nb:= self.FLstFisher.Count;
 //puis prendre d'un ct les meilleurs
 for j:= 0 to pred(nb) do
  self.OutputData.LstAtts[asInput].Add((self.FLstFisher.Stat(j) as TCalcSDCondDesc).Attribute);
 //et de l'autre les exclus de la socit...
 for j:= nb to pred(self.FLstFisher.Count) do
  self.RemovedFromInput.AddObject((self.FLstFisher.Stat(j) as TCalcSDCondDesc).Attribute.Name,(self.FLstFisher.Stat(j) as TCalcSDCondDesc).Attribute);
end;


function TOpFSUnivFisher.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmFSUnivFisher;
end;

function TOpFSUnivFisher.getHTMLResultsSummary: string;
var s: string;
    stat: TCalcSDCondDescANOVA;
    j: integer;
    buf: TBufString;
    maxFisher,curFisher: double; 
begin
 buf:= TBufString.Create();
 buf.BeginUpdate();
 s:= inherited getHTMLResultsSummary();
 buf.AddStr(s);
 //ajouter le dtail des calculs
 s:= '<H3>Calculations details</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY;
 //un peu embtant mais bon...
 stat:= self.FLstFisher.Stat(0) as TCalcSDCondDescANOVA;
 maxFisher:= stat.Fisher;
 //
 s:= s+format('<TH>N</TH><TH>Attribute</TH><TH>F</TH><TH>F (max normalized)</TH><TH>p-value (%d,%d)</TH>',[stat.ddl1,stat.ddl2]);;
 s:= s+'</TR>';
 buf.AddStr(s);
 for j:= 0 to pred(self.FLstFisher.Count) do
  begin
   stat:= self.FLstFisher.Stat(j) as TCalcSDCondDescANOVA;
   if (maxFisher>0)
    then curFisher:= stat.Fisher/maxFisher
    else curFisher:= 0.0;
   s:= HTML_TABLE_COLOR_DATA_GRAY+format('<TD>%d</TD><TD>%s</TD><TD align="right">%.2f</TD><TD>%s</TD><TD align="right">%.6f</TD></TR>',
                                         [succ(j),stat.Attribute.Name,stat.Fisher,getHtmlHistogram(trunc(70*curFisher)),stat.ProbaFisher]);
   buf.AddStr(s);
  end;
 s:= '</table>';
 buf.AddStr(s);
 buf.EndUpdate();
 result:= buf.BufS;
 buf.Free();
end;

{ TOpPrmFSUnivFisher }

function TOpPrmFSUnivFisher.CreateDlgParameters: TForm;
begin
 result:= TDlgOpPrmFSUnivFisher.CreateFromOpPrm(self);
end;

function TOpPrmFSUnivFisher.getHTMLParameters: string;
var s: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan="2">Parameters</TH></TR>';
 s:= s+format('%s<TD>Active parameter</TD><TD align="right">%d</TD></TR>',[HTML_TABLE_COLOR_DATA_GRAY,FActivePrm]);
 s:= s+format('%s<TH>Parameter</TH><TH>Value</TH></TR>',[HTML_TABLE_COLOR_HEADER_GRAY]);
 s:= s+format('%s<TD>p-value thresold</TD><TD align="right">%.6f</TD></TR>',[HTML_TABLE_COLOR_DATA_GRAY,FPValue]);
 s:= s+format('%s<TD>best attributes</TD><TD align="right">%d</TD></TR>',[HTML_TABLE_COLOR_DATA_GRAY,FXBest]);
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmFSUnivFisher.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 FActivePrm:= prmINI.ReadInteger(prmSection,'active_prm',FActivePrm);
 FPValue:= prmINI.ReadFloat(prmSection,'p-value',FPValue);
 FXBest:= prmINI.ReadInteger(prmSection,'x-Best',FXBest);
end;

procedure TOpPrmFSUnivFisher.LoadFromStream(prmStream: TStream);
begin
 prmStream.ReadBuffer(FActivePrm,sizeof(FActivePrm));
 prmStream.ReadBuffer(FPValue,sizeof(FPValue));
 prmStream.ReadBuffer(FXBest,sizeof(FXBest))
end;

procedure TOpPrmFSUnivFisher.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 prmINI.WriteInteger(prmSection,'active_prm',FActivePrm);
 prmINI.WriteFloat(prmSection,'p-value',FPValue);
 prmINI.WriteInteger(prmSection,'x-Best',FXBest);
end;

procedure TOpPrmFSUnivFisher.SaveToStream(prmStream: TStream);
begin
 prmStream.WriteBuffer(FActivePrm,sizeof(FActivePrm));
 prmStream.WriteBuffer(FPValue,sizeof(FPValue));
 prmStream.WriteBuffer(FXBest,sizeof(FXBest));
end;

procedure TOpPrmFSUnivFisher.SetDefaultParameters;
begin
 {paramtre actif : 0 -> p-value, 1 -> x-premiers, 2 -> gap}
 FActivePrm:= 0;
 {seuil p-value}
 FPValue:= 0.001;
 {x-premiers}
 FXBest:= 10;
end;

initialization
 RegisterClass(TMLGenFSUnivFisher);
end.
