(**********************************************************************)
(* UCompSpvAssesBootstrap.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(**********************************************************************)

{
@abstract(Estimation boostrap du taux d'erreur)
@author(Ricco)
@created(14/02/2005)

Estimateur 0.632 Bootstrap d'EFFRON et 0.632+ de TIBSHIRANI & EFFRON

1 paramtre -> nombre de replication

}
unit UCompSpvAssesBootstrap;

interface

USES
        Forms,Classes,IniFiles,
        UCompDefinition,
        UCompSpvAssesDefinition,
        UOperatorDefinition,
        UDatasetExamples;

TYPE
        {gnrateur}
        TMLGenCompAssesBootstrap = class(TMLGenComp)
                                   protected
                                   procedure   GenCompInitializations(); override;
                                   public
                                   function    GetClassMLComponent: TClassMLComponent; override;
                                   end;

        {composant}
        TMLCompAssesBootstrap =  class(TMLCompSpvAsses)
                                protected
                                function    getClassOperator: TClassOperator; override;
                                end;

        {structure d'erreur}
        TStrucErrBootstrap = record
                              trainErr, testErr: double;
                              testSize: integer;
                             end;

        {oprateur}
        TOpAssesBootstrap =  class(TOpSpvAsses)
                            private
                            {tableau des taux d'erreur successifs calculs}
                            FTabErr: array of TStrucErrBootstrap;
                            {taux d'erreur en test -- attention, a a une signification particulire en bootstrap}
                            FTestErrorRate,FSizeErrorRate: double;
                            {erreur en resubstitution}
                            FResubErrRate: double;
                            {taux d'erreur bootstrap}
                            FBootErr,FBootErrPlus: double;
                            procedure   prepareTrainTest(var train,test: TExamples);
                            protected
                            procedure   PrepareConfMatrix(); override;
                            function    getClassParameter: TClassOperatorParameter; override;
                            procedure   AssesExecution(); override;
                            {profiter de l'excution sur tous pour calculer les indicateurs finaux}
                            procedure   LastExecution(); override;
                            public
                            destructor  destroy(); override;
                            function    getHTMLResultsSummary(): string; override;
                            function    getErrorRate(): double; override;
                            end;

        {paramtres de l'oprateur}
        TOpPrmAssesBootstrap = class(TOpPrmSpvAsses)
                              private
                              {nombre de replication}
                              FNbReplications: integer;
                              protected
                              function    CreateDlgParameters(): TForm; override;
                              procedure   SetDefaultParameters(); override;
                              public
                              procedure   LoadFromStream(prmStream: TStream); override;
                              procedure   SaveToStream(prmStream: TStream); override;
                              procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                              procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                              function    getHTMLParameters(): string; override;
                              property    NbReplications: integer read FNbReplications write FNbReplications;
                              end;

implementation

USES
        Sysutils, EzdslBAr, UCompSpvLDefinition, UDatasetDefinition,
        UDatasetImplementation, UCalcCrossTab, Math, UConstConfiguration,
  UDlgOpPrmSpvAssesBootstrap;

{ TMLGenCompAssesCV }

procedure TMLGenCompAssesBootstrap.GenCompInitializations;
begin
 FMLComp:= mlcSpvAssessment;
end;

function TMLGenCompAssesBootstrap.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompAssesBootstrap;
end;

{ TMLCompAssesCV }

function TMLCompAssesBootstrap.getClassOperator: TClassOperator;
begin
 result:= TOpAssesBootstrap;
end;

{ TOpPrmAssesBootstrap }

function TOpPrmAssesBootstrap.CreateDlgParameters: TForm;
begin
 result:= TdlgOpPrmSpvAssesBootstrap.CreateFromOpPrm(self);
end;

function TOpPrmAssesBootstrap.getHTMLParameters: string;
begin
 Result:= format('<P><B>Replications : <B> %d</P>',[FNbReplications]);
end;

procedure TOpPrmAssesBootstrap.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 FNbReplications:= prmINI.ReadInteger(prmSection,'replications',FNbReplications);
end;

procedure TOpPrmAssesBootstrap.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FNbReplications,sizeof(FNbReplications));
end;

procedure TOpPrmAssesBootstrap.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteInteger(prmSection,'replications',FNbReplications);
end;

procedure TOpPrmAssesBootstrap.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FNbReplications,sizeof(FNbReplications));
end;

procedure TOpPrmAssesBootstrap.SetDefaultParameters;
begin
 inherited SetDefaultParameters();
 FNbReplications:= 25;
end;

{ TOpAssesBootstrap }

procedure TOpAssesBootstrap.AssesExecution;
var replication: integer;
    ClassAtt: TAttribute;
    exTrain,exTest: TExamples;
    confMat: TConfusionMatrix;
begin
 //attribut  prdire
 ClassAtt:= CompMetaSpv.OutputData.LstAtts[asTarget].Attribute[0];
 //les observations
 exTrain:= TExamples.Create(self.AllExamples.Size);
 exTest:= TExamples.Create(self.AllExamples.Size);
 //matrices de confusion
 confMat:= TConfusionMatrix.createStructure(classAtt);
 //pour chaque replication
 FTestErrorRate:= 0.0;
 FSizeErrorRate:= 0.0;
 for replication:= 0 to pred((self.PrmOp as TOpPrmAssesBootstrap).NbReplications) do
  begin
   //prparer apprentissage et test
   self.prepareTrainTest(exTrain,exTest);
   //apprentissage
   self.RootExamples.Copy(exTrain);
   self.CompMetaSpv.Execute(TRUE);
   //computer et conserver les matrices de confusions et surtout les erreurs
   confMat.connectPredAttribute(self.CompMetaSpv.PredClass);
   //en apprentissage -- il peut y avoir des doublons
   confMat.refresh(exTrain);
   FTabErr[replication].trainErr:= confMat.getErrorRate();
   //en test (n'ayant pas particip  l'apprentissage)
   confMat.refresh(exTest);
   FTabErr[replication].testErr:= confMat.getErrorRate();
   FTabErr[replication].testSize:= exTest.Size;
   //recueillir pour moyenner
   FTestErrorRate:= FTestErrorRate+1.0*FTabErr[replication].testSize*FTabErr[replication].testErr;
   FSizeErrorRate:= FSizeErrorRate+FTabErr[replication].testSize;
  end;
 //moyenne de l'erreur en test (des individus successifs n'ayant pas particips  l'apprentissage)
 if (FSizeErrorRate>0)
  then FTestErrorRate:= FTestErrorRate/FSizeErrorRate
  else FTestErrorRate:= 1.0;//il y a qd mme un srieux pbm dans ce cas...
 //vider
 confMat.Free();
 exTest.Free();
 exTrain.Free();
end;

destructor TOpAssesBootstrap.destroy;
begin
 setLength(FTabErr,0);
 inherited;
end;

function TOpAssesBootstrap.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmAssesBootstrap;
end;

function TOpAssesBootstrap.getErrorRate: double;
begin
 result:= FBootErrPlus;
end;

function TOpAssesBootstrap.getHTMLResultsSummary: string;
var s: string;
begin
 s:= '<P><H3>Boostrap error estimation</H3>';
 s:= s+ HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>Error rate</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GREEN+format('<TD width="100">.632+ bootstrap</TD><TD align="right" width="100">%.4f</TD></TR>',[FBootErrPlus]);
 s:= s+HTML_TABLE_COLOR_DATA_GREEN+format('<TD width="100">.632  bootstrap</TD><TD align="right" width="100">%.4f</TD></TR>',[FBootErr]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD width="100">Resubstitution</TD><TD align="right" width="100">%.4f</TD></TR>',[FResubErrRate]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD width="100">Avg test set</TD><TD align="right" width="100">%.4f</TD></TR>',[FTestErrorRate]);
 s:= s+'</table>';
 //and then...
 result:= s;
end;

procedure TOpAssesBootstrap.LastExecution;
var classAtt: TAttribute;
    resubMatConf: TConfusionMatrix;
    //
    GChapeau,RPrim,ErrPrim: double;
    k: integer;
begin
 inherited LastExecution();
 //************************************
 //calculer les indicateurs bootstrap *
 //************************************
 //attribut  prdire
 ClassAtt:= CompMetaSpv.OutputData.LstAtts[asTarget].Attribute[0];
 //matrice de confusion en resubstitution
 resubMatConf:= TConfusionMatrix.create(classAtt,self.CompMetaSpv.PredClass,self.RootExamples);
 FResubErrRate:= resubMatConf.getErrorRate();
 //***
 //*** 0.632 bootstrap error rate ***
 //***
 FBootErr:= 0.368*FResubErrRate+0.632*FTestErrorRate;
 //***
 //*** 0.632+ bootstrap error rate ***
 //***
 //produit des marges
 GChapeau:= 0.0;
 for k:= 1 to resubMatConf.CrossTab.RowCount do
  GChapeau:= GChapeau+resubMatConf.CrossTab.ColFreq[k,0]*(1.0-resubMatConf.CrossTab.RowFreq[0,k]);
 //tenir compte du biais d'apprentissage
 ErrPrim:= MinValue([FTestErrorRate,GChapeau]);
 if (FTestErrorRate>FResubErrRate) and (GChapeau>FResubErrRate)
  then RPrim:= (FTestErrorRate-FResubErrRate)/(GChapeau-FResubErrRate)
  else RPrim:= 0.0;
 //correction du diable !!!
 FBootErrPlus:= FBootErr+(ErrPrim-FResubErrRate)*(0.368*0.632*RPrim)/(1.0-0.368*RPrim);
 //vider
 resubMatConf.Free();
end;

procedure TOpAssesBootstrap.PrepareConfMatrix;
begin
 inherited;
 setLength(FTabErr,(self.PrmOp as TOpPrmAssesBootstrap).NbReplications);
end;

procedure TOpAssesBootstrap.prepareTrainTest(var train, test: TExamples);
var i: integer;
    bTest: TBooleanArray;
begin
 //chantillonnage avec remise pour train
 self.AllExamples.SampleReplicate(train);
 //vrifier ceux qui ne sont pas pris pour les rserver au test
 bTest:= TBooleanArray.Create(succ(train.Size));
 bTest.SetAllFalse();
 for i:= 1 to train.Size do
  bTest.Flag[train.Number[i]]:= TRUE;
 //construire alors le test
 test.BeginAdd();
 for i:= 1 to self.AllExamples.Size do
  begin
   if not(bTest.Flag[i])
    then test.AddExample(i);//quivaut  self.AllExamples.Number[i]
  end;
 test.EndAdd();
 //vider
 bTest.Free();
end;

initialization
 RegisterClass(TMLGenCompAssesBootstrap);
end.
