(****************************************************************************)
(* UCompSpvAssesBVDecomposition.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(****************************************************************************)

{
@abstract(Dcomposition biais-variance)
@author(Ricco)
@created(14/02/2005)

Dcomposition biais-variance  la mode WOLPERT-KOHAVI, inspir de l'implmentation de WEKA.

2 paramtres :

(1) taille relative de l'chantillon test (par rapport aux donnes disponbibles) -- max. 90%
(2) taille du training pool (par rapport  ce qui est dispo pour l'apprentissage) -- max. 50%
}

unit UCompSpvAssesBVDecomposition;

interface

USES
        Forms,Classes,IniFiles,
        UCompDefinition,
        UCompSpvAssesDefinition,
        UOperatorDefinition,
        UDatasetExamples;

TYPE
        {gnrateur}
        TMLGenCompAssesBVar = class(TMLGenComp)
                            protected
                            procedure   GenCompInitializations(); override;
                            public
                            function    GetClassMLComponent: TClassMLComponent; override;
                            end;

        {composant}
        TMLCompAssesBVar =  class(TMLCompSpvAsses)
                          protected
                          function    getClassOperator: TClassOperator; override;
                          end;

        {oprateur}
        TOpAssesBVar =  class(TOpSpvAsses)
                        private
                        {taille absolue test}
                        FAbsSizeTest: integer;
                        {taille absolue train}
                        FAbsSizeTrain: integer;
                        {taille absolue training pool}
                        FAbsSizeTrainPool: integer;
                        {chantillon en test et en apprentissage}
                        FTrain, FTest: TExamples;
                        {les statistiques}
                        FErrorRate, FBias, FVariance, FSigma: double;
                        {obtenir les bonnes tailles des individus et prparer les sous-ensembles dfinitifs d'individus}
                        procedure   computeSetSize();
                        protected
                        //procedure   ReInitialize(); override;
                        //procedure   PrepareConfMatrix(); override;
                        function    getClassParameter: TClassOperatorParameter; override;
                        procedure   AssesExecution(); override;
                        public
                        destructor  destroy(); override;
                        function    getHTMLResultsSummary(): string; override;
                        end;

        {paramtres de l'oprateur}
        TOpPrmAssesBVar = class(TOpPrmSpvAsses)
                          private
                          {taille relative du test}
                          FRelSizeTest: double;
                          {taille relative du training pool -- <= 50%,  fixer dans la bote de paramtrage !!!}
                          FRelSizeTrainingPool: double;
                          {nombre d'itrations}
                          FNbIterations: integer;
                          protected
                          function    CreateDlgParameters(): TForm; override;
                          procedure   SetDefaultParameters(); override;
                          public
                          procedure   LoadFromStream(prmStream: TStream); override;
                          procedure   SaveToStream(prmStream: TStream); override;
                          procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                          procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                          function    getHTMLParameters(): string; override;
                          property    RelSizeTest: double read FRelSizeTest write FRelSizeTest;
                          property    RelSizeTrainingPool: double read FRelSizeTrainingPool write FRelSizeTrainingPool;
                          property    NbIterations: integer read FNbIterations write FNbIterations; 
                          end;

implementation

USES
        Sysutils, UConstConfiguration, UDatasetDefinition,
  UDatasetImplementation, UDlgOpPrmSpvAssesBVDecomposition;


TYPE
        //structure tableau interne pour les probas d'affectation
        TTabAffectation = array of array of integer;



{ TMLGenCompAssesBVar }

procedure TMLGenCompAssesBVar.GenCompInitializations;
begin
 FMLComp:= mlcSpvAssessment;
end;

function TMLGenCompAssesBVar.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompAssesBVar;
end;

{ TMLCompAssesBVar }

function TMLCompAssesBVar.getClassOperator: TClassOperator;
begin
 result:= TOpAssesBVar;
end;

{ TOpAssesBVar }

procedure TOpAssesBVar.AssesExecution;
var prm: TOpPrmAssesBVar;
    classAtt: TAttribute;
    tabProba: TTabAffectation;
    iter,iTest,example: integer;
    iClass,kClass,kPred: TTypeDiscrete;
    rootExamples: TExamples;
    bsum,vsum,ssum,denom: Double;
    valObs, valPred: double;
begin
 prm:= self.PrmOp as TOpPrmAssesBVar;
 rootExamples:= self.RootExamples;
 classAtt:= CompMetaSpv.OutputData.LstAtts[asTarget].Attribute[0];
 //calculer les tailles respectives des sous-ensembles d'individus
 self.computeSetSize();
 //prparer le tableau des probas d'affectation -- ouah !!! la place mmoire
 //attention, la valeur 0 est automatiquement affecte, pas besoin de le faire nous-mme
 setLength(tabProba,self.FTest.Size,classAtt.nbValues);
 //rpeter le processus "apprentissage-test"
 FErrorRate:= 0.0;
 for iter:= 1 to prm.NbIterations do
  begin
   //tirage du training pool sur la racine
   FTrain.Sampling(FAbsSizeTrainPool,rootExamples);
   //lancer l'apprentissage
   self.CompMetaSpv.Execute(TRUE);
   //vrifier les biens-classs sur le test-set
   for iTest:= 1 to FTest.Size do
    begin
     //individu
     example:= FTest.Number[iTest];
     //"vraie" valeur
     kClass:= classAtt.dValue[example];
     //valeur "prdite"
     kPred:= self.CompMetaSpv.PredClass.dValue[example];
     //comparer pour mesurer le taux d'erreur
     if (kClass<>kPred)
      then FErrorRate:= FErrorRate+1.0;
     //mettre  jour le tableau des mauvaises affectations
     INC(tabProba[pred(iTest),pred(kPred)]);
    end;
  end;
 //**
 //** post-calcul des statistiques
 //**
 //taux d'erreur
 FErrorRate:= FErrorRate/(1.0*prm.NbIterations*FTest.Size);
 //biais et variance
 FBias:= 0.0;
 FVariance:= 0.0;
 FSigma:= 0.0;
 for iTest:= 1 to FTest.Size do
  begin
   bsum:= 0.0; vsum:= 0.0; ssum:= 0.0;
   kClass:= classAtt.dValue[FTest.Number[iTest]];
   for iClass:= 1 to classAtt.nbValues do
    begin
     //"vraie" valeur de la classe  celle en cours d'tude ?
     valObs:= 1.0*ord(kClass = iClass);//doit tre gal  1 si galit vrifie
     //proportion de la valeur prdite pour la modalit "iClass" tudie
     valPred:= tabProba[pred(iTest),pred(iClass)]/(1.0*prm.NbIterations);
     //formule magique
     bsum:= bsum+(valObs-valPred)*(valObs-valPred)-valPred*(1.0-valPred)/(-1.0+prm.NbIterations);
     vsum:= vsum+valPred*valPred;
     ssum:= ssum+valObs*valObs;
    end;
   FBias:= FBias+bsum;
   FVariance:= FVariance+(1.0-vsum);
   FSigma:= FSigma+(1.0-ssum);
  end;
 //corrections
 denom:= 2.0*FTest.Size;
 FBias:= FBias/denom;
 FVariance:= FVariance/denom;
 FSigma:= FSigma/denom;
 //librer
 setLength(tabProba,0,0);
end;

procedure TOpAssesBVar.computeSetSize();
var prm: TOpPrmAssesBVar;
begin
 prm:= self.PrmOp as TOpPrmAssesBVar;
 //le test set peut tre prpar une fois pour toutes !!!
 if assigned(FTrain) then FreeAndNil(FTrain);
 if assigned(FTest) then FreeAndNil(FTest);
 FTrain:= TExamples.Create(self.AllExamples.Size);
 FTest:= TExamples.Create(self.AllExamples.Size);
 //rpartir apprentissage-test
 self.AllExamples.SamplingSplitting(1.0-prm.RelSizeTest,FTrain,FTest);
 //tailles respectives
 self.FAbsSizeTrain:= FTrain.Size;
 self.FAbsSizeTest:= FTest.Size;
 //training pool size
 self.FAbsSizeTrainPool:= TRUNC(prm.RelSizeTrainingPool*self.FAbsSizeTrain);
end;

destructor TOpAssesBVar.destroy();
begin
 if assigned(FTrain) then FreeAndNil(FTrain);
 if assigned(FTest) then FreeAndNil(FTest);
 inherited;
end;

function TOpAssesBVar.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmAssesBVar;
end;

function TOpAssesBVar.getHTMLResultsSummary: string;
var s: string;
    pBias, pVariance: double;
begin
 //taille des chantillons utiliss
 s:= '<P><H3>Samples size</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>Samples</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD width="100">Train size</TD><TD align="right" width="100">%d</TD></TR>',[FAbsSizeTrain]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Train pool size</TD><TD align="right">%d</TD></TR>',[FAbsSizeTrainPool]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Test size</TD><TD align="right">%d</TD></TR>',[FAbsSizeTest]);
 s:= s+'</table>';
 //rsultats mesurs
 s:= s+'<P><H3>Results</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>Measurements</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD width="100">Error rate</TD><TD align="right" width="100">%.4f</TD></TR>',[FErrorRate]);
 pBias:= 0.0; pVariance:= 0.0;
 if (FErrorRate>0.0)
  then
   begin
    pBias:= 100.0*FBias/FErrorRate;
    pVariance:= 100.0*FVariance/FErrorRate;
   end;
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Bias</TD><TD align="right">%.4f (%2.0f%s)</TD></TR>',[FBias,pBias,'%']);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Variance</TD><TD align="right">%.4f (%2.0f%s)</TD></TR>',[FVariance,pVariance,'%']);
 //s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Sigma</TD><TD align="right">%.4f</TD></TR>',[FSigma]);
 s:= s+'</table>';
 //and then...
 result:= s;
end;

{ TOpPrmAssesBVar }

function TOpPrmAssesBVar.CreateDlgParameters: TForm;
begin
 result:= TdlgOpPrmSpvAssesBVDecomposition.CreateFromOpPrm(self);
end;

function TOpPrmAssesBVar.getHTMLParameters: string;
var s: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>Bias-variance decomposition parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Test set size</TD><TD align="right">%.2f</TD></TR>',[FRelSizeTest]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Training pool size</TD><TD align="right">%.2f</TD></TR>',[FRelSizeTrainingPool]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Iterations</TD><TD align="right">%d</TD></TR>',[FNbIterations]);
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmAssesBVar.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 FRelSizeTest:= prmINI.ReadFloat(prmSection,'test_set_size',FRelSizeTest);
 FRelSizeTrainingPool:= prmINI.ReadFloat(prmSection,'training_pool_set_size',FRelSizeTrainingPool);
 FNbIterations:= prmINI.ReadInteger(prmSection,'iterations',FNbIterations);
end;

procedure TOpPrmAssesBVar.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FRelSizeTest,sizeof(FRelSizeTest));
 prmStream.ReadBuffer(FRelSizeTrainingPool,sizeof(FRelSizeTrainingPool));
 prmStream.ReadBuffer(FNbIterations,sizeof(FNbIterations));
end;

procedure TOpPrmAssesBVar.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteFloat(prmSection,'test_set_size',FRelSizeTest);
 prmINI.WriteFloat(prmSection,'training_pool_set_size',FRelSizeTrainingPool);
 prmINI.WriteInteger(prmSection,'iterations',FNbIterations);
end;

procedure TOpPrmAssesBVar.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FRelSizeTest,sizeof(FRelSizeTest));
 prmStream.WriteBuffer(FRelSizeTrainingPool,sizeof(FRelSizeTrainingPool));
 prmStream.WriteBuffer(FNbIterations,sizeof(FNbIterations));
end;

procedure TOpPrmAssesBVar.SetDefaultParameters;
begin
 inherited SetDefaultParameters();
 //33% en test au dpart
 FRelSizeTest:= 0.33;
 //50% pour le training pool
 FRelSizeTrainingPool:= 0.5;
 //50 itrations
 FNbIterations:= 50;
end;

initialization
 RegisterClass(TMLGenCompAssesBVar);
end.
