(*******************************************************************)
(* UCompSpvScoringLift.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(*******************************************************************)

{
@abstract(Classe de calcul d'une courbe LIFT)
@author(Ricco)
@created(23/04/2005)

Dans la catgorie des SCORING.

Calcule des courbes LIFT  partir de 2 types d'informations :

Dans ** DEFINE STATUS **
TARGET : l'attribut de rfrence
INPUT  : les attributs SCORE qui permettent d'effectuer le classement des individus

** Comme paramtre de la mthode **
POSITIVE CLASS VALUE : la modalit de la classe qu'il faut considrer comme "les positifs"

N.B: Il peut y avoir plus variables dans INPUT, on peut ainsi construire simultanment
plusieurs courbes LIFT et effectuer des comparaisons.

Par attribut de ciblage (scoring), on conserve 2 types d'infos
(a) la valeur du seuil du score
(b) la valeur de l'ordonne (Taux de vrais positifs dans la cible)

La taille de la cible est en abcisse, elle est commune  tous les attributs de ciblage.

}
unit UCompSpvScoringLift;

interface

USES
        Forms, IniFiles, Classes,   
        UCompDefinition,
        UCompManageDataset,
        UOperatorDefinition,
        UDatasetDefinition,
        UDatasetImplementation,
        UCompSpvLDefinition,
        UCompSpvScoring,
        UDatasetExamples;

CONST
   //nombre de valeurs  utiliser
   SCORING_NB_STEPS_TARGET_SIZE = 20;

TYPE

   {les individus slectionnes pour construire les courbes}
   TEnumSelExamplesScoring = (seScoringSelected,seScoringUnselected);

CONST
   //chanes de caractres associs pour les affichages
   STR_SEL_EXAMPLES_FOR_SCORING : array[TEnumSelExamplesScoring] of string = ('Selected','Unselected');

TYPE
   {gnrateur de composant}
   TMLGenSpvScoringLIFT = class(TMLGenComp)
                          protected
                          procedure   GenCompInitializations(); override;
                          public
                          function    GetClassMLComponent: TClassMLComponent; override;
                          end;

   {composant}
   TMLSpvScoringLIFT = class(TMLCompLocalData)
                       protected
                       function    getClassOperator: TClassOperator; override;
                       end;

   {structures de tableaux : en ligne, les seuils ; en colonne, les attributs de score (de classement)}
   TTabStoreScoringValues = array of array of TTypeContinue;

   {oprateur --  surcharger plus loin, ex. les courbes ROC, etc.}
   TOpSpvScoringLIFT = class(TOpLocalData)
                       protected
                       //les individus  utiliser
                       FUsedExamples: TExamples;
                       //variable target
                       FClassAttribute: TAttDiscrete;
                       //identifiant de la modalit des positifs
                       FIdPositiveClass: TTypeDiscrete;
                       //tableau des scores seuils
                       FTabScore: TTabStoreScoringValues;
                       //tableau des taux de vrais positifs (TVP)
                       FTabTVP: TTabStoreScoringValues;
                       //Nombre de positifs
                       FNbAllPos: integer;
                       //classe de paramtres assoicie
                       function    getClassParameter: TClassOperatorParameter; override;
                       //checker les attributs  utiliser
                       function    CheckAttributes(): boolean; override;
                       //checker les exemples  utiliser
                       function   CheckExamples(): boolean; override;
                       //prparer les tableaux de calculs --  surcharger le cas chant chez les descendants (ex. ROC)
                       procedure   prepareTabs(); virtual;
                       //dtruire les tableaux de calculs --  surcharger le cas chant chez les descendants (ex. ROC)
                       procedure   destroyTabs(); virtual;
                       //lancer le calcul du tableau des LIFT
                       function    CoreExecute(): boolean; override;
                       //calculer la courbe pour l'attribut nj --  surcharger chez les descendants (ex. ROC)
                       procedure   computeCurve(numAtt: integer; examples: TExamples); virtual;
                       //calculer le nombre de positifs, et autres infos --  surcharger chez les descendants (ex. pour en dduire les ngatifs)
                       procedure   computePositiveExamples(examples: TExamples); virtual;
                       public
                       destructor  destroy(); override;
                       //envoyer le rsultat des calculs
                       function    getHTMLResultsSummary(): string; override;
                       end;

   {paramtrage de l'oprateur -- identique  celui du calcul du score}
   TOpPrmSpvScoringLIFT = class(TOpPrmComputeScore)
                           protected
                           //ensemble d'individus utiliss
                           FSelExamples: TEnumSelExamplesScoring;
                           procedure   SetDefaultParameters(); override;
                           function    CreateDlgParameters(): TForm; override;
                           public
                           function    getHTMLParameters(): string; override;
                           procedure   LoadFromStream(prmStream: TStream); override;
                           procedure   SaveToStream(prmStream: TStream); override;
                           procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                           procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                           //proprits
                           property    SelExamples: TEnumSelExamplesScoring read FSelExamples write FSelExamples;
                          end;


implementation

USES
   Sysutils,
   UConstConfiguration, ULogFile, UDlgOpPrmSpvScoringLift;

{ TMLGenSpvScoringLIFT }

procedure TMLGenSpvScoringLIFT.GenCompInitializations;
begin
 FMLComp:= mlcSpvScoring;
end;

function TMLGenSpvScoringLIFT.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLSpvScoringLIFT;
end;

{ TMLSpvScoringLIFT }

function TMLSpvScoringLIFT.getClassOperator: TClassOperator;
begin
 result:= TOpSpvScoringLIFT;
end;

{ TOpSpvScoringLIFT }

function TOpSpvScoringLIFT.CheckAttributes: boolean;
var ok: boolean;
begin
 //un seul target et il doit tre discret
 ok:= (self.WorkData.LstAtts[asTarget].Count = 1) and (self.WorkData.LstAtts[asTarget].Attribute[0].isCategory(caDiscrete));
 //et tous les input doivent tre continus
 ok:= ok and ((self.WorkData.LstAtts[asInput].Count > 0) and (self.WorkData.LstAtts[asInput].isAllCategory(caContinue)));
 //and then...
 result:= ok;
end;

function TOpSpvScoringLIFT.CheckExamples: boolean;
begin
 if assigned(FUsedExamples) then FUsedExamples.Free();
 //dterminer les individus  utiliser
 case (self.PrmOp as TOpPrmSpvScoringLIFT).SelExamples of
  seScoringUnselected:
   begin
    //complmentaire des individus slectionn  la taille de la base --> c'est bien les individus non-slectionns
    FUsedExamples:= self.WorkData.Examples.getComplementaire(self.WorkData.LstAtts[asAll].Size);
   end
  else
   begin
    FUsedExamples:= TExamples.Create(self.WorkData.Examples.Size);
    FUsedExamples.Copy(self.WorkData.Examples);
   end;
 end;
 //tester si la taille convient
 result:= (FUsedExamples.Size >= SCORING_NB_STEPS_TARGET_SIZE);
end;

procedure TOpSpvScoringLIFT.computeCurve(numAtt: integer; examples: TExamples);
var srtEx: TExamples;
    attScore: TAttribute;
    i,curPos: integer;
    borneAdd,nbAdd: integer;//grer la frquence d'ajout dans le graphique
    curLigTab: integer;
begin
 attScore:= self.WorkData.LstAtts[asInput].Attribute[numAtt];
 //rcuprer les individus et les trier selon la variable de score
 srtEx:= TExamples.Create(examples.Size);
 srtEx.Copy(examples);
 srtEx.QuickSortBy(attScore);//attention le tri est ascendant, il faut prendre dans l'ordre inverse !!!
 //dterminer la premire borne pour les ajouts dans le tableau
 borneAdd:= TRUNC((1.0*srtEx.Size)/(1.0*SCORING_NB_STEPS_TARGET_SIZE));
 //la premire valeur des tableaux
 FTabScore[0,numAtt]:= attScore.cValue[srtEx.Number[srtEx.Size]];
 FTabTVP[0,numAtt]:= 0.0;
 //enquiller le reste
 curPos:= 0;
 curLigTab:= 0;
 nbAdd:= 0;
 for i:= srtEx.Size downto 1 do
  begin
   inc(nbAdd);
   //est-ce un positif ?
   if (FClassAttribute.dValue[srtEx.Number[i]] = FIdPositiveClass)
    then inc(curPos);
   //mj du tableau ?
   if (nbAdd >= borneAdd)
    then
     begin
      inc(curLigTab);
      //debug.
      //TraceLog.WriteToLogFile(format('[LIFT] add in row %d with thresold %d',[curLigTab,borneAdd]));
      //ajouter les infos
      if (curLigTab < SCORING_NB_STEPS_TARGET_SIZE)
       then
        begin
         FTabScore[curLigTab,numAtt]:= attScore.cValue[srtEx.Number[i]];
         FTabTVP[curLigTab,numAtt]:= (1.0*curPos)/(1.0*FNbAllPos);
        end;
      //nouvelle borne d'ajout
      borneAdd:= TRUNC((1.0*succ(curLigTab)*srtEx.Size)/(1.0*SCORING_NB_STEPS_TARGET_SIZE));
     end;
  end;
 //la dernire valeur des tableaux
 FTabScore[SCORING_NB_STEPS_TARGET_SIZE,numAtt]:= attScore.cValue[srtEx.Number[1]];
 FTabTVP[SCORING_NB_STEPS_TARGET_SIZE,numAtt]:= 1.0;
 //vider
 srtEx.Free();
end;

procedure TOpSpvScoringLIFT.computePositiveExamples(examples: TExamples);
var i: integer;
begin
 FNbAllPos:= 0;
 for i:= 1 to examples.Size do
  if (FClassAttribute.dValue[examples.Number[i]] = FIdPositiveClass)
   then inc (FNbAllPos);
end;

function TOpSpvScoringLIFT.CoreExecute: boolean;
var j: integer;
begin
 result:= false;
 //rcuprer la classe, pas de soucis parce que CheckAttributes doit avoir jou son rle
 FClassAttribute:= self.workdata.LstAtts[asTarget].Attribute[0] as TAttDiscrete;
 //rcuprer l'identifiant des positifs
 FIdPositiveClass:= FClassAttribute.LstValues.isValueAvailable((self.PrmOp as TOpPrmComputeScore).ClassValueName);
 if (FidPositiveClass > 0)
  then
   begin
    //compter le nombre de positifs dans le fichier
    self.computePositiveExamples(FUsedExamples);
    //prparer les tableaux -- merci le compteur de rfrences, a aide bien ici quand mme
    self.prepareTabs();
    //lancer les calculs pour chaque variable de rangement
    TRY
    for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
     self.computeCurve(j,FUsedExamples);
    result:= TRUE;
    EXCEPT
    END;
   end;
end;

destructor TOpSpvScoringLIFT.destroy;
begin
 if assigned(FUsedExamples) then FUsedExamples.Free();
 self.destroyTabs();
 inherited;
end;

procedure TOpSpvScoringLIFT.destroyTabs;
begin
 if (FTabScore <> nil) then Finalize(FTabScore);
 if (FTabTVP <> nil) then Finalize(FTabTVP);
end;

function TOpSpvScoringLIFT.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmSpvScoringLIFT;
end;

function TOpSpvScoringLIFT.getHTMLResultsSummary: string;
var s: string;
    i,j: integer;
    targetSize: integer;
begin
 s:= '<H3>LIFT Curve</H3>';
 //infos sur les effectifs
 s:= s+format('<b>Sample size : </b> %d<br>',[FUsedExamples.Size]);
 s:= s+format('<b>Positive examples : </b> %d',[FNbAllPos]);
 //
 s:= s+'<P>'+HTML_HEADER_TABLE_RESULT;
 //en tte du tableau
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH>Score Attribute</TH>';
 //nom de variables de score
 for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
  s:= s+format('<TH colspan=2>%s</TH>',[self.WorkData.LstAtts[asInput].Attribute[j].Name]);
 s:= s+'</TR>';
 //couple score-TP Rate (True Positive Rate)
 s:= s+HTML_TABLE_COLOR_DATA_BLUE+'<TD>Target size (%)</TD>';
 for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
  s:= s+format('<TD width="50" align="center">Score</TD><TD %s width="50" align="center">TP-Rate</TD>',[HTML_BGCOLOR_DATA_BLUE]);
 s:= s+'</TR>';
 //afficher les valeurs
 for i:= 0 to SCORING_NB_STEPS_TARGET_SIZE do
  begin
   targetSize:= i*(100 div SCORING_NB_STEPS_TARGET_SIZE); 
   //if (targetSize > 1.0) then targetSize:= 1.0;
   s:= s+HTML_TABLE_COLOR_DATA_GREEN+format('<TD align="center">%d</TD>',[targetSize]);
   for j:= 0 to pred(self.WorkData.LstAtts[asInput].Count) do
    s:= s+format('<TD align="center" %s>%.4f</TD><TD align="center">%.4f</TD>',[HTML_BGCOLOR_DATA_GRAY,FTabScore[i,j],FTabTVP[i,j]]);
   s:= s+'</TR>';
  end;
 s:= s+'</table>';
 //and then...
 result:= s;
end;

procedure TOpSpvScoringLIFT.prepareTabs;
begin
 //dtruire le cas chant
 self.destroyTabs();
 //et re-crer, +2 car il faut prvoir en plus la premire (0.0) et la dernire (1.0)
 setLength(FTabScore,succ(SCORING_NB_STEPS_TARGET_SIZE),self.WorkData.LstAtts[asInput].Count);
 setLength(FTabTVP,succ(SCORING_NB_STEPS_TARGET_SIZE),self.WorkData.LstAtts[asInput].Count);
end;

{ TOpPrmSpvScoringLIFT }

function TOpPrmSpvScoringLIFT.CreateDlgParameters: TForm;
begin
 result:= TdlgOpPrmSpvScoringCurve.CreateFromOpPrm(self);
end;

function TOpPrmSpvScoringLIFT.getHTMLParameters: string;
begin
 result:= inherited getHTMLParameters()+
           '<br><b>Used examples : </b>'+STR_SEL_EXAMPLES_FOR_SCORING[FSelExamples];
end;

procedure TOpPrmSpvScoringLIFT.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 FSelExamples:= TEnumSelExamplesScoring(prmINI.ReadInteger(prmSection,'used_examples',ord(FSelExamples)));
end;

procedure TOpPrmSpvScoringLIFT.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FSelExamples,sizeof(FSelExamples));
end;

procedure TOpPrmSpvScoringLIFT.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteInteger(prmSection,'used_examples',ord(FSelExamples));
end;

procedure TOpPrmSpvScoringLIFT.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FSelExamples,sizeof(FSelExamples));
end;

procedure TOpPrmSpvScoringLIFT.SetDefaultParameters;
begin
 inherited;
 FSelExamples:= seScoringSelected;
end;

initialization
 RegisterClass(TMLGenSpvScoringLIFT);
end.
