(***********************************************************************)
(* UCompAssocRuleAPrioriMR.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(***********************************************************************)

{
@abstract(Gnration de rgles d'association - Algorithme a priori avec lagage des rgles)
@author(Ricco)
@created(23/05/2004)

Composant test pour les travaux avec A. Morineau
}

unit UCompAssocRuleAPrioriMR;

interface

USES
        Contnrs,
        Forms,Classes,IniFiles,
        UCompDefinition,
        UOperatorDefinition,
        UCompAssociationRuleDefinition,
        UDatasetExamples,
        UCalcAssocStructure,
        UCompAssocRuleAPriori;

TYPE
        {gnrateur de composant}
        TGenCompAssocAPrioriMR = class(TMLGenComp)
                               protected
                               procedure   GenCompInitializations(); override;
                               public
                               function    GetClassMLComponent: TClassMLComponent; override;
                               end;

        {composant}
        TMLCompAssocAPrioriMR = class(TMLCompAssocRule)
                              protected
                              function    getClassOperator: TClassOperator; override;
                              public
                              function    getTXTResultsDescription(): string;
                              end;

        {dclaration forward}
        TMRAssocInfo = class;

        {oprateur}
        TOpAssocAPrioriMR = class(TOpAssocRule)
                            protected
                            //individus en apprentissage
                            FAppEx: TExamples;
                            //individus en test
                            FTestEx: TExamples;
                            //infos pour les individus en test
                            FTestInfos: TObjectList;
                            function    getClassParameter: TClassOperatorParameter; override;
                            function    getClassAssocRule(): TClassCalcAssocRule; override;
                            procedure   ApplyRulesOnTestExamples(var lstInfos: TObjectList; examples: TExamples);
                            procedure   BootstrapAllRulesEvaluation(examples: TExamples);
                            procedure   BootstrapOneRuleEvaluation(info: TMRAssocInfo; rule: TAssocRuleStructure; examples: TExamples);
                            function    getNewForm(): TForm; override;
                            public
                            //new -- 10/03/2006 -- passer les rgles en paramtres pour qu'on puisse en faire appel ailleurs
                            function    getHTMLRulesDescription(rules: TLstAssocRules): string; overload;
                            function    getHTMLRulesDescription(): string; overload; virtual;
                            function    getTXTRulesDescription(rules: TLstAssocRules): string; overload;
                            function    getTXTRulesDescription(): string; overload; virtual;
                            //
                            function    CoreExecute(): boolean; override;
                            function    getHTMLResultsSummary(): string; override;
                            function    getTXTResultsSummary(): string;
                            end;

        {paramtre de l'oprateur}
        TOpPrmAssocAPrioriMR = class(TOpPrmAssocAPriori)
                               private
                               FAppPortion: double;
                               //nombre de rptitions pour la mthode de Monte Carlo
                               FNbMCRepetition: integer;
                               //taille chantillon mthode de Monte-Carlo
                               FMCSizeSample: integer;
                               protected
                               function    CreateDlgParameters(): TForm; override;
                               function    getCoreHTMLParameters(): string; virtual;
                               function    getCoreTXTParameters(): string; virtual;
                               public
                               procedure   SetDefaultParameters(); override;
                               function    getHTMLParameters(): string; override;
                               function    getTXTParameters(): string;
                               procedure   LoadFromStream(prmStream: TStream); override;
                               procedure   SaveToStream(prmStream: TStream); override;
                               procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                               procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                               property    AppPortion: double read FAppPortion write FAppPortion;
                               //property    Puissance: double read FPuissance write FPuissance;
                               property    NbMCRepetition: integer read FNbMCRepetition write FNbMCRepetition;
                               property    MCSizeSample: integer read FMCSizeSample write FMCSizeSample;
                               end;

        {types d'information VT}
        TStrucVTInfo = record
                       vt100: double;
                       vtFull: double;
                       vtMC: double;
                       vt_zValueFull: double;
                       end;

        {types d'infos VT  produire -- Binomiale, Hypergomtrique, Confiance}
        TEnumVTComputed = (vtBin,vtThg,vtConf);

        {tableau d'infos}
        TArrayVTInfo = array[TEnumVTComputed] of TStrucVTInfo;

        {Les infos spciales -- pour post-lagage de derrire les fagots}
        TMRAssocInfo = class
                       public
                        //nouvelles infos  recenser -- cf. criture A.M.
                        n,nA,nC,nAC,nNotC,nANotC: integer;
                        // *** ratios calculs ***
                        conviction: double;
                        leverage,surprise: double;
                        //infos. VT
                        VTInfos: TArrayVTInfo;
                        //indicateurs pour la confiance avec les z-approximations normales
                        //zVTConfFull: double;
                        //lancer le calcul des ratios
                        procedure computeRatios();
                        //affichage sur une ligne des infos
                        function  getHTMLDescription(): string;
                        function  getTXTDescription(): string;
                       end;

        {structure nouvelle}
        TAssocRuleStructureMR = class(TAssocRuleStructure)
                                protected
                                procedure   RecupInfos(calcRule: TCalcRuleFromItemset); override;
                                procedure   ApplyRuleOnExample(infos: TMRAssocInfo; example: integer);
                                //renvoie 1 si l'individu est couvert, 0 sinon
                                function    CoveredBy(premisse: TObjectList; example: integer): integer;
                                public
                                appInfos: TMRAssocInfo;
                                constructor Create(calcRule: TCalcRuleFromItemset); override;
                                destructor  Destroy; override;
                                procedure   ApplyRuleOn(infos: TMRAssocInfo; examples: TExamples);
                                end;

        {liste de rgles}
        TLstAssocRulesMR = class(TLstAssocRules)
                           private
                           public
                           function    DescriptionHTML(): string; override;
                           procedure   ApplyRulesOn(infos: TObjectList; examples: TExamples);
                           end;

        {calculateur}
        TCalcAssocAPrioriMR = class(TCalcAssocRule)
                              protected
                              function    classRuleStructure(): TClassAssocRuleStructure; override;
                              function    classLstAssocRules(): TClassLstAssocRules; override;
                              public
                              procedure   ApplyRulesOn(infos: TObjectList; examples: TExamples);
                              end;


implementation

uses
        Windows,
        Sysutils,
        EZDSLBar, FMath, ULogFile, Math,
        UStringAddBuffered,
        UStringsResources, UDlgOpPrmAssociationRule, UConstConfiguration,
        UCalcRndGenerator, UDlgOpPrmAssociationRuleMR,
        UFrmOpViewAssocRuleMR;

const
        EFFECTIF_ARTIFICIEL : integer = 100;
        DEFAULT_VALUE_NOT_COMPUTED = 99.99;
        EPSILON_PROBA = 1.0e-8;

        EPSILON_INTERPOLATION = 1.0e-5;

        MAX_VT_HG = 99.99;
        MIN_VT_HG = 0.00;
        ERR_VT_HG = -99.99;

        ERR_VT_BIN = -99.99;
        MAX_VT_BIN = 99.99;

        CHAR_TMP_SEPARATOR : CHAR = ';';
        CHAR_TAB_SEPARATOR : CHAR = #9;

        STR_CRLF = #13#10;

{***** NEW -- 15/06/2004 -- passer par les logs pour estimer les probas, a vite les dbordements ******}

function ln_fact(n: integer): double;
begin
 result:= lnGamma(n+1);
end;

function ln_binomial(n,p: integer): double;
var v: double;
begin
 v:= ln_fact(n)-(ln_fact(n-p)+ln_fact(p));
 result:= v;
end;

function personalHyperGeo(n,nAC,nA,nC: integer): double;
var tmp: double;
begin
 TRY
 //corrections pour prvenir une mauvaise interpolation
 if (nAC<0) then nAC:= 0;
 if (nA<0) then nA:= 0;
 if (nC<0) then nC:= 0;
 if (n-nA<0) then nA:= n;
 if (nC-nAC<0) then nAC:= nC;
 //calcul sur les logarithmes
 tmp:= ln_binomial(nA,nAC)+ln_binomial(n-nA,nC-nAC);
 tmp:= tmp-ln_binomial(n,nC);
 //moindre risque ici....
 tmp:= exp(tmp);
 //suite normale
 result:= tmp;
 EXCEPT
 result:= DEFAULT_VALUE_NOT_COMPUTED;
 TraceLog.WriteToLogFile('(HYPERGEO) --> erreur calcul loi hypergeo, func personalHyperGeo :: '+
                          format('%d, %d, %d, %d',[n,nAC,nA,nC]));
 END;
end;

{****** fonction de rpartition -- loi hypergeometrique ******}
function cumulPersonalHypergeo(n,nAC,nA,nC: integer; var no_error: boolean): double;
var sum: double;
    i: integer;
begin
 result:= DEFAULT_VALUE_NOT_COMPUTED;
 if (nAC>nA) or (nAC>nC)
  then no_error:= false
  else
   begin
     no_error:= no_error and true;
     //correction suite e-mail A.M. du 15/05/2004
     //proba "suprieure ou gale "...
     //for i:= 0 to nAC do
     sum:= 0.0;
     for i:= nAC to min(nA,nC) do
      sum:= sum+personalHyperGeo(n,i,nA,nC);
     //rsultat
     result:= sum;
   end;
end;

{**** nouvelle formule -- hypergomtrique -- correspondance 21/12/2004 ****}
function interpoleHypergeo(n: integer; nAC,nA,nC: double): double;
var dAC, dA, dC: double;//valeurs dcimales
    proba: double;//proba pour un sommet du cube
    vtSommet,vtInterpolated: double;//VT
    iAC,iA,iC: integer;//sommets du cube
    x,y,z,poids: double;//poids
    no_error: boolean;//vrif. erreur
begin
 //optimiste au dpart
 no_error:= TRUE;

 //valeur dcimales directement ici-- cf. le prog. XLS (variante_3.xls)
 dAC:= nAC;
 dA := nA;
 dC := nC;

 //moyenne pondre
 vtInterpolated:= 0.0;

 //calculs des 8 probas et VT associes
 for iAC:= trunc(dAC) to succ(trunc(dAC)) do
  for iA:= trunc(dA) to succ(trunc(dA)) do
   for iC:= trunc(dC) to succ(trunc(dC)) do
    begin

     //poids affect  chaque proba
     x:= 1.0-abs(dA-iA);
     y:= 1.0-abs(dC-iC);
     z:= 1.0-abs(dAC-iAC);

     poids:= x*y*z;

     //proba
     proba:= cumulPersonalHyperGeo(n,iAC,iA,iC,no_error);

     if (no_error)
      then
       begin
        //VT du sommet du cube
        vtSommet:= invNorm(1.0-proba);

        //VT pondr
        vtInterpolated:= vtInterpolated+poids*vtSommet;
       end;

    end;

 //and then...
 if no_error
  then result:= vtInterpolated
  else result:= ERR_VT_HG; 
end;

{****** fin formule 21/12/2004 hypergometrique ****************************}


{**** nouvelle formule -- binomiale contre-exemples -- cf. Hypergomtrique -- 26/01/2005 ****}
function interpoleBinomialeContrex(n: integer; nAC,nA,nC: double): double;
var dAC, dA, dC: double;//valeurs dcimales
    proba: double;//proba pour un sommet du cube
    vtSommet,vtInterpolated: double;//VT
    iAC,iA,iC: integer;//sommets du cube
    x,y,z,poids: double;//poids
    no_error: boolean;//vrif. erreur
begin
 //optimiste au dpart
 no_error:= TRUE;

 //valeur dcimales directement ici-- cf. le prog. XLS (variante_3.xls)
 dAC:= nAC;
 dA := nA;
 dC := nC;

 //moyenne pondre
 vtInterpolated:= 0.0;

 //calculs des 8 probas et VT associes
 for iAC:= trunc(dAC) to succ(trunc(dAC)) do
  for iA:= trunc(dA) to succ(trunc(dA)) do
   for iC:= trunc(dC) to succ(trunc(dC)) do
    begin

     //poids affect  chaque proba
     x:= 1.0-abs(dA-iA);
     y:= 1.0-abs(dC-iC);
     z:= 1.0-abs(dAC-iAC);

     poids:= x*y*z;

     //proba
     proba:= FBinom(n,1.0*iA*(n-iC)/(1.0*n*n),iA-iAC);

     if (no_error)
      then
       begin
        //VT du sommet du cube
        vtSommet:= invNorm(1.0-proba);

        //VT pondr
        vtInterpolated:= vtInterpolated+poids*vtSommet;
       end;

    end;

 //and then...
 if no_error
  then result:= vtInterpolated
  else result:= ERR_VT_HG;
end;

{**** nouvelle formule -- binomiale confiance -- cf. Hypergomtrique -- 26/01/2005 ****}
function interpoleBinomialeConfiance(n: integer; nAC,nA,nC: double): double;
var dAC, dA, dC: double;//valeurs dcimales
    proba: double;//proba pour un sommet du cube
    vtSommet,vtInterpolated: double;//VT
    iAC,iA,iC: integer;//sommets du cube
    x,y,z,poids: double;//poids
    no_error: boolean;//vrif. erreur
begin
 //optimiste au dpart
 no_error:= TRUE;

 //valeur dcimales directement ici-- cf. le prog. XLS (variante_3.xls)
 dAC:= nAC;
 dA := nA;
 dC := nC;

 //moyenne pondre
 vtInterpolated:= 0.0;

 //calculs des 8 probas et VT associes
 for iAC:= trunc(dAC) to succ(trunc(dAC)) do
  for iA:= trunc(dA) to succ(trunc(dA)) do
   for iC:= trunc(dC) to succ(trunc(dC)) do
    begin

     //poids affect  chaque proba
     x:= 1.0-abs(dA-iA);
     y:= 1.0-abs(dC-iC);
     z:= 1.0-abs(dAC-iAC);

     poids:= x*y*z;

     //proba
     proba:= 1.0-FBinom(iA,(1.0*iC)/(1.0*n),iAC);

     if (no_error)
      then
       begin
        //VT du sommet du cube
        vtSommet:= invNorm(1.0-proba);

        //VT pondr
        vtInterpolated:= vtInterpolated+poids*vtSommet;
       end;

    end;

 //and then...
 if no_error
  then result:= vtInterpolated
  else result:= ERR_VT_HG;
end;

//new -- 03/07/2004 -- utilisation de l'approximation normale
function VTConfApproxNormale(size: integer; info: TMRAssocInfo): double;
var rapport: double;
    n_a,n_c,n_ac: double;
    esp,variance: double;
    z: double;
begin
 //rapport entre effectifs
 rapport:= (1.0*size)/(1.0*info.n);
 n_a:= rapport*info.nA;
 n_c:= rapport*info.nC;
 n_ac:= rapport*info.nAC;
 //esprance--variance
 esp:= n_a*(n_c/(1.0*size));
 variance:= n_a*(n_c/(1.0*size))*(1.0-(n_c/(1.0*size)));
 //z-value
 z:= ((n_ac-0.5)-esp)/sqrt(variance);
 //v-test approxime par la normale
 result:= z;
end;

//new -- 05/06/2005 -- utilisation de l'approximation normale pour les contre-exemples binomiale
function VTBinContrexApproxNormale(size: integer; info: TMRAssocInfo): double;
var rapport: double;
    n_a,n_not_c,n_a_not_c: double;
    x,p,esp,variance: double;
    z: double;
begin
 //rapport entre effectifs
 rapport:= (1.0*size)/(1.0*info.n);
 n_a:= rapport*info.nA;
 n_not_c:= rapport*info.nNotC;
 n_a_not_c:= rapport*info.nANotC;
 //
 x:= n_a_not_c;
 p:= n_a*n_not_c/(1.0*size*size);
 esp:= 1.0*size*p;
 variance:= 1.0*size*p*(1.0-p);
 z:= ((x-0.5)-esp)/sqrt(variance);
 //
 result:= -1.0*z;
end;

//new -- 05/06/2005 -- approx. normale pour la valeur test en HyperGeo
function VTHGApproxNormale(size: integer; info: TMRAssocInfo): double;
var rapport: double;
    n,n_a,n_c,n_a_c: double;
    x,esp,variance: double;
    z: double;
begin
 n:= 1.0*size;
 //rapport entre effectifs
 rapport:= (1.0*size)/(1.0*info.n);
 n_a:= rapport*info.nA;
 n_c:= rapport*info.nC;
 n_a_c:= rapport*info.nAC;
 //
 x:= n_a_c;
 esp:= n_a*n_c/n;
 variance:= (n_a*(n-n_a)/n)*(n_c*(n-n_c)/n)/(n-1.0);
 //
 z:= ((x-0.5)-esp)/sqrt(variance);
 //
 result:= z;
end;



{************************************************************************}


{ TGenCompAssocAPrioriMR }

procedure TGenCompAssocAPrioriMR.GenCompInitializations;
begin
 FMLComp:= mlcAssociation;
end;

function TGenCompAssocAPrioriMR.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompAssocAPrioriMR;
end;

{ TMLCompAssocAPrioriMR }

function TMLCompAssocAPrioriMR.getClassOperator: TClassOperator;
begin
 result:= TOpAssocAPrioriMR;
end;

function TMLCompAssocAPrioriMR.getTXTResultsDescription: string;
var tmpStr: string;
begin
 //paramtres
 tmpStr:= (Operator.PrmOp as TOpPrmAssocAPrioriMR).getTXTParameters;
 //rsultats
 tmpStr:= tmpStr+(Operator as TOpAssocAPrioriMR).getTXTResultsSummary();
 //zoo...
 result:= tmpStr;
end;

{ TOpAssocAPrioriMR }

procedure TOpAssocAPrioriMR.ApplyRulesOnTestExamples(var lstInfos: TObjectList; examples: TExamples);
var temps: cardinal;
begin
 temps:= GetTickCount();
 TraceLog.WriteToLogFile('[ASSOC MR] -- begin apply rules');
 //bon passer en revue chacune des rgles de la base de rgles
 if assigned(lstInfos) then lstInfos.Free;
 FTestInfos:= TObjectList.Create(TRUE);
 //appliquer les rgles
 (self.CalcAssocRule as TCalcAssocAPrioriMR).ApplyRulesOn(lstInfos,examples);
 //and then...
 temps:= GetTickCount()-temps;
 TraceLog.WriteToLogFile(format('[ASSOC MR] -- end apply rules in %d ms.',[temps]));
end;

procedure TOpAssocAPrioriMR.BootstrapAllRulesEvaluation(examples: TExamples);
var rule: TAssocRuleStructureMR;
    info: TMRAssocInfo;
    i: integer;
begin
 //pour chaque rgle
 for i:= 0 to pred(self.CalcAssocRule.rules.count) do
  begin
   //la rgle courante
   rule:= self.CalcAssocRule.rules.getRule(i) as TAssocRuleStructureMR;
   //info courante
   info:= rule.appInfos;
   //valuation bootstrap
   self.BootstrapOneRuleEvaluation(info,rule,examples);
  end;
end;

procedure TOpAssocAPrioriMR.BootstrapOneRuleEvaluation(info: TMRAssocInfo;
  rule: TAssocRuleStructure; examples: TExamples);
var curRule: TAssocRuleStructureMR;
    curInfo: TMRAssocInfo;
    curExamples: TExamples;
    bRep,example: integer;
    vt: TEnumVTComputed;
    tabVT: array[TEnumVTComputed] of double;
    nbOkVT: array[TEnumVTComputed] of double;
begin
 //initialisation
 for vt:= low(TEnumVTComputed) to high(TEnumVTComputed) do
  begin
   tabVT[vt]:= 0.0;
   nbOkVT[vt]:= 0.0;
  end;
 //RUN...
 TRY
 curRule:= rule as TAssocRuleStructureMR;
 curExamples:= TExamples.Create((self.PrmOp as TOpPrmAssocAPrioriMR).MCSizeSample);
 //NB_REPETITION_BOOTSTRAP rptitions de taille SIZE_SAMPLE_BOOTSTRAP (=100 pour matcher avec les VT100)
 for bRep:= 1 to (self.PrmOp as TOpPrmAssocAPrioriMR).NbMCRepetition do
  begin
   //renouveller l'chantillon -- tirage avec remise pour l'instant
   for example:= 1 to (self.PrmOp as TOpPrmAssocAPrioriMR).MCSizeSample do
    curExamples.Number[example]:= examples.Number[1+random(examples.Size)];
   //valuer les infos
   curInfo:= TMRAssocInfo.Create();
   curRule.ApplyRuleOn(curInfo,curExamples);
   //rcuprer les VT
   for vt:= low(TEnumVTComputed) to high(TEnumVTComputed) do
    begin
     if (curInfo.VTInfos[vt].vtFull>(ERR_VT_BIN+1.0)) AND (curInfo.VTInfos[vt].vtFull<(MAX_VT_BIN-1.0))
      then
       begin
        tabVT[vt]:= tabVT[vt]+curInfo.VTInfos[vt].vtFull;
        nbOkVT[vt]:= nbOkVT[vt]+1.0;
       end;
    end;
   //vider le tout pas trs optimal mais simple
   curInfo.Free();
  end;
 curExamples.Free();
 //moyenne affecte  l'indicateur monte-carlo
 //*** remarque -- 05 janv 2006 **********************
 //on calcule bien la moyenne des valeurs tests
 //en non pas la valeur test des moyennes des p-values
 //***************************************************
 for vt:= low(TEnumVTComputed) to high(TEnumVTComputed) do
  begin
   if (nbOkVT[vt]>0)
    then info.VTInfos[vt].vtMC:= tabVT[vt]/nbOkVT[vt]
    //pas d'estimation possible
    else info.VTInfos[vt].vtMC:= 0.0;
  end;
 EXCEPT
  TraceLog.WriteToLogFile('!!! ERREUR !!! durant valuation bootstrap des rgles');
  for vt:= low(TEnumVTComputed) to high(TEnumVTComputed) do
   info.VTInfos[vt].vtMC:= ERR_VT_BIN;
 END;
end;

function TOpAssocAPrioriMR.CoreExecute: boolean;
begin
 if assigned(FCalcAssocRule)
  then FCalcAssocRule.Free;
 //instanciation
 self.InitializeCalcAssocRule();
 //apprentissage
 result:= TRUE;
 TRY
   //maintenant, il s'agit de subdiviser la base en deux parties...
   if assigned(FAppEx) then FAppEx.Free;
   if assigned(FTestEx) then FTestEx.Free;
   //subdivision alatoire
   FAppEx:= TExamples.Create(self.WorkData.Examples.Size);
   FTestEx:= TExamples.Create(self.WorkData.Examples.Size);
   //utiliser les valeurs standard de dpart afin de pouvoir reproduire les exps !
   self.WorkData.Examples.SamplingSplitting((self.PrmOp as TOpPrmAssocAPrioriMR).AppPortion,FAppEx,FTestEx,seedStandard);
   //lancer les calculs sur la partie apprentissage
   FCalcAssocRule.buildRules(FAppEx);
   //appliquer les rgles sur l'chantillon test
   if (FTestEx.Size>0)
    then self.ApplyRulesOnTestExamples(FTestInfos,FTestEx)
    else self.ApplyRulesOnTestExamples(FTestInfos,FAppEx);
   //new -- 05/08/2004 -- essayer le bootstrap pour valuer chaque rgle...
   self.BootstrapAllRulesEvaluation(FAppEx);
 EXCEPT
   result:= FALSE;
 END;
end;

function TOpAssocAPrioriMR.getClassAssocRule: TClassCalcAssocRule;
begin
 result:= TCalcAssocAPrioriMR;
end;

function TOpAssocAPrioriMR.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmAssocAPrioriMR;
end;

function TOpAssocAPrioriMR.getHTMLResultsSummary: string;
var tmp: string;
begin
 //caractristiques des chantillons
 tmp:= '<H3>Sample characteristics</H3>';
 tmp:= tmp+HTML_HEADER_TABLE_RESULT;
 tmp:= tmp+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan="2">Samples size</TH></TR>';
 tmp:= tmp+format('%s<TD>Training</TD><TD align="right">%d</TD></TR>',[HTML_TABLE_COLOR_DATA_GRAY,FAppEx.Size]);
 tmp:= tmp+format('%s<TD>Test</TD><TD align="right">%d</TD></TR>',[HTML_TABLE_COLOR_DATA_GRAY,FTestEx.Size]);
 tmp:= tmp+'</table>';
 //l'en-tte de l'analyse
 tmp:= tmp+inherited getHTMLResultsSummary();
 //la description des rgles
 TRY
 tmp:= tmp+self.getHTMLRulesDescription();
 EXCEPT
 tmp:= tmp+'<H2>Error on rules description generation</H2>';
 END;
 //and then...
 result:= tmp;
end;

function TOpAssocAPrioriMR.getHTMLRulesDescription(rules: TLstAssocRules): string;
var buf: TBufString;
    tmp: string;
    i: integer;
    rule: TAssocRuleStructureMR;
    infoTest: TMRAssocInfo;
begin
 //ajouter des informations additionnelles dans la description des rgles
 buf:= TBufString.Create();
 buf.BeginUpdate();
 //mettre le litre
 buf.AddStr(format('<H3>%s</H3>',['Rules evaluation']));
 //insrer la liste des rgles -- cf. document de A.M. (24/05/2004)
 buf.AddStr(HTML_HEADER_TABLE_RESULT);
 tmp:= HTML_TABLE_COLOR_HEADER_GRAY+
       '<TH>N</TH>'+ //1
       '<TH>Antcdent</TH><TH>Consquent</TH>'+  //2,3
       //apprentissage
       '<TH>n</TH><TH>n[A]</TH><TH>n[C]</TH><TH>n[A^C]</TH>'+ //4,5,6,7
       '<TH>Support</TH><TH>Confiance</TH><TH>Lift</TH>'+ //8,9,10
       '<TH>Leverage</TH><TH>Conviction</TH><TH>Surprise</TH>'+ //11,12,13
       '<TH>VT-Hyp 100</TH><TH>VT-Hyp Full</TH><TH>VT-Hyp MC</TH><TH>z (Hyp)</TH>'+ //HyperGeo
       '<TH>VT-Bin 100 (contre-ex.)</TH><TH>VT-Bin Full (contre-ex.)</TH><TH>VT-Bin MC (contre-ex.)</TH><TH>z (contre-ex.)</TH>'+ //Binomiale
       '<TH>VT-conf 100</TH><TH>VT-conf Full</TH><TH>VT-conf MC</TH><TH>z (conf)</TH>'+ //confiance
       //sparateur
       format('<TH %s>Test</TH>',[HTML_BGCOLOR_DATA_RED])+
       //test
       '<TH>n</TH><TH>n[A]</TH><TH>n[C]</TH><TH>n[A^C]</TH>'+ //4,5,6,7
       '<TH>Support</TH><TH>Confiance</TH><TH>Lift</TH>'+ //8,9,10
       '<TH>Leverage</TH><TH>Conviction</TH><TH>Surprise</TH>'+ //11,12,13
       '<TH>VT-Hyp 100</TH><TH>VT-Hyp Full</TH><TH>VT-Hyp MC</TH><TH>z (Hyp)</TH>'+ //HyperGeo
       '<TH>VT-Bin 100 (contre-ex.)</TH><TH>VT-Bin Full (contre-ex.)</TH><TH>VT-Bin MC (contre-ex.)</TH><TH>z (contre-ex.)</TH>'+ //Binomiale
       '<TH>VT-conf 100</TH><TH>VT-conf Full</TH><TH>VT-conf MC</TH><TH>z (conf)'+ //confiance
       //fin de ligne
       '</TR>';
 buf.AddStr(tmp);
 for i:= 0 to pred(rules.count) do
  begin
   //rcup
   rule:= rules.getRule(i) as TAssocRuleStructureMR;
   infoTest:= self.FTestInfos.Items[i] as TMRAssocInfo;
   //affichage
   tmp:= HTML_TABLE_COLOR_DATA_GRAY;
   //n de la rgle
   tmp:= tmp+format('<TD>%d</TD>',[succ(i)]);
   //2,3
   tmp:= tmp+format('%s',[rule.ShortDescriptionHTML]);
   //les indicateurs de qualit de la rgle sur le fichier apprentissage
   tmp:= tmp+rule.appInfos.getHTMLDescription();
   //sparateur
   tmp:= tmp+format('<TD %s align="center">=></TD>',[HTML_BGCOLOR_DATA_RED]);
   //les indicateurs de qualit sur chantillon test
   tmp:= tmp+infoTest.getHTMLDescription();
   //and then...
   tmp:= tmp+'</TR>';
   buf.AddStr(tmp);
  end;
 buf.AddStr('</TABLE>');
 //and then...
 buf.EndUpdate();
 result:= buf.BufS;
 buf.Free;
end;

function TOpAssocAPrioriMR.getHTMLRulesDescription: string;
begin
 result:= self.getHTMLRulesDescription(self.CalcAssocRule.rules);
end;

function TOpAssocAPrioriMR.getNewForm: TForm;
begin
 result:= TfrmOpViewMRAssocRuleViewer.CreateFromOperator(self);
end;

function TOpAssocAPrioriMR.getTXTResultsSummary: string;
var tmp: string;
begin
 //caractristiques des chantillons
 tmp:= 'Sample characteristics;Samples size'+STR_CRLF;
 tmp:= tmp+format('Training;%d',[FAppEx.Size])+STR_CRLF;
 tmp:= tmp+format('Test;%d',[FTestEx.Size])+STR_CRLF;
 //nombre de rgles
 tmp:= tmp+format('Number of rules;%d',[CalcAssocRule.Rules.Count])+STR_CRLF;
 //la description des rgles
 tmp:= tmp+self.getTXTRulesDescription();
 //and then...
 strUtil_ReplaceCharInString(CHAR_TMP_SEPARATOR,CHAR_TAB_SEPARATOR,tmp);
 result:= tmp;
end;

function TOpAssocAPrioriMR.getTXTRulesDescription(rules: TLstAssocRules): string;
var buf: TBufString;
    tmp: string;
    i: integer;
    rule: TAssocRuleStructureMR;
    infoTest: TMRAssocInfo;
begin
 //ajouter des informations additionnelles dans la description des rgles
 buf:= TBufString.Create();
 buf.BeginUpdate();
 //mettre le litre
 buf.AddStr(format('%s',['Rules evaluation'])+STR_CRLF);
 //insrer la liste des rgles -- cf. document de A.M. (24/05/2004)
 tmp:= 'N;'+ //1
       'Antcdent;Consquent;'+  //2,3
       //apprentissage
       'n;n[A];n[C];n[A^C];'+ //4,5,6,7
       'Support;Confiance;Lift;'+ //8,9,10
       'Leverage;Conviction;Surprise;'+ //11,12,13
       'VT-Hyp 100;VT-Hyp Full;VT-Hyp MC;z-value (Hyp);'+ //HyperGeo
       'VT-Bin 100 (contre-ex.);VT-Bin Full (contre-ex.);VT-Bin MC (contre-ex.);z-value (contre-ex.);'+ //Binomiale
       'VT-conf 100;VT-conf Full;VT-conf MC;z-value (VTConfFull);'+ //confiance
       //sparateur
       'Test;'+
       //test
       'n;n[A];n[C];n[A^C];'+ //4,5,6,7
       'Support;Confiance;Lift;'+ //8,9,10
       'Leverage;Conviction;Surprise;'+ //11,12,13
       'VT-Hyp 100;VT-Hyp Full;VT-Hyp MC;z-value (Hyp);'+ //HyperGeo
       'VT-Bin 100 (contre-ex.);VT-Bin Full (contre-ex.);VT-Bin MC (contre-ex.);z-value (contre-ex.);'+ //Binomiale
       'VT-conf 100;VT-conf Full;VT-conf MC;z-value (VTConfFull)'+ //confiance
       //fin de ligne
       STR_CRLF;
 strUtil_ReplaceCharInString(CHAR_TMP_SEPARATOR,CHAR_TAB_SEPARATOR,tmp);
 buf.AddStr(tmp);
 for i:= 0 to pred(rules.count) do
  begin
   //rcup
   rule:= rules.getRule(i) as TAssocRuleStructureMR;
   infoTest:= self.FTestInfos.Items[i] as TMRAssocInfo;
   //n de la rgle
   tmp:= format('%d;',[succ(i)]);
   //2,3
   tmp:= tmp+format('%s',[rule.ShortDescriptionTXT()]);
   //les indicateurs de qualit de la rgle sur le fichier apprentissage
   tmp:= tmp+rule.appInfos.getTXTDescription();
   //sparateur
   tmp:= tmp+format(';=>;',[HTML_BGCOLOR_DATA_RED]);
   //les indicateurs de qualit sur chantillon test
   tmp:= tmp+infoTest.getTXTDescription();
   //and then...
   tmp:= tmp+STR_CRLF;
   strUtil_ReplaceCharInString(CHAR_TMP_SEPARATOR,CHAR_TAB_SEPARATOR,tmp);
   buf.AddStr(tmp);
  end;
 buf.AddStr(STR_CRLF);
 //and then...
 buf.EndUpdate();
 result:= buf.BufS;
 buf.Free;
end;

function TOpAssocAPrioriMR.getTXTRulesDescription(): string;
begin
 result:= self.getTXTRulesDescription(self.CalcAssocRule.rules);
end;

{ TAssocRuleStructureMR }

procedure TAssocRuleStructureMR.ApplyRuleOn(infos: TMRAssocInfo; examples: TExamples);
var i: integer;
begin
 //pour chaque individu, calculer les indicateurs idoines
 for i:= 1 to examples.Size do
  self.ApplyRuleOnExample(infos,examples.Number[i]);
 //posts-calculs !!!
 infos.computeRatios();
end;

procedure TAssocRuleStructureMR.ApplyRuleOnExample(infos: TMRAssocInfo;
  example: integer);
var vA,vC: integer;
begin
 inc(infos.n);
 //couvert par l'antcdent
 vA:= self.CoveredBy(FAnte,example);
 infos.nA:= infos.na+vA;
 //couvert par le consquent
 vC:= self.CoveredBy(FCons,example);
 infos.nC:= infos.nC+vC;
 infos.nNotC:= infos.nNotC+(1-vC);
 //couvert par la rgle ?
 infos.nAC:= infos.nAC+ ORD((vA>0) and (vC>0));
 //nANotC
 infos.nANotC:= infos.nANotC+ORD((vA>0) and (vC=0));
end;

function TAssocRuleStructureMR.CoveredBy(premisse: TObjectList;
  example: integer): integer;
var i: integer;
    covered: boolean;
    item: TAssocItem;
begin
 covered:= TRUE;
 for i:= 0 to pred(premisse.Count) do
  begin
   item:= premisse.Items[i] as TAssocItem;
   //tester l'galit, en cascade
   covered:= covered and (item.Value = item.Attribute.dValue[example]); 
  end;
 result:= ORD(covered);
end;

constructor TAssocRuleStructureMR.Create(calcRule: TCalcRuleFromItemset);
begin
 appInfos:= TMRAssocInfo.Create();
 inherited Create(calcRule);
end;

destructor TAssocRuleStructureMR.Destroy;
begin
 appInfos.Free();
 inherited Destroy();
end;

procedure TAssocRuleStructureMR.RecupInfos(calcRule: TCalcRuleFromItemset);
var baNC: TBooleanArray;
begin
 inherited RecupInfos(calcRule);
 //informations supplmentaires  rcuprer -- cf. notations A.M.
 with appInfos do
  begin
   n:= trunc(calcRule.BaseSize);
   nA:= calcRule.BitAnte.Count;
   nC:= calcRule.BitCons.Count;
   nAC:= calcRule.curItemset.Support;
   nNotC:= n-nC;
   {A et Not(C) -- un peu plus complexe}
   //rcuprer les C
   baNC:= TBooleanArray.CreateFrom(calcRule.BitCons);
   //passer en ngatif -- utiliser l'oprateur NOT
   baNC.ToggleAll();
   //faire un ET avec l'antcdent
   baNC.AndArray(calcRule.BitAnte);
   //compter
   baNC.RefreshCount();
   nANotC:= baNC.Count;
   //librer
   bANC.Free;
   //calculer les ratios
   computeRatios();
  end;
end;

{ TCalcAssocAPrioriMR }

procedure TCalcAssocAPrioriMR.ApplyRulesOn(infos: TObjectList; examples: TExamples);
begin
 //simple relai
 (self.rules as TLstAssocRulesMR).ApplyRulesOn(infos,examples);
end;

function TCalcAssocAPrioriMR.classLstAssocRules: TClassLstAssocRules;
begin
 result:= TLstAssocRulesMR; 
end;

function TCalcAssocAPrioriMR.classRuleStructure: TClassAssocRuleStructure;
begin
 result:= TAssocRuleStructureMR;
end;

{ TLstAssocRulesMR }

procedure TLstAssocRulesMR.ApplyRulesOn(infos: TObjectList; examples: TExamples);
var r: integer;
    one_info: TMRAssocInfo;
begin
 //appliquer chaque rgle sur les donnes -- je crains le pire en termes de temps de calcul...
 for r:= 0 to pred(self.count) do
  begin
   one_info:= TMRAssocInfo.Create();
   TRY
   (self.getRule(r) as TAssocRuleStructureMR).ApplyRuleOn(one_info,examples);
   FINALLY
   //il faut que l'ajout se fasse quel que soit le rsultat, sinon catastrophe, les listes ne sont plus synchrones !!!
   infos.Add(one_info);
   END;
  end;
end;

function TLstAssocRulesMR.DescriptionHTML: string;
begin
 //inhiber
 result:= '';
end;

{ TMRAssocInfo }

procedure TMRAssocInfo.computeRatios;
var correction: double;
    effectif_reference: integer;
begin
 //si n est infrieur  EFFECTIF_ARTIFICIEL -- new. 23/06/2004
 if (n<EFFECTIF_ARTIFICIEL)
  then effectif_reference:= n
  else effectif_reference:= EFFECTIF_ARTIFICIEL;
 {les ratios et autres indicateurs  calculer}
 if (nANotC>0)
  then conviction:= 1.0*(nA*nNotC)/(n*nANotC)
  else conviction:= DEFAULT_VALUE_NOT_COMPUTED;
 //25/05/2004 -- leverage et surprise
 Leverage:= (nAC/(1.0*n))-(nA/(1.0*n))*(nC/(1.0*n));
 if (nC>0)
  then Surprise:= ((nAC/(1.0*n))-(nANotC/(1.0*n)))/(nC/(1.0*n))
  else Surprise:= DEFAULT_VALUE_NOT_COMPUTED;
 //correction d'effectifs
 correction:= 1.0*effectif_reference/(1.0*n);
 //** new 20/06/2004 ** appel des nouvelles fonctions d'interpolation
 VTInfos[vtBin].vt100:= interpoleBinomialeContrex(effectif_reference,correction*nAC,correction*nA,correction*nC);
 VTInfos[vtBin].vtFull:= interpoleBinomialeContrex(n,nAC,nA,nC);
 VTInfos[vtBin].vt_zValueFull:= VTBinContrexApproxNormale(n,self);
 //confiance -- new 29/06/2004
 VTInfos[vtConf].vt100:= interpoleBinomialeConfiance(effectif_reference,correction*nAC,correction*nA,correction*nC);
 VTInfos[vtConf].vtFull:= interpoleBinomialeConfiance(n,nAC,nA,nC);
 VTInfos[vtConf].vt_zValueFull:= VTConfApproxNormale(n,self);
 //loi hypergeo
 VTInfos[vtThg].vt100:= interpoleHypergeo(effectif_reference,correction*nAC,correction*nA,correction*nC);
 VTInfos[vtThg].vtFull:= interpoleHypergeo(n,nAC,nA,nC);
 VTInfos[vtThg].vt_zValueFull:= VTHGApproxNormale(n,self);
end;

function TMRAssocInfo.getHTMLDescription: string;
var tmp: string;
begin
 tmp:= '';
 //4,5,6,7
 tmp:= tmp+format('<TD>%d</TD><TD>%d</TD><TD>%d</TD><TD>%d</TD>',[n,nA,nC,nAC]);
 //8,9,10
 tmp:= tmp+format('<TD>%.4f</TD><TD>%.4f</TD><TD>%.4f</TD>',[(1.0*nAC)/(1.0*n),(1.0*nAC)/(1.0*nA),(1.0*n*nAC)/(1.0*nA*nC)]);
 //11,12,13
 tmp:= tmp+format('<TD>%.4f</TD><TD>%.4f</TD><TD>%.4f</TD>',[leverage,conviction,surprise]);
 //14,15
 tmp:= tmp+format('<TD>%.4f</TD><TD>%.4f</TD><TD>%.4f</TD><TD>%.4f</TD>',[VTInfos[vtThg].vt100,VTInfos[vtThg].vtFull,VTInfos[vtThg].vtMC,VTInfos[vtThg].vt_zValueFull]);
 //binomiale
 tmp:= tmp+format('<TD>%.4f</TD><TD>%.4f</TD><TD>%.4f</TD><TD>%.4f</TD>',[VTInfos[vtBin].vt100,VTInfos[vtBin].vtFull,VTInfos[vtBin].vtMC,VTInfos[vtBin].vt_zValueFull]);
 //confiance
 tmp:= tmp+format('<TD>%.4f</TD><TD>%.4f</TD><TD>%.4f</TD><TD>%.4f</TD>',[VTInfos[vtConf].vt100,VTInfos[vtConf].vtFull,VTInfos[vtConf].vtMC,VTInfos[vtConf].vt_zValueFull]);
 //
 result:= tmp;
end;

function TMRAssocInfo.getTXTDescription: string;
var tmp: string;
begin
 tmp:= '';
 //4,5,6,7
 tmp:= tmp+format('%d;%d;%d;%d;',[n,nA,nC,nAC]);
 //8,9,10
 tmp:= tmp+format('%.4f;%.4f;%.4f;',[(1.0*nAC)/(1.0*n),(1.0*nAC)/(1.0*nA),(1.0*n*nAC)/(1.0*nA*nC)]);
 //11,12,13
 tmp:= tmp+format('%.4f;%.4f;%.4f;',[leverage,conviction,surprise]);
 //14,15
 tmp:= tmp+format('%.4f;%.4f;%.4f;%.4f;',[VTInfos[vtThg].vt100,VTInfos[vtThg].vtFull,VTInfos[vtThg].vtMC,VTInfos[vtThg].vt_zValueFull]);
 //binomiale
 tmp:= tmp+format('%.4f;%.4f;%.4f;%.4f;',[VTInfos[vtBin].vt100,VTInfos[vtBin].vtFull,VTInfos[vtBin].vtMC,VTInfos[vtBin].vt_zValueFull]);
 //confiance
 tmp:= tmp+format('%.4f;%.4f;%.4f;%.4f',[VTInfos[vtConf].vt100,VTInfos[vtConf].vtFull,VTInfos[vtConf].vtMC,VTInfos[vtConf].vt_zValueFull]);
 //astuce suprme qui facilite l'criture
 strUtil_ReplaceCharInString(CHAR_TMP_SEPARATOR,CHAR_TAB_SEPARATOR,tmp);
 //and then....
 result:= tmp;
end;

{ TOpPrmAssocAPrioriMR }

function TOpPrmAssocAPrioriMR.CreateDlgParameters: TForm;
begin
 result:= TDlgOpPrmAssociationRuleMR.CreateFromOpPrm(self);
end;

function TOpPrmAssocAPrioriMR.getCoreHTMLParameters: string;
var s: string;
begin
 s:= '';
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>A-Priori parameters</TH></TR>';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Support min</TD><TD align="right">%.2f</TD></TR>',[MinSupport]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Confidence min</TD><TD align="right">%.2f</TD></TR>',[MinConfidence]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Max rule length</TD><TD align="right">%d</TD></TR>',[MaxRuleLength]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Lift filtering</TD><TD align="right">%.2f</TD></TR>',[MinLift]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Learning set ratio</TD><TD align="right">%.2f</TD></TR>',[FAppPortion]);
 result:= s;
end;

function TOpPrmAssocAPrioriMR.getCoreTXTParameters: string;
var s: string;
begin
 s:= '';
 s:= s+format('Support min;%.2f',[MinSupport])+STR_CRLF;
 s:= s+format('Confidence min;%.2f',[MinConfidence])+STR_CRLF;
 s:= s+format('Max rule length;%d',[MaxRuleLength])+STR_CRLF;
 s:= s+format('Lift filtering;%.2f',[MinLift])+STR_CRLF;
 s:= s+format('Learning set ratio;%.2f',[FAppPortion])+STR_CRLF;
 result:= s;
end;

function TOpPrmAssocAPrioriMR.getHTMLParameters: string;
var s: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+self.getCoreHTMLParameters();
 s:= s+'</table>';
 result:= s;
end;

function TOpPrmAssocAPrioriMR.getTXTParameters: string;
var s: string;
begin
 s:= 'A-Priori parameters'+STR_CRLF;
 s:= s+self.getCoreTXTParameters();
 strUtil_ReplaceCharInString(CHAR_TMP_SEPARATOR,CHAR_TAB_SEPARATOR,s);
 result:= s;
end;

procedure TOpPrmAssocAPrioriMR.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 FAppPortion:= prmINI.ReadFloat(prmSection,'app_portion',FAppPortion);
end;

procedure TOpPrmAssocAPrioriMR.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FAppPortion,sizeof(FAppPortion));
end;

procedure TOpPrmAssocAPrioriMR.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteFloat(prmSection,'app_portion',FAppPortion);
end;

procedure TOpPrmAssocAPrioriMR.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FAppPortion,sizeof(FAppPortion));
end;

procedure TOpPrmAssocAPrioriMR.SetDefaultParameters;
begin
 inherited SetDefaultParameters();
 FAppPortion:= 1.0;
 FMCSizeSample:= 100;
 FNbMCRepetition:= 1;
end;

initialization
 Classes.RegisterClass(TGenCompAssocAPrioriMR);
end.
