(*******************************************************************)
(* UCalcRegLinMultiple.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(*******************************************************************)

{
@abstract(Unit de calcul de la rgression multiple -- Classe gnrique)
@author(Ricco)
@created(02/08/2005)

Rendre gnrique la classe de rgression multiple, l'objectif est de
pouvoir appeler cette classe de calcul dans des contextes diffrents, avec
un couplage le plus faible possible avec l'objet appellant.

L'objet appelant a donc maintenant la responsabilit de la prparation des donnes !

}
unit UCalcRegLinMultiple;

interface

USES
        UDatasetDefinition,
        UDatasetExamples,
        MATRIXMM;

TYPE
        {la classe de calcul effectif qui fait appel aux routines de REGRESS
        il faut surveiller attentivement les allocations dsallocations !!!}
        TCalcRegMultiple = class(TObject)
                           private
                           {Constante utilise}
                           FUseConst: Boolean;
                           {nombre d'individus}
                           NbExamples: integer;
                           floatNbExamples: double;
                           {nombre de variables}
                           FTrueNbVar: integer;//vrai
                           FApparentNbVar: integer;//additionn de la constante en cas de rgression avec constante
                           {attribut Y}
                           FEndogenous: TAttribute;
                           {attributs X}
                           FLstExogenous: TLstAttributes;
                           {XPX-1}
                           XPXM1: PManipMatMM;
                           {A : vecteur des coefficients}
                           A: PManipMatMM;
                           {vecteur des erreurs, peut tre trs utile mme aprs les calculs}
                           E: PManipMatMM;
                           {Carrs des erreurs}
                           SigmaC2: double;
                           {Ecart-types estims des coefficients}
                           SCA: PManipMatMM;
                           {SCR,SCE,SCT}
                           scr,sce,sct: double;
                           {carrs moyens}
                           mce,mcr: double;
                           {R2, R2-Ajust,F,proba-F}
                           R2,R2Bar,F,ProbaF: double;
                           protected
                           {excution proprement dite -- portage de la procdure MCO}
                           function  CoreExecute(): boolean;
                           {prparer les matrices de rsultats}
                           procedure PrepareResultMatrix();
                           {prparer les matrices de calcul}
                           function  PrepareDataMatrix(prmExamples: TExamples): boolean;
                           public
                           {Y}
                           Y: PManipMatMM;
                           {X}
                           X: PManipMatMM;
                           {crer l'oprateur de calcul}
                           constructor create(prmY: TAttribute; prmX: TLstAttributes; prmConstant: boolean = TRUE);
                           {tout dtruire}
                           destructor  destroy; override;
                           {lancer l'excution}
                           function    execute(prmExamples: TExamples): boolean;
                           {effectuer la prdiction et mesurer l'erreur}
                           procedure Predict(prmPred, prmErr: TAttribute);
                           {les rsultats au format HTML en reprenant le code de writeoutputfile}
                           function    GetHTMLResult(): string;
                           {properties}
                           property vecErreur: PManipMatMM read E;
                           end;


implementation

USES
        Windows, Sysutils, ULogFile, UStringAddBuffered,
        UConstConfiguration, FMath;

const
        {prcision de calcul}
        EPSILON_REGRESS_MCO = 1.0e-8;

{ TCalcRegMultiple }

function TCalcRegMultiple.CoreExecute(): boolean;
var XP,XPX,XPXM1XP,YA,EP,SC2: PManipMatMM;
    i,j: integer;
    //tps: cardinal;
begin
 TRY
   result:= TRUE;
   TRY
   //si deux appels conscutifs (a peut tre possible)
   if assigned(XPXM1) then FreeAndNil(XPXM1);
   if assigned(E) then FreeAndNil(E);
   if assigned(A) then FreeAndNil(A);
   if assigned(SCA) then FreeAndNil(SCA);
   //transposition
   TranspositionMM(X,XP);
   //matrice de variance-covariance X'X
   MultiplicationMM(XP,X,XPX);
   //inversion (X'X)-1
   if assigned(XPXM1) then XPXM1.Free();
   //tps:= GetTickCount();
   if (InversionMM(XPX,XPXM1)<>ErrMM_NoError)
    then Raise Exception.Create('SINGULAR MATRIX');
   //tps:= GetTickCount()-tps;
   //TraceLog.WriteToLogFile(format('[DUREE MATRIX INVERSION] :: %d ms',[tps]));
   //((X'X)-1)X'
   MultiplicationMM(XPXM1,XP,XPXM1XP);
   //coefficients
   if assigned(A) then A.Free();
   MultiplicationMM(XPXM1XP,Y,A);
   //projection
   MultiplicationMM(X,A,YA);
   //vecteur des erreurs
   SoustractionMM(Y,YA,E);
   //transposition du vecteur des erreurs
   TranspositionMM(E,EP);
   //calcul du carrs des rsidus
   MultiplicationMM(EP,E,SC2);
   SigmaC2:= SC2.Valeur[1,1]/(floatNbExamples-1.0*FApparentNbVar);
   If (SigmaC2<EPSILON_REGRESS_MCO)
    then Raise Exception.Create('SIGMA EPSILONESQUE');
   //matrice des ecart-types estims des coefficients
   if assigned(SCA) then SCA.Free();
   CopieMM(XPXM1,SCA);
   for i:= 1 to SCA.MatMM.Lig do
    for j:= 1 to SCA.MatMM.Col do
     SCA.Valeur[i,j]:= SigmaC2*SCA.Valeur[i,j];
   //somme des carrs totaux
   sct:= SCEMM(Y,1);
   EXCEPT
   result:= FALSE;
   END;
 FINALLY
   if assigned(XP) then FreeAndNil(XP);
   if assigned(XPX) then FreeAndNil(XPX);
   if assigned(XPXM1XP) then FreeAndNil(XPXM1XP);
   if assigned(YA) then FreeAndNil(YA);
   //if assigned(E) then FreeAndNil(E); //!\ on peut en avoir besoin par la suite !
   if assigned(EP) then FreeAndNil(EP);
   if assigned(SC2) then FreeAndNil(SC2);
   //supprimer aussi les variables inutiles maintenant telles que X,X' etc.
   //if assigned(Y) then FreeAndNil(Y);
   //if assigned(X) then FreeAndNil(X);
   if assigned(XPXM1) then FreeAndNil(XPXM1);
 END;
end;

constructor TCalcRegMultiple.create(prmY: TAttribute; prmX: TLstAttributes; prmConstant: boolean = TRUE);
begin
 inherited Create();
 FEndogenous:= prmY;
 FLstExogenous:= prmX;
 FUseConst:= prmConstant;
end;

destructor TCalcRegMultiple.destroy;
begin
  if assigned(XPXM1) then FreeAndNil(XPXM1);
  if assigned(E) then FreeAndNil(E);
  //et ceux qui sont toujours gards
  if assigned(A) then FreeAndNil(A);
  if assigned(SCA) then FreeAndNil(SCA);
  if assigned(Y) then FreeAndNil(Y);
  if assigned(X) then FreeAndNil(X);
  //suite normale
  inherited Destroy;
end;

function TCalcRegMultiple.execute(prmExamples: TExamples): boolean;
var ok: boolean;
begin
 //***********************************************************************
 //!\ attention, cette mthode est fortement dpendante de cette condition
 if (prmExamples <> NIL)
  //si les individus passs, on prpare les matrices internes
  then ok:= self.PrepareDataMatrix(prmExamples)
  //sinon, on considre que le programmeur a cr manuellement les matrices Y et X !!!
  else ok:= TRUE;
 //***********************************************************************
 //ok, les matrices Y et X sont prtes
 if ok
  then
   begin
     //rcuprer quelques paramtres
     FApparentNbVar:= X.MatMM.Col;
     if FUseConst
      then FTrueNbVar:= pred(FApparentNbVar)
      else FTrueNbVar:= FApparentNbVar;
     NbExamples:= Y.MatMM.Lig;
     floatNbExamples:= 1.0*nbExamples;
     //lancer les calculs des coefficients
     ok:= self.CoreExecute();
     //
     if ok
      then
       begin
        TRY
        //construire les matrices de rsultats
        self.PrepareResultMatrix();
        EXCEPT
        ok:= FALSE;
        END;
       end;
   end;
 //and then...
 result:= ok;
end;

function TCalcRegMultiple.GetHTMLResult: string;
var j: integer;
    buf: TBufString;
    sigma_a,t,probaT: double;
begin
 //si pas de descriptif des variables, c'est inutile
 if assigned(FEndogenous) and assigned(FLstExogenous)
  then
   begin
   buf:= TBufString.Create();
   //*** dbut des ajouts ***
   buf.BeginUpdate();
   //rsultats gnraux
   buf.AddStr('<H3>Global results</H3>');
   buf.AddStr(HTML_HEADER_TABLE_RESULT);
   buf.AddStr(format('%s<TD width="150">Endogenous attribute</TD><TD align="right" width="150"><B>%s</B></TD></TR>',[HTML_TABLE_COLOR_DATA_BLUE,FEndogenous.Name]));
   buf.AddStr(format('%s<TD>Examples</TD><TD align="right">%d</TD></TR>',[HTML_TABLE_COLOR_DATA_BLUE,NbExamples]));
   buf.AddStr(format('%s<TD>R</TD><TD align="right">%.6f</TD></TR>',[HTML_TABLE_COLOR_DATA_BLUE,R2]));
   buf.AddStr(format('%s<TD>Adjusted-R</TD><TD align="right">%.6f</TD></TR>',[HTML_TABLE_COLOR_DATA_BLUE,R2Bar]));
   buf.AddStr(format('%s<TD>Sigma error</TD><TD align="right">%.6f</TD></TR>',[HTML_TABLE_COLOR_DATA_BLUE,SQRT(SigmaC2)]));
   buf.AddStr(format('%s<TD>F-Test (%d,%d)</TD><TD align="right">%.4f (%.6f)</TD></TR>',[HTML_TABLE_COLOR_DATA_BLUE,FTrueNbVar,NbExamples-FApparentNbVar,F,ProbaF]));
   buf.AddStr('</table>');
   //tableau d'analyse de variance
   buf.AddStr('<H3>Analysis of variance</H3>');
    buf.AddStr(HTML_HEADER_TABLE_RESULT);
    buf.AddStr(format('%s<TH width="75">Source</TH><TH width="75">xSS</TH><TH width="75">d.f.</TH><TH width="75">xMS</TH><TH width="75">F</TH><TH width="75">p-value</TH></TR>',
                      [HTML_TABLE_COLOR_HEADER_GRAY]));
    buf.AddStr(format('%s<TD>%s</TD><TD align="right">%.4f</TD><TD align="right">%d</TD><TD align="right">%.4f</TD><TD align="right">%.4f</TD><TD  align="right">%.4f</TD></TR>',
                      [HTML_TABLE_COLOR_DATA_GRAY,'Regression',sce,FTrueNbVar,mce,F,ProbaF]));
    buf.AddStr(format('%s<TD>%s</TD><TD align="right">%.4f</TD><TD align="right">%d</TD><TD align="right">%.4f</TD><TD align="right"></TD><TD align="right"></TD><TD> </TD><TD> </TD></TR>',
                      [HTML_TABLE_COLOR_DATA_GRAY,'Residual',scr,NbExamples-FApparentNbVar,mcr]));
    buf.AddStr(format('%s<TD>%s</TD><TD align="right">%.4f</TD><TD align="right">%d</TD><TD align="right"></TD><TD align="right"></TD><TD align="right"></TD><TD> </TD><TD> </TD><TD> </TD></TR>',
                      [HTML_TABLE_COLOR_DATA_GRAY,'Total',sct,NbExamples-FApparentNbVar+FTrueNbVar]));
    buf.AddStr('</table>');
   //tableau des coefficients
   buf.AddStr('<H3>Coefficients</H3>');
    buf.AddStr(HTML_HEADER_TABLE_RESULT);
    buf.AddStr(format('%s<TH width="150">Attribute</TH><TH width="75">Coef.</TH><TH width="75">std</TH><TH width="75">t(%d)</TH><TH width="75">p-value</TH></TR>',
                      [HTML_TABLE_COLOR_HEADER_GRAY,NbExamples-FApparentNbVar]));
    //la constante si demande
    if FUseConst
     then
      begin
        sigma_a:= SQRT(SCA.Valeur[FApparentNbVar,FApparentNbVar]);
        t:= A.Valeur[FApparentNbVar,1]/sigma_a;
        probaT:= PStudent(NbExamples-FApparentNbVar,t);//test bilatral chez J.DEBORD
        buf.AddStr(format('%s<TD>Constant</TD><TD align="right">%.6f</TD><TD align="right">%.6f</TD><TD align="right">%.6f</TD><TD align="right">%.6f</TD></TR>',
                           [HTML_TABLE_COLOR_DATA_GRAY,A.Valeur[FApparentNbVar,1],sigma_a,t,probaT]));
      end;
    //les coefficients
    for j:= 1 to FTrueNbVar do
     begin
      sigma_a:= SQRT(SCA.Valeur[j,j]);
      t:= A.Valeur[j,1]/sigma_a;
      probaT:= PStudent(NbExamples-FApparentNbVar,t);//test bilatral
      buf.AddStr(format('%s<TD>%s</TD><TD align="right">%.6f</TD><TD align="right">%.6f</TD><TD align="right">%.6f</TD><TD align="right">%.6f</TD></TR>',
                        [HTML_TABLE_COLOR_DATA_GRAY,FLstExogenous.Attribute[pred(j)].Name,A.Valeur[j,1],sigma_a,t,probaT]));

     end;

    buf.AddStr('</table>');

   //*** fin des ajouts ***
   buf.EndUpdate();
   //bien sr
   result:= buf.BufS;
   buf.Free();
  end
 else result:= '';
end;

procedure TCalcRegMultiple.Predict(prmPred, prmErr: TAttribute);
var i,j: integer;
    value: double;
begin
 //on a ce qu'il faut pour le faire ?
 if (FEndogenous <> nil) and (FLstExogenous <> nil)
  then
   begin
     //produit matriciel en ligne sur les variables
     for i:= 1 to Fendogenous.Size do
      begin
       value:= 0.0;
       if FUseConst then value:= A.Valeur[FapparentNbVar,1];
       for j:= 1 to FLstExogenous.Count do
        value:= value+A.Valeur[j,1]*FLstExogenous.Attribute[pred(j)].cValue[i];
       prmPred.cValue[i]:= value;
       prmErr.cValue[i]:= Fendogenous.cValue[i]-value;
      end;
   end;
end;

procedure TCalcRegMultiple.PrepareResultMatrix;
begin
 //calculs...
 scr:= SigmaC2*(floatNbExamples-1.0*FApparentNbVar);//rsiduels
 sce:= sct-scr;//expliqus
 //carrs moyens
 if (FTrueNbVar > 0)
  then mce:= sce/(1.0*FTrueNbVar)
  else mce:= 0.0;
 mcr:= scr/(floatNbExamples-1.0*FApparentNbVar);
 //R2
 R2:= sce/sct;
 R2Bar:= 1.0-((floatNbExamples-1.0)/(floatNbExamples-1.0*FApparentNbVar)*(1.0-R2));
 //tests...
 if (FTrueNbVar > 0)
  then
   begin
     F:= (r2/(1.0*FTrueNbVar))/((1.0-r2)/(floatNbExamples-1.0*FApparentNbVar));
     ProbaF:= 1.0-FSnedecor(FTrueNbVar,NbExamples-FApparentNbVar,F);
   end
  else
   begin
    F:= 0.0;
    ProbaF:= 1.0;
   end;
end;

function TCalcRegMultiple.PrepareDataMatrix(prmExamples: TExamples): boolean;
var i,j: integer;
    att: TAttribute;
begin
 //init.
 result:= TRUE;
 TRY
 //nombre d'individus
 nbExamples:= prmExamples.Size;
 //nombre de variables "vraies"
 FTrueNbVar:= FLstExogenous.Count;
 if FUseConst
  then FApparentNbVar:= succ(FTrueNbVar)
  else FApparentNbVar:= FTrueNbVar;
 //remplir la var endogne Y
 Y:= PManipMatMM.Create(nbExamples,1);
 for i:= 1 to nbExamples do
  Y.Valeur[i,1]:= FEndogenous.cValue[prmExamples.Number[i]];
 //*** remplir par colonne la matrice de X ***
 X:= PManipMatMM.Create(NbExamples,FApparentNbVar);
 //pour les "vraies" variables
 for j:= 1 to FTrueNbVar do
  begin
   att:= FLstExogenous.Attribute[pred(j)];
   //parcourir les individus
   for i:= 1 to nbExamples do
    X.Valeur[i,j]:= att.cValue[prmExamples.Number[i]];
  end;
 //constante demande ?
 if FUseConst
  then
   begin
    for i:= 1 to NbExamples do
     X.Valeur[i,FApparentNbVar]:= 1.0;
   end;
 EXCEPT
 result:= false;
 END;
end;



end.
