(******************************************************************)
(* UCompRegForwardSel.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(******************************************************************)

{
@abstract(Regression multiple avec slection FORWARD des variables)
@author(Ricco)
@created(02/08/2005)

En relation avec le cours d'conomtrie (Licence IUP -- IDS), la slection FORWARD
est implmente conformment au descriptif du cours.

}
unit UCompRegForwardSel;

interface

USES
        Forms,Classes,IniFiles,
        UCompDefinition,
        UOperatorDefinition,
        UCompRegDefinition,
        UDatasetDefinition,
        UCalcStatDesContinuousPlus,
        UCalcRegLinMultiple,
        UCompRegMultiple,
        MatrixMM;

TYPE
    //gnrateur
    TMLGenRegForward = class(TMLGenComp)
                      protected
                      procedure   GenCompInitializations(); override;
                      public
                      function    GetClassMLComponent: TClassMLComponent; override;
                      end;

    //composant
    TMLCompRegForward = class(TMLCompRegression)
                        protected
                        function    getClassOperator: TClassOperator; override;
                        function    getGenericAttName(): string; override;
                        end;

    //information sur chaque corrlation partielle
    TInfoPartialCorr = record
                       //corrlation
                       correlation: single;
                       //t
                       t_square: single;
                       //p-value
                       p_value: single;
                       end; 

    //oprateur -- la rgression est toujours une rgression avec constante
    TOpRegForward = class(TOpRegMultiple)
                    private
                    //nombre d'observations ayant servi  la rgression
                    FNbExamples: integer;
                    //nombre de variables slectionnes
                    FNbSelVar: integer;
                    //liste originelles des variables exognes
                    FLstOrigExogenous: TLstAttributes;
                    //tableau des informations pour chaque tape de calcul
                    FArrayStep: array of array of TInfoPartialCorr;
                    //tableau de numro de la var. slectionne  l'tape j
                    FArraySelVar: array of integer;
                    //tableau des R
                    FArrayR2: array of double;
                    protected
                    function    getClassParameter: TClassOperatorParameter; override;
                    public
                    {dfinir une liste d'exognes vide}
                    constructor Create(AOwner: TObject); override;
                    {dtruire la liste d'exogne}
                    destructor  Destroy; override;
                    {excution effective}
                    function    CoreExecute(): boolean; override;
                    {rsum des rsultats}
                    function    getHTMLResultsSummary(): string; override;
                    end;

    //paramtrage
    TPrmOpRegForward = class(TOpPrmRegMultiple)
                       private
                       //niveau de signification dans la slection
                       FSigLevel: double;
                       protected
                       procedure   SetDefaultParameters(); override;
                       function    CreateDlgParameters(): TForm; override;
                       public
                       function    getHTMLParameters(): string; override;
                       procedure   LoadFromStream(prmStream: TStream); override;
                       procedure   SaveToStream(prmStream: TStream); override;
                       procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                       procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                       property    SigLevel: double read FSigLevel write FSigLevel;
                       end;


implementation

USES
    Math,
    FMath,
    Sysutils,
    UDatasetImplementation, UConstConfiguration, UDlgOpPrmRegForward;

{ TMLGenRegForward }

procedure TMLGenRegForward.GenCompInitializations;
begin
 FMLComp:= mlcRegression;
end;

function TMLGenRegForward.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompRegForward; 
end;

{ TMLCompRegForward }

function TMLCompRegForward.getClassOperator: TClassOperator;
begin
 result:= TOpRegForward; 
end;

function TMLCompRegForward.getGenericAttName: string;
begin
 result:= 'fwdReg';
end;

{ TOpRegForward }

(*
La recopie systmatique des donnes dans les matrices est lourde certes mais assure une
encapsulation nette de la chose !
*)

function TOpRegForward.CoreExecute: boolean;
var
    //sig. level.
    sigLevel: double;
    //tableau des variables dj slectionnes
    tabSel: array of boolean;
    //nombre de variables de dpart
    nbAllVar: integer;
    //attribut  tester
    attX: TAttribute;
    //calculs courants
    calcY,calcX: TCalcRegMultiple;
    //id de variables
    j,jMax: integer;
    //valeur de la corrlation
    correlation: double;
    //valeur de t
    t2, t2Max: double;
    //p-value corresp.
    pvalue: double;
    //ddlCourant
    ddl: integer;
    //suite OK
    okEntered: boolean;
    //test erreur
    okProcess: boolean;
begin
 okProcess:= TRUE;
 TRY
 FNbExamples:= self.WorkData.Examples.Size; 
 //rcuprer le niveau de signification demand
 sigLevel:= (self.PrmOp as TPrmOpRegForward).FSigLevel;
 //rcuprer la liste des variables originelles
 FLstOrigExogenous.Assign(self.LstExogenous);
 nbAllVar:= FLstOrigExogenous.Count;
 //initialiser les tableaux de correlations -- a peut tre mchamment gourmand a !
 //c'est vraiment  but pdagogique, dans la pratique, il ne faut pas le conserver tout le long
 SetLength(FArrayStep,nbAllVar,nbAllVar);
 //initialisation du tableau d'indicateur de slection --> tous FALSE par dfaut
 SetLength(tabSel,nbAllVar);
 //tableau de numro de variable slectionne  l'tape j
 SetLength(FArraySelVar,nbAllVar);
 //tableau des R successifs de la rgression
 setLength(FArrayR2,nbAllVar);
 //vider la liste courante des rgresseurs
 FNbSelVar:= 0;
 LstExogenous.Clear();
 //itrer pour chaque variable
 REPEAT
  jMax:= -1;
  FArraySelVar[FNbSelVar]:= -1;
  //degr de libert  ce stade -- //!\ ATTENTION, tenir compte du nombre de variables slectionns
  ddl:= self.WorkData.Examples.Size - 2 - FNbSelVar;
  //calcul des rsidus sur Y
  calcY:= TCalcRegMultiple.create(Endogenous,LstExogenous,TRUE);
  if calcY.execute(self.WorkData.Examples)
   then
    begin
      //pour chaque X potentiel
      t2Max:= -1;
      for j:= 0 to pred(nbAllVar) do
       begin
        //pas encore slectionn ?
        if not(tabSel[j])
         then
          begin
           attX:= FLstOrigExogenous.Attribute[j];
           calcX:= TCalcRegMultiple.create(attX,LstExogenous,TRUE);
           if calcX.execute(self.WorkData.Examples)
            //calculer la corrlation entre les deux rsidus
            then correlation:= calcCorrelation(calcY.vecErreur,calcX.vecErreur,1)
            else correlation:= 0.0;
           //vider maintenant
           calcX.Free();
           //calculer le t-correspondant et son carr
           t2:= correlation/SQRT((1.0 - power(correlation,2.0))/(1.0 * ddl));
           t2:= t2 * t2;
           //et la p-value corresp. -- Loi de Fisher
           pvalue:= PSnedecor(1,ddl,t2);
           //rcuprer les infos
           FArrayStep[j,FNbSelVar].correlation:= correlation;
           FArrayStep[j,FNbSelVar].t_square:= t2;
           FArrayStep[j,FNbSelVar].p_value:= pvalue;
           //c'est le max ?
           if (t2 > t2Max)
            then
             begin
              t2Max:= t2;
              jMax:= j;
             end;
          end;
       end;
    end;
  //vider le calcul du rsidu de Y
  calcY.Free();
  //on a trouv un max ?
  okEntered:= FALSE;
  if (jMax >= 0)
   then
    begin
     //calculer le coef. de dtermination de la rgression
     if (FNbSelVar = 0)
      then FArrayR2[FNbSelVar]:= power(FArrayStep[jMax,FNbSelVar].correlation,2.0)
      else FArrayR2[FNbSelVar]:= 1.0 - (1.0 - FArrayR2[pred(FNbSelVar)]) * (1.0 - power(FArrayStep[jMax,FNbSelVar].correlation,2.0));
     //et s'il est significatif, l'ajouter dans la liste des slectionn
     if (FArrayStep[jMax,FnbSelVar].p_value <= sigLevel)
      then
       begin
         //l'ajouter parmi les slectionns
         tabSel[jMax]:= TRUE;
         FArraySelVar[FNbSelVar]:= jMax;
         //incrmenter le nombre de slectionns
         inc(FNbSelVar);
         LstExogenous.Add(FLstOrigExogenous.Attribute[jMax]);
         //et on est content
         okEntered:= TRUE;
       end;
    end;
 UNTIL not(okEntered) OR (FNbSelVar = nbAllVar);
 EXCEPT
 okProcess:= FALSE;
 END;
 //vider ce qui n'est plus utilis
 Finalize(tabSel);
 //si tout s'est bien pass, lancer la rgression finale
 if okProcess
  then okProcess:= inherited CoreExecute();
 //and then
 result:= okProcess;
end;

constructor TOpRegForward.Create(AOwner: TObject);
begin
 inherited Create(AOwner);
 FLstOrigExogenous:= TLstAttributes.Create(FALSE,Workdata.LstAtts[asAll].Size);
end;

destructor TOpRegForward.Destroy;
begin
 FLstOrigExogenous.Free();
 if (FArrayR2 <> nil) then Finalize(FArrayR2);
 if (FArraySelVar <> nil) then Finalize(FArraySelVar);
 if (FArrayStep <> nil) then Finalize(FArrayStep);
 inherited;
end;

function TOpRegForward.getClassParameter: TClassOperatorParameter;
begin
 result:= TPrmOpRegForward;
end;

function TOpRegForward.getHTMLResultsSummary: string;
var s: string;
    j,i: integer;
    nbAllVar,columns: integer;
begin
 nbAllVar:= FLstOrigExogenous.Count;
 columns := MIN(FNbSelVar+1,nbAllVar);  
 //appeler l'affichage des coeffs de la rgression finale
 s:= self.getCoefsRegressionHTML();
 //afficher le tableau du processus de slection
 s:= s+'<H3>Forward Selection Process</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT;
 //les tapes (en-tte)
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TD align="right">partial corr.<br>F (p-value)</TD>';
 for j:= 0 to pred(columns) do
  s:= s+format('<TD align="center">Step %d</TD>',[j+1]);
 s:= s+'</TR>';
 //les ddl.
 s:= s+HTML_TABLE_COLOR_DATA_BLUE+'<TD>d.f.</TD>';
 for j:= 0 to pred(columns) do
  s:= s+format('<TD align="center">%d</TD>',[FNbExamples-2-j]);
 s:= s+'</TR>';
 //les corrlations partielles
 s:= s+HTML_TABLE_COLOR_DATA_GREEN+'<TD>r(Y,Xj*/Xj1,Xj2...)</TD>';
 for j:= 0 to pred(columns) do
  begin
   if (FArraySelVar[j] >= 0)
    then s:= s+format('<TD align="center">%s : %.4f</TD>',[FLstOrigExogenous.Attribute[FArraySelVar[j]].Name,FArrayStep[FArraySelVar[j],j].correlation])
    else s:= s+'<TD align="center">-</TD>';
  end;
 s:= s+'</TR>';
 //les R successifs
 s:= s+HTML_TABLE_COLOR_DATA_BLUE+'<TD>R</TD>';
 for j:= 0 to pred(columns) do
  begin
   if (FArraySelVar[j] >= 0)
    then s:= s+format('<TD align="center">%.4f</TD>',[FArrayR2[j]])
    else s:= s+'<TD align="center">-</TD>';
  end;
 s:= s+'</TR>';
 //afficher maintenant les infos de corrlations partielles
 for i:= 0 to pred(nbAllVar) do
  begin
   s:= s+HTML_TABLE_COLOR_DATA_GRAY;
   //le nom de la variable
   s:= s+format('<TD %s>%s</TD>',[HTML_BGCOLOR_HEADER_GRAY,FLstOrigExogenous.Attribute[i].Name]);
   //pour chaque tape
   for j:= 0 to pred(columns) do
    begin
     if (FArraySelVar[j] <> i)
      then s:= s+format('<TD align="right">%.4f<br>%.2f (%.4f)</TD>',[FArrayStep[i,j].correlation,FArrayStep[i,j].t_square,FArrayStep[i,j].p_value])
      else s:= s+format('<TD align="right" %s>%.4f<br>%.2f (%.4f)</TD>',[HTML_BGCOLOR_DATA_GREEN,FArrayStep[i,j].correlation,FArrayStep[i,j].t_square,FArrayStep[i,j].p_value]);
    end;
   s:= s+'</TR>';
  end;
 //fin de la table
 s:= s+'</table>';
 //les rsidus
 s:= s+self.getResidualsHTML();
 //and then
 result:= s;
end;

{ TPrmOpRegForward }

function TPrmOpRegForward.CreateDlgParameters: TForm;
begin
 result:= TDlgOpPrmRegForward.CreateFromOpPrm(self);
end;

function TPrmOpRegForward.getHTMLParameters: string;
var s: string;
begin
 s:= '<P><B>Regression parameters</B><BR>'+HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_DATA_BLUE+'<TD width=120>Include intercept</TD>';
 if Constant
  then s:= s+format('<TD width=50>%s</TD>',['yes'])
  else s:= s+format('<TD width=50>%s</TD>',['no']);
 s:= s+'</TR>'+HTML_TABLE_COLOR_DATA_BLUE+'<TD width=120>Sig. Level</TD>'+format('<TD>%.4f</TD></TR>',[FSigLevel]);
 s:= s+'</table>';
 result:= s;
end;

procedure TPrmOpRegForward.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 FSigLevel:= prmINI.ReadFloat(prmSection,'sigLevel',FSigLevel);
end;

procedure TPrmOpRegForward.LoadFromStream(prmStream: TStream);
begin
 inherited;
 prmStream.ReadBuffer(FSigLevel,sizeof(FSigLevel));
end;

procedure TPrmOpRegForward.SaveToINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 inherited;
 prmINI.WriteFloat(prmSection,'sigLevel',FSigLevel);
end;

procedure TPrmOpRegForward.SaveToStream(prmStream: TStream);
begin
 inherited;
 prmStream.WriteBuffer(FSigLevel,sizeof(FSigLevel));
end;

procedure TPrmOpRegForward.SetDefaultParameters;
begin
 //trs important, c'est notre hypothse de travail, ce paramtre n'est pas modifiable
 Constant:= TRUE;
 //modifiable par l'utilisateur
 FSigLevel:= 0.05;
end;

initialization
 RegisterClass(TMLGenRegForward);
end.
