(********************************************************************)
(* UCompFactAnalysisMCA.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(********************************************************************)

{
@abstract(Analyse (factorielle) des correspondances multiples)
@author(Ricco)
@created(12/01/2004)
}
unit UCompFactAnalysisMCA;

interface

USES
        Forms, Classes,
        IniFiles,
        UCompDefinition,
        UCompFADefinition,
        UOperatorDefinition,
        UCalcStatDes,
        UCalcStatDesCrossTab,
        Matrices;

TYPE
        {gnrateur de composant ACM}
        TMLGenCompFactMCA = class(TMLGenComp)
                            protected
                            procedure   GenCompInitializations(); override;
                            public
                            function    GetClassMLComponent: TClassMLComponent; override;
                            end;

        {composant ACM}
        TMLCompFactMCA = class(TMLCompFactAnalysis)
                         protected
                         function    getClassOperator: TClassOperator; override;
                         function    GetLogResultDescription(): string; override;
                         end;

        {oprateur ACM, on travaille sur les target}
        TOpFactMCA = class(TOpFactAnalysis)
                     private
                     {statistiques univaries sur les attributs}
                     FLstStatDiscrete: TLstCalcStatDesDiscrete;
                     {statistiques bivaries sur les couples d'attributs}
                     FLstStatCrossTab: TLstCalcStatDesCrossTab;
                     {Vecteur des distributions marginales des modalits}
                     FVecDistValues: PVector;
                     {vecteur des valeurs propres}
                     FEigVals: PVector;
                     {matrice des vecteurs propres}
                     FEigVecs: PMatrix;
                     {dimension globale, addition dn nombre des valeurs de l'ensemble des modalits}
                     FDimAllValues: integer;
                     {nombre de valeurs propres}
                     FNbEigens: integer;
                     {calculs stats desc prparation du tableau de BURT}
                     procedure   CalcStats();
                     {construire les val-vec propres}
                     procedure   CalcEigens();
                     {calculer les coordonnes des modalits sur les axes}
                     function    BuildCoordValues(): PMatrix;
                     {calculer les cos des modalits,  partir des coordonnes}
                     function    BuildCosValues(prmCoord: PMatrix): PMatrix;
                     {calculer les contributions des modalits}
                     function    BuildContribValues(prmCoord: PMatrix): PMatrix;
                     {effectuer les projections sur les axes des individus}
                     procedure   SetProjections();
                     protected
                     procedure   DelPrivateMatrix(); override;
                     function    getClassParameter: TClassOperatorParameter; override;
                     function    CoreExecute(): boolean; override;
                     function    CheckAttributes(): boolean; override;
                     public
                     constructor Create(AOwner: TObject); override;
                     destructor  Destroy; override;
                     function    getHTMLResultsSummary(): string; override;
                     end;

        {paramtre oprateur ACM}
        TOpPrmFactMCA = class(TOpPrmFactAnalysis)
                        private
                        {nombre d'axes  afficher}
                        FNbAxis: integer;
                        {seuil de la valeur test}
                        FThresoldValue: double;
                        protected
                        function    CreateDlgParameters(): TForm; override;
                        procedure   SetDefaultParameters(); override;
                        public
                        procedure   LoadFromStream(prmStream: TStream); override;
                        procedure   SaveToStream(prmStream: TStream); override;
                        procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                        procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                        function    getHTMLParameters(): string; override;
                        property    NbAxis: integer read FNbAxis write FNbAxis;
                        property    ThresoldValue: double read FThresoldValue write FThresoldValue;
                        end;


implementation

uses
        SysUtils, Math,
        UDatasetDefinition, UDatasetImplementation, UCalcMatrixToAttributes,
        UCalcDistribution, UCalcMatrixAdditionalFunctions, UConstConfiguration,
  UDlgOpPrmMCA, ULogFile, UStringAddBuffered, UStringsResources;

{ TMLGenCompFactCMA }

procedure TMLGenCompFactMCA.GenCompInitializations;
begin
 FMLComp:= mlcFactorialAnalysis;
 //FMLNumIcon:= 11;
 //FMLCompName:= str_comp_name_fact_mca;
 //FMLBitmapFileName:= 'MLFactorialMCA.bmp';
end;

function TMLGenCompFactMCA.GetClassMLComponent: TClassMLComponent;
begin
 result:= TMLCompFactMCA;
end;

{ TMLCompFactMCA }

function TMLCompFactMCA.getClassOperator: TClassOperator;
begin
 result:= TOpFactMCA;
end;

function TMLCompFactMCA.GetLogResultDescription: string;
var data: TMLDataset;
begin
 data:= (Operator as TOpFactMCA).WorkData;
 result:= Format('%d attributes, and %d examples',[data.LstAtts[asInput].Count,data.Examples.Size]);
end;

{ TOpPrmFactMCA }

function TOpPrmFactMCA.CreateDlgParameters: TForm;
begin
 result:= TDlgOpPrmMCA.CreateFromOpPrm(self);
end;

function TOpPrmFactMCA.getHTMLParameters: string;
var s,sPrm: string;
begin
 s:= HTML_HEADER_TABLE_RESULT;
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH colspan=2>MCA parameters</TH></TR>';
 if (self.NbAxis<0)
  then sPrm:= 'all'
  else sPrm:= 'limited';
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Generate axis</TD><TD align="right">%s</TD></TR>',[sPrm]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Number of axis</TD><TD align="right">%d</TD></TR>',[self.NbAxis]);
 s:= s+HTML_TABLE_COLOR_DATA_GRAY+format('<TD>Thresold value</TD><TD align="right">%.2f</TD></TR>',[self.ThresoldValue]);
 s:= s+'</table>';
 result:= s;
end;

procedure TOpPrmFactMCA.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
begin
 FNbAxis:= prmINI.ReadInteger(prmSection,'nb_axis',FNbAxis);
 FThresoldValue:= prmINI.ReadFloat(prmSection,'thresold_value',FThresoldValue);
end;

procedure TOpPrmFactMCA.LoadFromStream(prmStream: TStream);
begin
 prmStream.ReadBuffer(FNBAxis,sizeof(FNBAxis));
 prmStream.ReadBuffer(FThresoldValue,sizeof(FThresoldValue));
end;

procedure TOpPrmFactMCA.SaveToINI(prmSection: string; prmINI: TMemIniFile);
begin
 prmINI.WriteInteger(prmSection,'nb_axis',FNbAxis);
 prmINI.WriteFloat(prmSection,'thresold_value',FThresoldValue);
end;

procedure TOpPrmFactMCA.SaveToStream(prmStream: TStream);
begin
 prmStream.WriteBuffer(FNBAxis,sizeof(FNBAxis));
 prmStream.WriteBuffer(FThresoldValue,sizeof(FThresoldValue));
end;

procedure TOpPrmFactMCA.SetDefaultParameters;
begin
 FNbAxis:= 5;
 FThresoldValue:= 4.0;
end;

{ TOpFactMCA }

function TOpFactMCA.BuildContribValues(prmCoord: PMatrix): PMatrix;
var i,j: integer;
    n,m: double;
    mat: PMatrix;
begin
 //transtypage pour calculs
 n:= 1.0*workdata.Examples.Size;
 m:= 1.0*workdata.LstAtts[asInput].Count;
 //matrice des contributions
 dimMatrix(mat,FDimAllValues,FNbEigens);
 for i:= 1 to FDimAllValues do
  for j:= 1 to FNbEigens do
   mat^[i]^[j]:= FVecDistValues^[i]*SQR(prmCoord^[i]^[j])/(n*m*FEigVals^[j]);
 //renvoyer le tout
 result:= mat;
end;

function TOpFactMCA.BuildCoordValues: PMatrix;
var i,j: integer;
    mat: PMatrix;
    n,m: double;

begin
 //transtypage
 n:= 1.0*workdata.Examples.Size;
 m:= 1.0*workdata.LstAtts[asInput].Count;
 //matrice des coordonnes
 dimMatrix(mat,FDimAllValues,FNbEigens);
 for i:= 1 to FDimAllValues do
  for j:= 1 to FNbEigens do
   begin
   //coordModa^[i]^[j]:= sqrt(1.0*ex.Size*lstAtt.Count/MATBURT^[i]^[i]*goodEVal^[j])*goodEVec^[i]^[j];//test...
   //TraceLog.WriteToLogFile(format('vectdist>>%.4f | eigvals >> %.4f | eigvec >> %.4f',[FVecDistValues^[i],FEigVals^[j],FEigVecs^[i]^[j]]));
   if (FEigVals^[j]>0)
    then mat^[i]^[j]:= SQRT(1.0*n*m/FVecDistValues^[i]*FEigVals^[j])*FEigVecs^[i]^[j]
    else mat^[i]^[j]:= 0.0;
   end;
 //renvoyer le tout
 result:= mat;
end;

function TOpFactMCA.BuildCosValues(prmCoord: PMatrix): PMatrix;
var i,j: integer;
    matCos: PMatrix;
    n: double;
begin
 n:= 1.0*workdata.Examples.Size;
 
 dimMatrix(matCos,FDimAllValues,FNbEigens);
 for i:= 1 to FDimAllValues do
  for j:= 1 to FNbEigens do
   if (FVecDistValues^[i]>0) and (n/FVecDistValues^[i]-1.0<>0)
    then matCos^[i]^[j]:= SQR(prmCoord^[i]^[j]/sqrt(1.0*n/FVecDistValues^[i]-1.0))
    else matCos^[i]^[j]:= 0.0;

 result:= matCos;
end;

procedure TOpFactMCA.CalcEigens;
var MatBURT,MatACM: PMatrix;
    tmpEVal: PVector;
    tmpEVec: PMatrix;
    i,j: integer;
begin
 //tableau de BURT
 MatBURT:= BuildBurtMat(FDimAllValues,Workdata.Examples,WorkData.LstAtts[asInput],FLstStatDiscrete,FLstStatCrossTab);
 //tableau  diagonaliser
 MatACM := BuildACMMat(FDimAllValues,WorkData.LstAtts[asInput].Count,Workdata.Examples.Size,MatBURT);
 //giclons la matrice de BURT
 delMatrix(MatBURT,FDimAllValues,FDimAllValues);
 //valeurs et vecteurs propres temporaires, avant giclation des lments triviaux
 GetEigensFromSymetricMatrix(MatACM,FDimAllValues,tmpEVal,tmpEVec);
 //giclons la matrice diagonalise
 delMatrix(MatACM,FDimAllValues,FDimAllValues);
 //rcuprer les valeurs propres intrssantes, et les vecteurs propres associes
 dimVector(FEigVals,FNbEigens);
 dimMatrix(FEigVecs,FDimAllValues,FNbEigens);
 for j:= 1 to FNbEigens do
  begin
   FEigVals^[j]:= tmpEVal^[succ(j)];
   for i:= 1 to FDimAllValues do
    FEigVecs^[i]^[j]:= tmpEVec^[i]^[succ(j)];
  end;
 //dtruire les eigens temporaires
 delVector(tmpEVal,FDimAllValues);
 delMatrix(tmpEVec,FDimAllValues,FDimAllValues);
end;

procedure TOpFactMCA.CalcStats;
var i,i_cumul,j: integer;
    stat: TCalcStatDesDiscrete;
begin
 FLstStatDiscrete.RebuildStatDes(WorkData.LstAtts[asInput],Workdata.Examples);
 FLstStatCrossTab.RebuildStatDes(WorkData.LstAtts[asInput],Workdata.Examples);
 //calculer galement qqs indicateurs ncessaires
 //le vecteur des distributions marginales, la dimension et la liste des modalits
 FDimAllValues:= 0;
 for i:= 0 to pred(FLstStatDiscrete.Count) do
  begin
   stat:= TCalcStatDesDiscrete(FLstStatDiscrete.Stat(i));
   inc(FDimAllValues,stat.Attribute.nbValues);
  end;
 DimVector(FVecDistValues,FDimAllValues);//ok, FDimValues contient la bonne valeur
 i_cumul:= 0;
 for i:= 1 to FLstStatDiscrete.Count do
  begin
   stat:= TCalcStatDesDiscrete(FLstStatDiscrete.Stat(pred(i)));
   for j:= 1 to stat.TabFreq.Size do
    begin
     inc(i_cumul);
     FVecDistValues^[i_cumul]:= stat.TabFreq.Value[j];
    end;
  end;
 //nombre de vecteurs propres  rcuprer rellement
 FNbEigens:= FDimAllValues-WorkData.LstAtts[asInput].Count;
end;

function TOpFactMCA.CheckAttributes: boolean;
begin
 result:= (self.WorkData.LstAtts[asInput].Count>0) and (self.WorkData.LstAtts[asInput].isAllCategory(caDiscrete));
end;

function TOpFactMCA.CoreExecute: boolean;
begin
 result:= TRUE;
 TRY
 //TraceLog.WriteToLogFile('[ACM] begin calc stat');
 self.CalcStats();
 //TraceLog.WriteToLogFile('[ACM] begin calc eigens');
 self.CalcEigens();
 TraceLog.WriteToLogFile('[ACM] begin set projections');
 self.SetProjections();
 EXCEPT
 self.DelPrivateMatrix();
 result:= FALSE;
 END;
end;

constructor TOpFactMCA.Create(AOwner: TObject);
begin
 inherited Create(AOwner);
 FLstStatDiscrete:= TLstCalcStatDesDiscrete.Create(NIL,NIL);
 FLstStatCrossTab:= TLstCalcStatDesCrossTab.Create(NIL,NIL);
end;

procedure TOpFactMCA.DelPrivateMatrix;
begin
 FLstStatDiscrete.FreeAll;
 FLstStatCrossTab.FreeAll;
 delVector(FVecDistValues,FDimAllValues);
 delVector(FEigVals,FNbEigens);
 delMatrix(FEigVecs,FDimAllValues,FNbEigens);
end;

destructor TOpFactMCA.Destroy;
begin
 inherited Destroy;
 FLstStatDiscrete.Free;
 FLstStatCrossTab.Free;
end;

function TOpFactMCA.getClassParameter: TClassOperatorParameter;
begin
 result:= TOpPrmFactMCA; 
end;

function TOpFactMCA.getHTMLResultsSummary: string;
var s: string;
    sum,tot,n,testValue,thresoldValue: double;
    i,j,k: integer;
    coordValues: PMatrix;
    cosValues: PMatrix;
    contribValues: PMatrix;
    att: TAttribute;
    nbAxis,numDim: integer;
    bs: TBufString;
begin
 bs:= TBufString.Create();
 bs.BeginUpdate();
 //seuil de la valeur test
 thresoldValue:= (PrmOp as TOpPrmFactMCA).ThresoldValue;
 //effectif de calcul
 n:= 1.0*workdata.Examples.Size;
 //rcupration du nombre d'axes  afficher
 (*
 if ((PrmOp as TOpPrmFactMCA).NbAxis<0)
  then nbAxis:= FNbEigens
  else nbAxis:= Min(FNbEigens,(PrmOp as TOpPrmFactMCA).NbAxis);
 *)
 //toujours limiter  5 sinon a devient trop lourd  charger dans le webbrowser
 nbAxis:= min(5,FNbEigens);
 //construction du tableau
 s:= '<P><H3>Eigen values</H3>';
 s:= s+HTML_HEADER_TABLE_RESULT+HTML_TABLE_COLOR_HEADER_GRAY+
       '<TH>Axis</TH><TH>Eigen value</TH><TH>% explained</TH><TH width="200">Histogram</TH><TH>% cumulated</TH></TR>';
 //inertie totale
 if (workdata.LstAtts[asInput].Count>1)
  then tot:= 1.0*FDimAllValues/(1.0*workdata.LstAtts[asInput].Count)-1.0
  else tot:= 0.0;
 sum:= 0.0;
 for i:= 1 to FNbEigens do
  begin
   sum:= sum+FEigVals^[i];
   if (tot>0)
    then s:= s+HTML_TABLE_COLOR_DATA_GRAY+
               format('<TH>%d</TH><TD align=right>'+STR_FORMAT_VIEW_STAT_ACCURACY_HIGH+'</TD><TD align=right>%.2f%s</TD><TD>%s</TD><TD align=right>%.2f%s</TD></TR>',
                      [i,FEigVals^[i],100.0*FEigVals^[i]/tot,'%',getHtmlHistogram(TRUNC(100.0*FEigVals^[i]/tot)),100.0*sum/tot,'%'])+'</TR>';
  end;
 s:= s+'</table>';
 bs.AddStr(s);
 
 //les indicateurs de qualit de reprsentation des modalits sur les axes
 coordValues:= self.BuildCoordValues();
 cosValues:= self.BuildCosValues(coordValues);
 contribValues:= self.BuildContribValues(coordValues);
 //affichage

 //ligne d'en-tte
 s:= '<P><H3>Values characterization</H3>';
 
 s:= s+HTML_HEADER_TABLE_RESULT+HTML_TABLE_COLOR_HEADER_GRAY;
 s:= s+'<TH>Values</TH>'+format('<TH colspan=%d>Coordinate</TH>',[nbAxis])+
       format('<TH colspan=%d>Cos</TH>',[nbAxis])+format('<TH colspan=%d>Contrib</TH>',[nbAxis])+
       '</TR>';

 s:= s+HTML_TABLE_COLOR_HEADER_GRAY+'<TH width=150>Attribute = Value</TH>';

 for j:= 1 to nbAxis do
  s:= s+'<TD>'+format('coord_%d',[j])+'</TD>';

 for j:= 1 to nbAxis do
  s:= s+'<TD>'+format('cos2_%d',[j])+'</TD>';

 for j:= 1 to nbAxis do
  s:= s+'<TD>'+format('contrib_%d',[j])+'</TD>';  
  
 s:= s+'</TR>';

 bs.AddStr(s);

 numDim:= 0;
 for i:= 1 to workdata.LstAtts[asInput].Count do
  begin
   //rassembler les infos pour un attribut
   //nom de variable
   att:= workdata.LstAtts[asInput].Attribute[pred(i)];
   //liste des modalits par variable
   s:= HTML_TABLE_COLOR_DATA_GRAY+'<TD>';
   s:= s+HTML_HEADER_TABLE_RESULT;
   for j:= 1 to att.nbValues do
    s:= s+HTML_TABLE_COLOR_DATA_BLUE+format('<TD align=left width=150>%s = %s</TD></TR>',[att.Name,att.LstValues.getDescription(j)]);
   s:= s+'</table>';
   s:= s+'</TD>';
   
   //coordonnes pour chaque axe
   for j:= 1 to nbAxis do
    begin
     s:= s+'<td>';
     s:= s+HTML_HEADER_TABLE_RESULT;
     //lister les lments  afficher : adresse -->> [numDim+k,j]
     for k:= 1 to att.nbValues do
      begin
       s:= s+HTML_TABLE_COLOR_DATA_GRAY;
       //calculer la valeur test
       try
       testValue:= 1.0*ABS(1.0*coordValues^[numDim+k]^[j])*SQRT(FVecDistValues^[numDim+k]*(n-1.0)/(n-FVecDistValues^[numDim+k]));
       Except
       testValue:= 0.0;
       end;
       //la ccordonne
       if (testValue>thresoldValue)
        then s:= s+format('<td align=right %s>%.3f</td>',[HTML_BGCOLOR_DATA_RED,coordValues^[numDim+k]^[j]])
        else s:= s+format('<td align=right %s>%.3f</td>',[HTML_BGCOLOR_DATA_GRAY,coordValues^[numDim+k]^[j]]);
       //passage  la ligne suivante
       s:= s+'</tr>';
      end;
     s:= s+'</table>';
     //passage  l'axe suivant
     s:= s+'</td>';
    end;

   //cos pour chaque axe
   for j:= 1 to nbAxis do
    begin
     s:= s+'<td>';
     s:= s+HTML_HEADER_TABLE_RESULT;
     //lister les lments  afficher : adresse -->> [numDim+k,j]
     for k:= 1 to att.nbValues do
      begin
       s:= s+HTML_TABLE_COLOR_DATA_GRAY;
       //la coordonne
       s:= s+format('<td align=right>%.3f</td>',[cosValues^[numDim+k]^[j]]);
       //passage  la ligne suivante
       s:= s+'</tr>';
      end;
     s:= s+'</table>';
     //passage  l'axe suivant
     s:= s+'</td>';
    end;

   //contribution pour chaque axe
   for j:= 1 to nbAxis do
    begin
     s:= s+'<td>';
     s:= s+HTML_HEADER_TABLE_RESULT;
     //lister les lments  afficher : adresse -->> [numDim+k,j]
     for k:= 1 to att.nbValues do
      begin
       s:= s+HTML_TABLE_COLOR_DATA_GRAY;
       s:= s+format('<td align=right>%.3f</td>',[contribValues^[numDim+k]^[j]]);
       //passage  la ligne suivante
       s:= s+'</tr>';
      end;
     s:= s+'</table>';
     //passage  l'axe suivant
     s:= s+'</td>';
    end;

   //fin d'une variable
   s:= s+'</tr>';
   //incrmenter le nglobal de ligne dans les matrices
   inc(numDim,att.nbValues);
   bs.AddStr(s);
  end;
 s:= '</table>';
 //dtruire les matrices intermdiaires
 delMatrix(coordValues,FDimAllValues,FNbEigens);
 delMatrix(cosValues,FDimAllValues,FNbEigens);
 delMatrix(contribValues,FDimAllValues,FNbEigens);
 
 bs.AddStr(s);
 //renvoyer la sauce
 bs.EndUpdate();
 result:= bs.BufS;
 bs.Free;
end;

procedure TOpFactMCA.SetProjections;
var coef,v: double;
    i,j,k,j_cumul,j_dim: integer;
    s: double;
    nbAxis,attSize: integer;
    newAtt,att: TAttribute;
begin
 coef:= 1.0*sqrt(1.0*workdata.Examples.Size/(1.0*workdata.LstAtts[asInput].Count));
 if ((PrmOp as TOpPrmFactMCA).NbAxis<0)
  then nbAxis:= FNbEigens
  else nbAxis:= Min(FNbEigens,(PrmOp as TOpPrmFactMCA).NbAxis);
 attSize:= workdata.LstAtts[asInput].Size;
 GenAtts.Clear;
 //pour chaque axe
 for k:= 1 to nbAxis do
  begin
   newAtt:= TAttContinue.Create(format('MCA_%d_Axis_%d',[(MLOwner as TMLCompFactMCA).Number,k]),attSize);
   //pour chaque individu
   for i:= 1 to attSize do
    begin
     s:= 0.0;
     //l'objectif toujours est de ne pas construire explicitement de tableau disjonctif complet
     j_cumul:= 0;
     for j:= 1 to workdata.LstAtts[asInput].Count do
      begin
       att:= workdata.LstAtts[asInput].Attribute[pred(j)];
       j_dim:= j_cumul+att.dValue[i];//d'o l'intrt d'avoir des modalits numrots 1,2,3...
       v:= FEigVecs^[j_dim]^[k]/sqrt(FVecDistValues^[j_dim]);
       s:= s+v;
       inc(j_cumul,att.nbValues);
      end;
     s:= coef*s;
     newAtt.cValue[i]:= s;
    end;
   GenAtts.Add(newAtt);
  end;
end;

initialization
 RegisterClass(TMLGenCompFactMCA);
end.
