(***********************************************************************)
(* UCalcMatrixToAttributes.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(***********************************************************************)

{
@abstract(Key unit: connects matrices with lists of attributes)
@author(Ricco)
@created(12/01/2004)
Optimisations must be aggressive here: the speed of the computations depends heavily on this unit.
}
unit UCalcMatrixToAttributes;

interface

uses
        UDatasetExamples,
        UDatasetDefinition,
        UCalcStatDes,
        UCalcStatDesCrossTab,
        Matrices;

TYPE
        {Transformation applied to each attribute when building the variance-covariance
         matrix, i.e. the frequently requested X'X cross-product matrix.
         As used by BuildMatVCV:
           vcvNormNone       - values are used as is;
           vcvNormCentered   - the attribute average is subtracted;
           vcvNormNormalized - the average is subtracted and the result is divided
                               by the attribute standard deviation.}
        TEnumTypeCVCNormalisation = (vcvNormNone,vcvNormCentered,vcvNormNormalized);


{Compute a variance-covariance type matrix from a list of attributes, together with
 the associated descriptive statistics.
 prmExamples : examples over which the cross-products are summed.
 prmLstAtt   : attributes; the result is a Count x Count symmetric matrix (1-based).
 prmVcvNorm  : centring / scaling applied to every attribute before the product.
 prmStats    : optional precomputed statistics; when nil they are computed
               internally and released before returning.
 Returns the newly allocated matrix.}
function BuildMatVCV(prmExamples: TExamples; prmLstAtt: TLstAttributes;
                     prmVcvNorm: TEnumTypeCVCNormalisation;
                     prmStats: TLstCalcStatDesContinuous = nil): PMatrix;

{Compute the BURT table used for multiple correspondence analysis (roughly the X'X matrix).
 dimMat       : dimension of the square matrix to allocate.
 prmLstAtt    : discrete attributes; each one occupies a block of NbValues rows/columns.
 prmUnivStats : univariate frequency tables, used for the diagonal blocks.
 prmBiStats   : cross-tabulations, consumed sequentially for each pair (i,j) with i<j
                and used for the off-diagonal blocks (mirrored by symmetry).
 prmExamples is not read by the implementation.
 Returns the newly allocated matrix.}
function BuildBURTMat(dimMat: integer; prmExamples: TExamples; prmLstAtt: TLstAttributes;
                      prmUnivStats: TLstCalcStatDesDiscrete;
                      prmBiStats: TLstCalcStatDesCrossTab): PMatrix;

{Compute the matrix to diagonalise for multiple correspondence analysis, starting
 from the BURT matrix: each cell is divided by the square root of the product of
 its row margin and its column margin (margins are row sums of BurtMat).
 dimMat  : dimension of BurtMat and of the result.
 BurtMat : the BURT matrix (1-based, dimMat x dimMat).
 nbAtt and nbEx are not read by the implementation.
 Returns the newly allocated matrix.}
function BuildACMMat(dimMat,nbAtt,nbEx: integer; BurtMat: PMatrix): PMatrix;

implementation

uses    SysUtils,
        FMath, ULogFile;


{
Build the symmetric cross-product matrix of a list of continuous attributes.

Cell (i,j) (1-based) is the sum over the examples of
  ((xi - mI)/sI) * ((xj - mJ)/sJ)
where m is the centring offset and s the scaling divisor of each attribute:
  vcvNormNone       : m = 0,       s = sqrt(Size)
  vcvNormCentered   : m = Average, s = sqrt(Size)
  vcvNormNormalized : m = Average, s = StdDev*sqrt(Size)

prmExamples : examples to sum over (Number[1..Size]).
prmLstAtt   : attributes; the result is Count x Count.
prmVcvNorm  : normalisation mode.
prmStats    : optional precomputed statistics; when nil they are created here
              and always released, even if an exception is raised.
Returns the newly allocated matrix.
NOTE: with vcvNormNormalized an attribute whose StdDev is zero makes the scaling
divisor zero, so the division in the inner loop fails.
}
function BuildMatVCV(prmExamples: TExamples; prmLstAtt: TLstAttributes;
                     prmVcvNorm: TEnumTypeCVCNormalisation;
                     prmStats: TLstCalcStatDesContinuous = nil): PMatrix;
var stats: TLstCalcStatDesContinuous;
    ownerstats: boolean;
    Mat: PMatrix;
    n: integer;
    i,j,k: integer;
    attI,attJ: TAttribute;
    s: float;
    mI,mJ,sI,sJ: float;
    example: integer;
    sqrt_exSize: double;

 {Centring offset (m) and scaling divisor (s) of the attribute at 0-based index idx,
  according to prmVcvNorm; s already includes the sqrt(Size) factor.}
 procedure GetNormalisation(idx: integer; var m,s: float);
 begin
  m:= 0.0;
  s:= 1.0;
  case prmVcvNorm of
   vcvNormCentered:
    m:= TCalcStatDesContinuous(stats.Stat(idx)).Average;
   vcvNormNormalized:
    begin
     m:= TCalcStatDesContinuous(stats.Stat(idx)).Average;
     s:= TCalcStatDesContinuous(stats.Stat(idx)).StdDev;
    end;
  end;
  s:= s*sqrt_exSize;
 end;

begin
 //descriptive statistics: use the caller's, or compute (and own) them here
 ownerstats:= not assigned(prmStats);
 if ownerstats
  then stats:= TLstCalcStatDesContinuous.Create(prmLstAtt,prmExamples)
  else stats:= prmStats;
 try
  //the matrix to build
  n:= prmLstAtt.Count;
  DimMatrix(Mat,n,n);
  sqrt_exSize:= sqrt(1.0*prmExamples.Size);
  //for each attribute
  for i:= 1 to n do
   begin
    attI:= prmLstAtt.Attribute[pred(i)];
    GetNormalisation(pred(i),mI,sI);
    //start at i: the matrix is symmetric, so only the upper triangle is computed
    for j:= i to n do
     begin
      attJ:= prmLstAtt.Attribute[pred(j)];
      GetNormalisation(pred(j),mJ,sJ);
      //accumulate the normalised cross-product over all examples
      s:= 0.0;
      for k:= 1 to prmExamples.Size do
       begin
        example:= prmExamples.Number[k];
        s:= s+(attI.cValue[example]-mI)/sI*(attJ.cValue[example]-mJ)/sJ
       end;
      Mat^[i]^[j]:= s;
      Mat^[j]^[i]:= s;
     end;
   end;
  RESULT:= Mat;
 finally
  //release the statistics only when they were created here
  if ownerstats
   then stats.Free;
 end;
end;

{
Build the BURT table (block matrix of frequencies) for a list of discrete attributes.

Each attribute occupies a block of NbValues consecutive rows/columns, in list order.
  - Diagonal block (i = j): the frequencies of prmUnivStats.Stat(i-1).TabFreq on
    the diagonal, zero elsewhere.
  - Off-diagonal block (i < j): the cells of the next cross-tabulation taken from
    prmBiStats (consumed sequentially, in the order the pairs are visited), also
    written transposed in the symmetric block.

dimMat       : dimension of the square matrix to allocate.
prmExamples  : not read by this routine.
prmLstAtt    : the discrete attributes.
prmUnivStats : univariate statistics, one per attribute.
prmBiStats   : cross-tabulations, one per pair (i,j) with i<j.
Returns the newly allocated matrix.

An exception raised while writing a cell is not propagated: it is reported through
TraceLog and the fill continues with the next cell.
}
function BuildBURTMat(dimMat: integer; prmExamples: TExamples; prmLstAtt: TLstAttributes;
                      prmUnivStats: TLstCalcStatDesDiscrete;
                      prmBiStats: TLstCalcStatDesCrossTab): PMatrix;
var Burt: PMatrix;
    i,j: integer;//the attributes being crossed
    i_cumul,j_cumul: integer;//global row / column offsets of the current block
    ctab_cumul: integer;//index of the next cross-tabulation to consume
    i_k,j_k: integer;//position inside the current block
    statU: TCalcStatDesDiscrete;
    statBi: TCalcSDCrossTab;
    attI,attJ: TAttribute;
    v: float;
begin
 DimMatrix(Burt,dimMat,dimMat);
 //for each attribute
 i_cumul:= 0;
 ctab_cumul:= 0;
 for i:= 1 to prmLstAtt.Count do
  begin
   attI:= prmLstAtt.Attribute[pred(i)];
   //j starts at i, so the column offset starts at the row offset
   //(both are the sum of NbValues of the attributes preceding i)
   j_cumul:= i_cumul;
   //start at i: the table is symmetric by blocks
   for j:= i to prmLstAtt.Count do
    begin
     attJ:= prmLstAtt.Attribute[pred(j)];
     if (i=j)
      then
       begin
        //attribute crossed with itself: frequencies on the diagonal
        statU:= TCalcStatDesDiscrete(prmUnivStats.Stat(pred(j)));
        for i_k:= 1 to attI.NbValues do
         for j_k:= 1 to attJ.NbValues do
          begin
           if (i_k=j_k)
            then v:= statU.TabFreq.Value[i_k]
            else v:= 0.0;
           TRY
            Burt^[i_cumul+i_k]^[j_cumul+j_k]:= v;
           EXCEPT
            TraceLog.WriteToLogFile(format('BuildBURTMat, (%d) row and (%d) column asked for %d dimension available',[i_cumul+i_k,j_cumul+j_k,dimMat]));
           END;
          end;
       end
      else
       begin
        //two different attributes: use their cross-tabulation
        statBi:= TCalcSDCrossTab(prmBiStats.Stat(ctab_cumul));
        inc(ctab_cumul);
        for i_k:= 1 to statBi.CrossTab.RowCount do
         for j_k:= 1 to statBi.CrossTab.ColCount do
          begin
           TRY
            v:= statBi.CrossTab.Value[i_k,j_k];
            Burt^[i_cumul+i_k]^[j_cumul+j_k]:= v;
            //the transposed cell
            Burt^[j_cumul+j_k]^[i_cumul+i_k]:= v;
           EXCEPT
            TraceLog.WriteToLogFile(format('BuildBURTMat, (%d) row and (%d) column asked for at non-diagonal tab',[i_cumul+i_k,j_cumul+j_k]));
           END;
          end;
       end;
     inc(j_cumul,attJ.NbValues);
    end;
   inc(i_cumul,attI.NbValues);
  end;
 //done
 result:= Burt;
end;

{
Build the matrix to diagonalise for multiple correspondence analysis from the
BURT matrix (formula from DIDAY, p.260):
  result[i,j] = BurtMat[i,j] / sqrt(margin[i]*margin[j])
where margin[i] is the sum of row i of BurtMat.

dimMat  : dimension of BurtMat and of the result (indices 1..dimMat).
nbAtt   : not read by this routine.
nbEx    : not read by this routine.
BurtMat : the BURT matrix.
Returns the newly allocated matrix.

When the product of the two margins is not strictly positive (e.g. an all-zero
row or column) the cell is set to 0.0 instead of dividing by zero.
The temporary margin vector is always released, even if an exception is raised.
}
function BuildACMMat(dimMat,nbAtt,nbEx: integer; BurtMat: PMatrix): PMatrix;
var s,denom: float;
    i,j: integer;
    MatToDiag: PMatrix;
    VecMarge: PVector;
begin
 DimMatrix(MatToDiag,dimMat,dimMat);
 DimVector(VecMarge,dimMat);
 try
  //compute the margins (row sums)
  for i:= 1 to dimMat do
   begin
    s:= 0.0;
    for j:= 1 to dimMat do
     s:= s+BurtMat^[i]^[j];
    VecMarge^[i]:= s;
   end;
  //for each cell of the matrix
  for i:= 1 to dimMat do
   for j:= 1 to dimMat do
    begin
     denom:= VecMarge^[i]*VecMarge^[j];
     if denom>0.0
      then MatToDiag^[i]^[j]:= BurtMat^[i]^[j]/sqrt(denom)
      else MatToDiag^[i]^[j]:= 0.0;
    end;
 finally
  delVector(VecMarge,dimMat);
 end;
 result:= MatToDiag;
end;


end.
