(********************************************************************)
(* UCalcStatDesCrossTab.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(********************************************************************)

{
@abstract(Cross-tabulations (contingency tables) with their associated statistical indicators)
@author(Ricco)
@created(12/01/2004)
}
unit UCalcStatDesCrossTab;

interface

USES
        UDatasetDefinition,
        UDatasetImplementation,
        UDatasetExamples,
        UCalcStatDes,
        UCalcCrossTab,
        UCalcRndGenerator;

TYPE
        {kind of statistic that can be requested from a cross-tab computation
         (see TCalcSDCrossTab.statisticValue)}
        TEnumTypeStatFromCrossTab = (stCxTschuprow,stCxSU,stCxRandom);

CONST
        {display name of each statistic, indexed by TEnumTypeStatFromCrossTab}
        STR_STAT_NAME_FROM_CROSS_TAB : array[TEnumTypeStatFromCrossTab] of String = ('Tschuprow','Symmetrical uncertainty','Random ranking');

TYPE
        {computation class for cross-tabulations (contingency tables):
         accumulates the counts of a row attribute against a column attribute
         and derives the chi-square family of indicators plus the symmetrical
         uncertainty}
        TCalcSDCrossTab = class(TCalcStatDes)
                          private
                          {counting (contingency) table}
                          FCrossTab: TCrossTab;
                          {the additional attribute, exposed as ColAtt (column attribute)}
                          FColAtt: TAttribute;
                          {the statistics - chi2 and the measures derived from it}
                          FTschuprow,FCramer,FPhi2,FChi2: double;
                          {p-value of the chi2 test}
                          FPrChi2: double;
                          {degrees of freedom}
                          Fddl: integer;
                          {Symmetrical uncertainty}
                          FSU: double;
                          {Random value attribution (drawn once, in the constructor)}
                          FRandom: double;
                          {returns the row attribute (the inherited Attribute)}
                          function    GetRowAtt(): TAttribute;
                          {computes the statistical indicators from the filled table}
                          procedure   CalcStats();
                          {computes the symmetrical uncertainty (FSU)}
                          procedure   calc_SU();
                          protected
                          {resets the table and the example counter}
                          procedure   BeginUpdate(); override;
                          {adds one example to the table}
                          procedure   AddValue(prmExample: integer); override;
                          {triggers the computation of the indicators}
                          procedure   EndUpdate(); override;
                          public
                          {constructor: prmR is the row attribute, prmC the column attribute;
                           prmRndGenerator, when supplied, provides the value stored in Random}
                          constructor Create(prmR,prmC: TAttribute; prmExamples: TExamples = nil; prmRndGenerator: TRndGenerator = nil);
                          {releases the contingency table}
                          destructor  Destroy(); override;
                          {HTML cells describing the result; the cross-tab itself is
                           appended only when prmOption>=0}
                          function    getHTMLResult(prmOption: integer = -1): string; override;
                          {returns the requested statistic (Tschuprow's t when the kind
                           is neither stCxSU nor stCxRandom)}
                          function    statisticValue(prmTypeStat: TEnumTypeStatFromCrossTab): double;
                          {returns the p-value of the requested statistic
                           (currently always the chi2 p-value)}
                          function    pValue(prmTypeStat: TEnumTypeStatFromCrossTab): double;
                          {reference to the contingency table}
                          property   CrossTab: TCrossTab read FCrossTab;
                          {reference to the row attribute}
                          property   RowAtt: TAttribute read getRowAtt;
                          {reference to the column attribute}
                          property   ColAtt: TAttribute read FColAtt;
                          {Tschuprow's t}
                          property   Tschuprow: double read FTschuprow;
                          {Cramer's v}
                          property   Cramer: double read FCramer;
                          {Phi-2 (chi2 divided by the grand total)}
                          property   Phi2: double read FPhi2;
                          {chi-2}
                          property   Chi2: double read FChi2;
                          {p-value associated with the chi-2 test}
                          property   ProbaChi2: double read FPrChi2;
                          {Symmetrical uncertainty -- new 31/05/2004}
                          property   SU: double read FSU;
                          {Random value attribution.
                           NOTE: inside the methods of this class the bare identifier
                           "Random" designates this property, not System.Random}
                          property   Random: double read FRandom; 
                          end;

        {list of cross-tab statistics, useful in particular for sorting them}
        TLstCalcStatDesCrossTab = class(TLstCalcStatDes)
                                  public
                                  {rebuilds the list: one TCalcSDCrossTab per pair of
                                   distinct discrete attributes of prmLstAtt}
                                  procedure   RebuildStatDes(prmLstAtt: TLstAttributes; prmExamples: TExamples); override;
                                  {sorts the list with the comparison selected by CompareMode}
                                  procedure SortStats(); override;
                                  {HTML header row matching the cells produced by the items}
                                  function    getHeaderHTML(): string; override;
                                  end;
        

implementation

USES
        Sysutils, Classes,
        MATH, FMath, UConstConfiguration;

{ TCalcSDCrossTab }

{Registers one example: it is forwarded to the contingency table and the
 example counter is advanced by one.}
procedure TCalcSDCrossTab.AddValue(prmExample: integer);
begin
 self.FCrossTab.AddValue(prmExample);
 FNbExamples:= succ(FNbExamples);
end;

{Starts a new counting pass: the contingency table is re-initialised and
 the example counter goes back to zero.}
procedure TCalcSDCrossTab.BeginUpdate;
begin
 self.FCrossTab.ReInitialization;
 self.FNbExamples:= 0;
end;

{Computes every indicator from the filled contingency table:
 degrees of freedom, chi-2, Phi-2, Tschuprow's t, Cramer's v, the chi-2
 p-value and finally the symmetrical uncertainty (via calc_SU).

 Index 0 of CrossTab.Value addresses the margins: Value[i,0] is the total of
 row i, Value[0,j] the total of column j and Value[0,0] the grand total.

 When the table is empty (grand total = 0) all indicators are left at their
 neutral value (0 for the association measures, 1 for the p-value) instead of
 dividing by zero; calc_SU applies the same convention.}
procedure TCalcSDCrossTab.CalcStats;
var i,j,minDim: integer;
    n,d2,expected: double;
begin
 Fddl:= pred(RowAtt.nbValues)*pred(ColAtt.nbValues);
 //neutral values, kept as is when the table is empty or degenerate
 FChi2:= 0.0;
 FPhi2:= 0.0;
 FTschuprow:= 0.0;
 FCramer:= 0.0;
 FPrChi2:= 1.0;
 //grand total, read once
 n:= 1.0*CrossTab.Value[0,0];
 if (n>0)
  then
   begin
    //chi-2: sum over the cells of (observed-expected)^2/expected
    d2:= 0.0;
    for i:= 1 to CrossTab.RowCount do
     begin
      for j:= 1 to CrossTab.ColCount do
       begin
        //expected count under independence = row total * column total / n
        expected:= 1.0*CrossTab.Value[i,0]*CrossTab.Value[0,j]/n;
        //cells with a null expected count contribute nothing
        if (expected>0)
         then d2:= d2+1.0*SQR(1.0*CrossTab.Value[i,j]-expected)/expected;
       end;
     end;
    FChi2:= d2;
    FPhi2:= d2/n;
    if (Fddl>0)
     then
      begin
       FTschuprow:= sqrt(d2/(n*sqrt(1.0*Fddl)));
       //NOTE(review): PKHI2 comes from FMath; presumably the chi-2 tail probability - confirm
       FPrChi2:= PKHI2(Fddl,FChi2);
      end;
    minDim:= MIN(pred(RowAtt.nbValues),pred(ColAtt.nbValues));
    if (minDim>0)
     then FCramer:= sqrt(d2/(n*minDim));
   end;
 //additional indicators
 self.calc_SU();
end;

{Symmetrical uncertainty between the row and the column attribute:
   SU = 2*(H(col)+H(row)-H(row,col)) / (H(col)+H(row))
 where H is the base-2 entropy computed from the margins (index 0 of
 CrossTab.Value) and from the cell frequencies (CrossTab.FullFreq).
 FSU is 0 when the table is empty or when both marginal entropies are null.}
procedure TCalcSDCrossTab.calc_SU();
var r,c: integer;
    hCol,hRow,hJoint,p: double;
begin
 //default result, also used for the degenerate cases below
 FSU:= 0.0;
 //basic assumption: nothing can be computed on an empty table
 if (CrossTab.Value[0,0]=0)
  then exit;

 //entropy of the row margin
 hRow:= 0.0;
 for r:= 1 to CrossTab.RowCount do
  begin
   p:= CrossTab.Value[r,0]/CrossTab.Value[0,0];
   if (p>0)
    then hRow:= hRow-p*log2(p);
  end;

 //entropy of the column margin
 hCol:= 0.0;
 for c:= 1 to CrossTab.ColCount do
  begin
   p:= CrossTab.Value[0,c]/CrossTab.Value[0,0];
   if (p>0)
    then hCol:= hCol-p*log2(p);
  end;

 //joint entropy over the cells
 hJoint:= 0.0;
 for r:= 1 to CrossTab.RowCount do
  for c:= 1 to CrossTab.ColCount do
   begin
    p:= CrossTab.FullFreq[r,c];
    if (p>0)
     then hJoint:= hJoint-p*log2(p);
   end;

 //SU correlation, only defined when the marginal entropies are not both null
 if ((hCol+hRow)>0)
  then FSU:= 2.0*(hCol+hRow-hJoint)/(hCol+hRow);
end;

{Builds the cross-tab statistic of prmR (rows) against prmC (columns).
 prmExamples is handed over to the inherited constructor.
 prmRndGenerator, when assigned, supplies the value stored in the Random
 property (used for random ranking); otherwise the RTL generator is used.}
constructor TCalcSDCrossTab.Create(prmR, prmC: TAttribute;
  prmExamples: TExamples = nil; prmRndGenerator: TRndGenerator = nil);
begin
 FColAtt:= prmC;
 //NOTE(review): the table is created before the inherited constructor runs,
 //presumably because the latter may already trigger a counting pass - confirm
 FCrossTab:= TCrossTab.create(prmR,prmC);
 if assigned(prmRndGenerator)
  then FRandom:= prmRndGenerator.RanMar()
  //must be qualified: a bare "random" designates the Random property of this
  //class (i.e. FRandom itself, still 0.0 here), not the RTL function
  else FRandom:= System.Random;
 inherited Create(prmR,prmExamples);
end;

{Releases the contingency table owned by this object, then lets the
 ancestor finish the destruction.}
destructor TCalcSDCrossTab.Destroy;
begin
 self.FCrossTab.Free;
 inherited;
end;

{Ends a counting pass: the indicators are computed from the filled table.}
procedure TCalcSDCrossTab.EndUpdate;
begin
 self.CalcStats;
end;

{Returns the HTML cells describing this statistic: the row attribute name,
 the column attribute name, a nested table with the indicators and, only when
 prmOption>=0, the cross-tab itself rendered with that option.

 The Phi-2 and chi-2 rows are labelled with an explicit superscript 2
 (HTML entity, so the source stays plain ASCII): the values displayed are
 Phi2, Chi2 and the p-value of the chi-2 test.}
function TCalcSDCrossTab.getHTMLResult(prmOption: integer): string;

  {one data row of the indicator table: label cell + right-aligned value cell;
   prmLabel is inserted in the format string and must not contain '%'}
  function StatRow(const prmLabel: string; prmValue: double): string;
  begin
   result:= format(HTML_TABLE_COLOR_DATA_BLUE+'<TD>'+prmLabel+'</TD><TD align=right>'+STR_FORMAT_VIEW_STAT_ACCURACY_HIGH+'</TD></TR>',[prmValue]);
  end;

var s: string;
begin
 s:= format('<TD>%s</TD><TD>%s</TD>',[self.RowAtt.Name,self.ColAtt.Name]);
 //statistical indicators: header of the nested table
 s:= s+'<TD valign=top>'+HTML_HEADER_TABLE_RESULT+HTML_TABLE_COLOR_HEADER_BLUE+
       '<TH width=80>Stat</TH><TH width=60>Value</TH></TR>';
 //one row per indicator
 s:= s+StatRow('Tschuprow''s t',self.Tschuprow);
 s:= s+StatRow('Cramer''s v',self.Cramer);
 s:= s+StatRow('Phi&sup2;',self.Phi2);
 s:= s+StatRow('Chi&sup2;',self.Chi2);
 s:= s+StatRow('Pr(Chi&sup2;)',self.ProbaChi2);
 //end of the indicators
 s:= s+'</table></TD>';

 //the cross-tab is only appended when an option is supplied
 if (prmOption>=0)
  then s:= s+'<TD valign=top>'+self.CrossTab.getHTMLResult(prmOption,'')+'</TD>';

 result:= s;
end;

function TCalcSDCrossTab.GetRowAtt: TAttribute;
begin
 result:= Attribute;
end;

{Returns the p-value attached to the requested statistic.
 For the time being prmTypeStat is ignored: the p-value of the chi-2 test is
 returned whatever the statistic.}
function TCalcSDCrossTab.pValue(
  prmTypeStat: TEnumTypeStatFromCrossTab): double;
begin
 result:= FPrChi2;
end;

{Returns the statistic designated by prmTypeStat: the symmetrical
 uncertainty for stCxSU, the random value for stCxRandom and Tschuprow's t
 in every other case.}
function TCalcSDCrossTab.statisticValue(
  prmTypeStat: TEnumTypeStatFromCrossTab): double;
begin
 if (prmTypeStat=stCxSU)
  then result:= FSU
  else
   if (prmTypeStat=stCxRandom)
    then result:= FRandom
    else result:= FTschuprow;
end;

{ TLstCalcStatDesCrossTab }

{Returns the HTML header row of the result table: one column each for the
 row attribute, the column attribute, the indicators and the cross-tab.}
function TLstCalcStatDesCrossTab.getHeaderHTML: string;
begin
 result:= HTML_TABLE_COLOR_HEADER_GRAY
          +'<TH>Row (Y)</TH>'
          +'<TH>Column (X)</TH>'
          +'<TH>Statistical indicator</TH>'
          +'<TH>Cross-tab</TH>';
end;

{Adds one TCalcSDCrossTab to the list for every pair (i<j) of distinct
 discrete attributes of prmLstAtt; the attribute of lower index is the row,
 the other one the column. prmExamples is passed to each statistic.}
procedure TLstCalcStatDesCrossTab.RebuildStatDes(prmLstAtt: TLstAttributes;
  prmExamples: TExamples);
var iRow,iCol: integer;
    attRow,attCol: TAttribute;
begin
 //candidate row attributes (the last attribute has no successor to pair with)
 for iRow:= 0 to prmLstAtt.Count-2 do
  begin
   attRow:= prmLstAtt.Attribute[iRow];
   //only discrete attributes can be cross-tabulated
   if not attRow.isCategory(caDiscrete)
    then continue;
   //candidate column attributes: every attribute located after the row one
   for iCol:= succ(iRow) to pred(prmLstAtt.Count) do
    begin
     attCol:= prmLstAtt.Attribute[iCol];
     if attCol.isCategory(caDiscrete) and (attRow<>attCol)
      then self.AddStat(TCalcSDCrossTab.Create(attRow,attCol,prmExamples));
    end;
  end;
end;

{Sort callback: ascending order on the name of the row attribute.
 Both items are TCalcSDCrossTab instances; returns -1, 0 or +1.}
function ListSortCompareRowAttName(item1,item2: pointer): integer;
var rowA,rowB: TAttribute;
begin
 rowA:= TCalcSDCrossTab(item1).RowAtt;
 rowB:= TCalcSDCrossTab(item2).RowAtt;
 if (rowA.Name>rowB.Name)
  then result:= +1
  else
   if (rowA.Name<rowB.Name)
    then result:= -1
    else result:= 0;
end;

{Sort callback: ascending order on the name of the column attribute.
 Both items are TCalcSDCrossTab instances; returns -1, 0 or +1.}
function ListSortCompareColumnAttName(item1,item2: pointer): integer;
var colA,colB: TAttribute;
begin
 colA:= TCalcSDCrossTab(item1).ColAtt;
 colB:= TCalcSDCrossTab(item2).ColAtt;
 if (colA.Name>colB.Name)
  then result:= +1
  else
   if (colA.Name<colB.Name)
    then result:= -1
    else result:= 0;
end;

{Sort callback: descending order on Tschuprow's t.
 Both items are TCalcSDCrossTab instances; the sign is deliberately reversed
 so that the strongest association comes first.}
function ListSortCompareTschuprow(item1,item2: pointer): integer;
var tA,tB: double;
begin
 tA:= TCalcSDCrossTab(item1).Tschuprow;
 tB:= TCalcSDCrossTab(item2).Tschuprow;
 //reversed order!
 if (tA>tB)
  then result:= -1
  else
   if (tA<tB)
    then result:= +1
    else result:= 0;
end;

{Sort callback: descending order on the symmetrical uncertainty.
 Both items are TCalcSDCrossTab instances; the sign is deliberately reversed
 so that the highest SU comes first.}
function ListSortCompareSU(item1,item2: pointer): integer;
var suA,suB: double;
begin
 suA:= TCalcSDCrossTab(item1).SU;
 suB:= TCalcSDCrossTab(item2).SU;
 //reversed order!
 if (suA>suB)
  then result:= -1
  else
   if (suA<suB)
    then result:= +1
    else result:= 0;
end;

{Sort callback used for random selection: descending order on the random
 value attached to each statistic.
 Both items are TCalcSDCrossTab instances; returns -1, 0 or +1.}
function ListSortCompareRandom(item1,item2: pointer): integer;
var rndA,rndB: double;
begin
 rndA:= TCalcSDCrossTab(item1).Random;
 rndB:= TCalcSDCrossTab(item2).Random;
 //reversed order!
 if (rndA>rndB)
  then result:= -1
  else
   if (rndA<rndB)
    then result:= +1
    else result:= 0;
end;


{Sorts the list of statistics according to CompareMode:
   0 = row attribute name, 1 = column attribute name,
   2 = Tschuprow's t (descending), 3 = SU (descending), 4 = random value.
 Any other value (negative ones included) leaves the list untouched.}
procedure TLstCalcStatDesCrossTab.SortStats;
var comparer: TListSortCompare;
begin
 //no comparison selected unless the mode is a known one
 comparer:= NIL;
 case CompareMode of
  0: comparer:= ListSortCompareRowAttName;
  1: comparer:= ListSortCompareColumnAttName;
  2: comparer:= ListSortCompareTschuprow;
  3: comparer:= ListSortCompareSU;
  4: comparer:= ListSortCompareRandom;
 end;

 if assigned(comparer)
  then LstStat.Sort(comparer);
end;

end.
