(********************************************************************************)
(* UCalcStatDesGroupCaracterization.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(********************************************************************************)

{
@abstract(Caractrisation de groupes, trs utile pour la classif par exemple)
@author(Ricco)
@created(12/01/2004)
}
unit UCalcStatDesGroupCaracterization;

interface

USES
        Contnrs,
        UCalcStatDes,
        UDatasetDefinition,
        UDatasetExamples;

TYPE
        {croisement groupe et une variable}
        TGroupAttCarac = class
                         private
                         {Attribut  carcatriser}
                         FAttribute: TAttribute;
                         {attribut caractrisant}
                         FCaracAtt: TAttribute;
                         {stat globale de rfrence}
                         FStatGlobal: TCalcStatDes;
                         {la stat locale  comparer}
                         FStatLocal: TCalcStatDes;
                         protected
                         {la valeur test qui servira au classement des attributs caractrisants}
                         FValTest: double;
                         public
                         {passer les paramtres}
                         constructor Create(prmAtt,prmCaracAtt: TAttribute; prmStatGlob, prmStatLoc: TCalcStatDes);
                         {calculer la valeur test}
                         procedure   ComputeValTest(); virtual; abstract;
                         {envoyer une description HTML des rsultats}
                         function    getHTMLResult(): string; virtual; abstract;
                         {description de l'attribut caractrisant}
                         function    descCaracAtt(): string; virtual;
                         {attribut  caractriser}
                         property    Attribute: TAttribute read FAttribute;
                         {attribut caractrisant}
                         property    CaracAttribute: TAttribute read FCaracAtt;
                         {la valeur test  exhiber}
                         property    ValTest: double read FValTest;
                         end;

        {croisement groupe variable continue}
        TGroupAttCaracContinue = class(TGroupAttCarac)
                                 public
                                 procedure   ComputeValTest(); override;
                                 function    getHTMLResult(): string; override;
                                 end;

        {croisement groupe et variable discrte}
        TGroupAttCaracDiscrete = class(TGroupAttCarac)
                                 private
                                 {la modalit de caractrisation}
                                 FValue: TTypeDiscrete;
                                 public
                                 constructor Create(prmAtt,prmCaracAtt: TAttribute; prmValue: TTypeDiscrete; prmStatGlob, prmStatLoc: TCalcStatDes);
                                 procedure   ComputeValTest(); override;
                                 function    getHTMLResult(): string; override;
                                 function    descCaracAtt(): string; override;
                                 end;

        {liste des rsultats pour un groupe}
        TLstGroupAttCarac = class(TObject)
                            private
                            {attribut  caractriser}
                            FAttribute: TAttribute;
                            {modalit de l'attribut  caractriser}
                            FAttValue: TTypeDiscrete;
                            {liste des stats globales de rfrence}
                            FLstStatGlob: TLstCalcStatDes;
                            {liste des stat local}
                            FLstStatLocal: TLstCalcStatDes;
                            {liste associe}
                            FLstCaracterization: TObjectList;
                            {nombre d'individus ayant servi au calcul}
                            FNbExamples: integer;
                            public
                            {initialiser la liste}
                            constructor  Create(prmAtt: TAttribute; prmValue: TTypeDiscrete; prmLstAttCarac: TLstAttributes; prmStatGlob: TLstCalcStatDes);
                            {dtruire les listes}
                            destructor   Destroy; override;
                            {calculer les indicateurs stats}
                            procedure    Refresh(prmExamples: TExamples);
                            {recalculer les indicateurs}
                            procedure    ComputeStats();
                            {envoyer une description HTML des rsultats}
                            function     getHTMLResult(): string;
                            {trier les listes}
                            procedure    SortList(prmOption: integer = -1);
                            end;

        {classe caractrisation}
        TGroupCaracterization = class(TCalcStatDes)
                                private
                                {Liste de var. caractrisantes}
                                FLstAttCarac: TLstAttributes;
                                {stat desciptives globales - liste polymorphe (discrete et continue)}
                                FLstStatGlob: TLstCalcStatDes;
                                {liste des listes des rsultats par groupe}
                                FLstGroupCarac: TObjectList;
                                public
                                {crer les listes}
                                constructor Create(prmAtt: TAttribute; prmLstAttCarac: TLstAttributes);
                                {supprimer les listes}
                                destructor  Destroy; override;
                                {prparer les stats}
                                procedure   RefreshStat(prmExamples: TExamples);override;
                                {envoyer la description des rsultats}
                                function    getHTMLResult(prmOption: integer = -1): string; override;
                                {trier les listes}
                                procedure   SortLists(prmOption: integer);
                                end;

        {caractrisation d'un ensemble de variables catgorielles,
        tri :   -1 -> pas de tri,
                 0 -> le nom de l'attribut caractrisant,
                 1 -> la valeur test,
                 2 -> la valeur absolue de la valeur test}
        TLstGroupCaracterization = class(TLstCalcStatDes)
                                   public
                                   function  getHeaderHTML(): string; override;
                                   procedure SortStats(); override;
                                   end;     

implementation

uses
        Sysutils,Classes,
        UConstConfiguration, UStringAddBuffered;

{ TGroupCaracterization }

procedure TGroupCaracterization.RefreshStat(prmExamples: TExamples);
var i: integer;
    lstExamples: TObjectList;
    ex: TExamples;
    tmp: TLstGroupAttCarac;
begin
 //les stats globales - pralable indispensable
 FLstStatGlob.RefreshStat(prmExamples);
 //crer la liste des individus par groupes
 lstExamples:= prmExamples.DispatchExamples(Attribute);
 //les stats par groupes
 for i:= 0 to pred(FLstGroupCarac.Count) do
  begin
   tmp:= FLstGroupCarac.Items[i] as TLstGroupAttCarac;
   ex:= lstExamples.Items[i] as TExamples;
   tmp.Refresh(ex);
   tmp.ComputeStats();
  end;
 lstExamples.Free;
end;

constructor TGroupCaracterization.Create(prmAtt: TAttribute;
  prmLstAttCarac: TLstAttributes);
var tmp: TLstGroupAttCarac;
    i: integer;
begin
 inherited Create(prmAtt,NIL);
 //la liste des attributs caractrisantes
 FLstAttCarac:= prmLstAttCarac;
 //liste des stats globales
 FLstStatGlob:= TLstCalcStatDes.Create(FLstAttCarac,NIL);
 //liste des rsultats par groupe
 FLstGroupCarac:= TObjectList.Create(TRUE);
 for i:= 1 to Attribute.nbValues do
  begin
   tmp:= TLstGroupAttCarac.Create(Attribute,i,FLstAttCarac,FLstStatGlob);
   FLstGroupCarac.Add(tmp);
  end;
end;

destructor TGroupCaracterization.Destroy;
begin
 FLstStatGlob.Free;
 FLstGroupCarac.Free;
 inherited destroy;
end;

function TGroupCaracterization.getHTMLResult(prmOption: integer = -1): string;
var i: integer;
    lgc: TLstGroupAttCarac;
    bs: TBufString;
    s: string;
begin
 bs:= TBufString.Create;
 bs.BeginUpdate;
 s:= '<td>';
 s:= s+HTML_HEADER_TABLE_RESULT+
     HTML_TABLE_COLOR_DATA_GRAY+
     format('<TH align=center colspan=%d>Description of "%s"</TH></TR>',[FLstGroupCarac.Count,Attribute.Name]);
 bs.AddStr(s);
 s:= '<TR>';
 bs.AddStr(s);
 for i:= 0 to pred(FLstGroupCarac.Count) do
  begin
   lgc:= FLstGroupCarac.Items[i] as TLstGroupAttCarac;
   s:= '<td>'+lgc.getHTMLResult()+'</td>';
   bs.AddStr(s);
  end;
 bs.AddStr('</tr>');
 bs.AddStr('</table></td>');
 bs.EndUpdate;
 result:= bs.BufS;
 bs.Free;
end;

procedure TGroupCaracterization.SortLists(prmOption: integer);
var i: integer;
    lgc: TLstGroupAttCarac;
begin
 //pour chaque groupe  caractriser
 for i:= 0 to pred(FLstGroupCarac.Count) do
  begin
   lgc:= FLstGroupCarac.Items[i] as TLstGroupAttCarac;
   lgc.SortList(prmOption);
  end;
end;

{ TLstGroupAttCarac }

procedure TLstGroupAttCarac.ComputeStats;
var i: integer;
    gc: TGroupAttCarac;     
begin
 for i:= 0 to pred(FLstCaracterization.Count) do
  begin
   gc:= FLstCaracterization.Items[i] as TGroupAttCarac;
   gc.ComputeValTest(); 
  end;
end;

constructor TLstGroupAttCarac.Create(prmAtt: TAttribute;
  prmValue: TTypeDiscrete; prmLstAttCarac: TLstAttributes;
  prmStatGlob: TLstCalcStatDes);
var i: integer;
    j: TTypeDiscrete;    
    stat: TCalcStatDes;
    gc: TGroupAttCarac;
begin
 inherited Create();
 //passage de pointeurs
 FAttribute:= prmAtt;
 FAttValue:= prmValue;
 FLstStatGlob:= prmStatGlob;
 //listes propritaires
 FLstStatLocal:= TLstCalcStatDes.Create(prmLstAttCarac,NIL);
 FLstCaracterization:= TObjectList.Create(TRUE);
 for i:= 0 to pred(FLstStatLocal.Count) do
  begin
   stat:= FLstStatLocal.Stat(i);
   //case stat.Attribute.Category of
   if stat.Attribute.isCategory(caContinue)
    then
     begin
      gc:= TGroupAttCaracContinue.Create(FAttribute,stat.Attribute,FLstStatGlob.Stat(i),stat);
      FLstCaracterization.Add(gc);
     end
    else
     begin
      for j:= 1 to stat.Attribute.nbValues do
       begin
        gc:= TGroupAttCaracDiscrete.Create(FAttribute,stat.Attribute,j,FLstStatGlob.Stat(i),stat);
        FLstCaracterization.Add(gc);
       end;
     end;
  end;
end;

destructor TLstGroupAttCarac.Destroy;
begin
 FLstCaracterization.Free;
 FLstStatLocal.Free;
 inherited;
end;

function TLstGroupAttCarac.getHTMLResult: string;
var s: string;
    bs: TBufString;
    i: integer;
    gc: TGroupAttCarac; 
begin
 bs:= TBufString.Create;
 bs.BeginUpdate;

 //en-tte de colonne
 s:= HTML_HEADER_TABLE_RESULT;
 
 s:= s+HTML_TABLE_COLOR_HEADER_GRAY;
 s:= s+format('<TH colspan=4>%s=%s</TH>',[FAttribute.Name,FAttribute.LstValues.GetDescription(FAttValue)]);
 s:= s+'</TR>';
 bs.AddStr(s);

 //les exemples dans le groupe
 s:= HTML_TABLE_COLOR_DATA_BLUE;
 s:= s+format('<td colspan=2>Examples</td><td align=right colspan=2>%d</td>',[FNbExamples]);
 s:= s+'</tr>';
 bs.AddStr(s);

 //en-tte des items
 s:= HTML_TABLE_COLOR_HEADER_GRAY;
 s:= s+'<TD>Att - Desc</TD><TD>Test value</TD><TD>Group</TD><TD>Overral</TD></TR>';
 bs.AddStr(s);

 //description par item

 //les variables continues
 s:= HTML_TABLE_COLOR_HEADER_GREEN;
 s:= s+'<td colspan=4>Continuous attributes</td></tr>';
 bs.AddStr(s);
 for i:= 0 to pred(FLstCaracterization.Count) do
  begin
   gc:= FLstCaracterization.Items[i] as TGroupAttCarac;
   if gc.CaracAttribute.isCategory(caContinue)
    then
     begin
       s:= HTML_TABLE_COLOR_DATA_BLUE;
       s:= s+gc.getHTMLResult();
       s:= s+'</tr>';
       bs.AddStr(s);
    end;
  end;

 //les variables discrtes
 s:= HTML_TABLE_COLOR_HEADER_GREEN;
 s:= s+'<td colspan=4>Discrete attributes</td></tr>';
 bs.AddStr(s);
 for i:= 0 to pred(FLstCaracterization.Count) do
  begin
   gc:= FLstCaracterization.Items[i] as TGroupAttCarac;
   if gc.CaracAttribute.isCategory(caDiscrete)
    then
     begin
       s:= HTML_TABLE_COLOR_DATA_BLUE;
       s:= s+gc.getHTMLResult();
       s:= s+'</tr>';
       bs.AddStr(s);
     end;
  end;

 bs.AddStr('</table>');//pour la description du groupe
 
 bs.EndUpdate;
 result:= bs.BufS;
 bs.Free;
end;

procedure TLstGroupAttCarac.Refresh(prmExamples: TExamples);
begin
 FLstStatLocal.RefreshStat(prmExamples);
 FNbExamples:= prmExamples.Size;
end;

{**** fonctions de comparaisons ****}

{trier par le nom}
function ListSortCompareName(item1,item2: pointer): integer;
var st1,st2: TGroupAttCarac;
begin
 st1:= TGroupAttCarac(item1);
 st2:= TGroupAttCarac(item2);
 if (st1.descCaracAtt()<st2.descCaracAtt())
  then result:= -1
  else
   if (st1.descCaracAtt()>st2.descCaracAtt())
    then result:= +1
    else result:= 0;
end;

function ListSortCompareTestValue(item1,item2: pointer): integer;
var st1,st2: TGroupAttCarac;
begin
 st1:= TGroupAttCarac(item1);
 st2:= TGroupAttCarac(item2);
 if (st1.ValTest<st2.ValTest)
  then result:= +1
  else
   if (st1.ValTest>st2.ValTest)
    then result:= -1
    else result:= 0;
end;

function ListSortCompareAbsTestValue(item1,item2: pointer): integer;
var st1,st2: TGroupAttCarac;
begin
 st1:= TGroupAttCarac(item1);
 st2:= TGroupAttCarac(item2);
 if (abs(st1.ValTest)<abs(st2.ValTest))
  then result:= +1
  else
   if (abs(st1.ValTest)>abs(st2.ValTest))
    then result:= -1
    else result:= 0;
end;

{***********************************}

procedure TLstGroupAttCarac.SortList(prmOption: integer);
var funcCompare: TListSortCompare;
begin
 funcCompare:= NIL;
 case prmOption of
  0: funcCompare:= ListSortCompareName;
  1: funcCompare:= ListSortCompareTestValue;
  2: funcCompare:= ListSortCompareAbsTestValue;
 end;

 if assigned(funcCompare)
  then FLstCaracterization.Sort(funcCompare);  
end;

{ TGroupAttCarac }

constructor TGroupAttCarac.Create(prmAtt, prmCaracAtt: TAttribute;
  prmStatGlob, prmStatLoc: TCalcStatDes);
begin
 inherited Create();
 //rien n'est propritaire ici
 FAttribute:= prmAtt;
 FCaracAtt:= prmCaracAtt;
 FStatGlobal:= prmStatGlob;
 FStatLocal:= prmStatLoc;
end;

function TGroupAttCarac.descCaracAtt: string;
begin
 result:= FCaracAtt.Name;
end;

{ TGroupAttCaracDiscrete }

procedure TGroupAttCaracDiscrete.ComputeValTest;
var sg,sl: TCalcStatDesDiscrete;
    n,nk,nj,nkj: double;
    s,e: double;
begin
 sg:= FStatGlobal as TCalcStatDesDiscrete;
 sl:= FStatLocal as TCalcStatDesDiscrete;
 //pp.184 Lebart & Morineau -- approximation normale
 FValTest:= 0.0;
 n:= 1.0*sg.NbExamples;
 nk:= 1.0*sl.NbExamples;
 nj:= 1.0*sg.TabFreq.Value[FValue];
 nkj:= 1.0*sl.TabFreq.Value[FValue];
 if (n>1.0)
  then
   begin
    s:= nk*(n-nk)/(n-1.0)*nj/n*(1-nj/n);
    if (s>0)
     then
      begin
       s:= sqrt(s);
       e:= nk*nj/n;
       FValTest:= (nkj-e)/s;
      end;
   end;
end;

constructor TGroupAttCaracDiscrete.Create(prmAtt, prmCaracAtt: TAttribute;
  prmValue: TTypeDiscrete; prmStatGlob, prmStatLoc: TCalcStatDes);
begin
 inherited Create(prmAtt,prmCaracAtt,prmStatGlob,prmStatLoc);
 FValue:= prmValue;
end;

function TGroupAttCaracDiscrete.descCaracAtt: string;
begin
 result:= inherited descCaracAtt()+'='+FCaracAtt.LstValues.getDescription(FValue);
end;

function TGroupAttCaracDiscrete.getHTMLResult: string;
var s: string;
begin
 s:= format('<TD>%s</TD><TD align=right>%.1f</TD><TD align=right>%.2f%s</TD><TD align=right>%.2f%s</TD>',
       [self.descCaracAtt(),ValTest,100.0*(FStatLocal as TCalcStatDesDiscrete).TabFreq.Frequence[FValue],'%',100.0*(FStatGlobal as TCalcStatDesDiscrete).TabFreq.Frequence[FValue],'%']);
 result:= s;
end;

{ TGroupAttCaracContinue }

procedure TGroupAttCaracContinue.ComputeValTest;
var sg,sl: TCalcStatDesContinuous;
    v,n,nk: double;
begin
 sg:= FStatGlobal as TCalcStatDesContinuous;
 sl:= FStatLocal as TCalcStatDesContinuous;
 //pp.181 Lebart & Morineau
 FValTest:= 0.0;
 n:= 1.0*sg.NbExamples;
 nk:= 1.0*sl.NbExamples;
 if (nk>0) and (n>1.0)
  then
   begin
    v:= (n-nk)/(n-1.0);
    v:= v*sg.Variance/nk;
    if (v>0)
     then FValTest:= (sl.Average-sg.Average)/sqrt(v);
   end;
end;

function TGroupAttCaracContinue.getHTMLResult: string;
var s: string;
begin
 s:= format('<TD>%s</TD><TD align=right>%.1f</TD><TD align=right>%.2f</TD><TD align=right>%.2f</TD>',
       [FCaracAtt.Name,ValTest,(FStatLocal as TCalcStatDesContinuous).Average,(FStatGlobal as TCalcStatDesContinuous).Average]);
 result:= s;
end;

{ TLstGroupCaracterization }

function TLstGroupCaracterization.getHeaderHTML: string;
begin
 result:= '';
end;

procedure TLstGroupCaracterization.SortStats;
var sd: TGroupCaracterization;
    i: integer;
begin
 for i:= 0 to pred(LstStat.Count) do
  begin
   sd:= LstStat.items[i] as TGroupCaracterization;
   sd.SortLists(CompareMode);
  end;
end;

end.
