(**************************************************************************)
(* UCalcStatDesContinuousPlus.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(**************************************************************************)

{
@abstract(Unit de dfinition de l'oprateur de statistique descriptive -- Calcul d'indicateurs supplmentaires)
@author(Ricco)
@created(05/07/2004)

}

unit UCalcStatDesContinuousPlus;

interface

USES
        UCalcStatDes,
        UDatasetExamples,
        UDatasetDefinition;

CONST
        {peut tre pourrait les envisager comme des paramtres de l'algo ?}
        SDCP_NB_INTV_HISTO = 10; //nombre d'intervalles pour le calcul des histogrammes
        SDCP_NB_QUANTILES  = 4; //nombre de quantiles choisis

TYPE
        {tableau des bornes d'intervalles}
        TTabBornesSDCP = array of double;

        {tableau des effectifs dans chaque intervalle}
        TTabEffectifSDCP = array of integer;

        {tableau des quantiles}
        TTabQuantilesSDCP = array of double;

        {classe de calcul, attention le calcul incrmental n'est plus fonctionnel}
        TCalcStatDesContPlus = class(TCalcStatDesContinuous)
                               private
                               {asymtrie et son cart-type}
                               FSkewness, FStdSkewness: double;
                               {applatissement et son ecart-type}
                               FKurtosis, FStdKurtosis: double;
                               {Lambda de Bera-Jarcque -- Les tests de normalit sont reports dans l'autre composant}
                               //FLambda: double;
                               {proba du lambda}
                               //FProbaLambda: double;
                               {tendue}
                               FWidth: double;
                               {ecart moyen arithmtique}
                               FEcma: double;
                               {Rapport ema/stddev}
                               FEcma_stddev: double;
                               {mdiane}
                               FMedian: double;
                               {intervalle inter-quartile}
                               FInterQuartileRange: double;
                               {quantiles}
                               FTabQuantiles: TTabQuantilesSDCP;
                               {bornes histo}
                               FTabBornesHisto: TTabBornesSDCP;
                               {effectifs histo}
                               FTabEffectifsHisto: TTabEffectifSDCP;
                               {calculer les infos en O(n)}
                               procedure   Compute_O_n(prmExamples: TExamples);
                               {calculer les infos en O(n x log(n))}
                               procedure   Compute_O_n_log_n(prmExamples: TExamples);
                               {rcuprer le quantile  partir d'une liste trie}
                               function    getQuantile(prc: double; lst: TExamples): double;
                               {construire l'histogramme de frquences en profitant des donnes tries}
                               procedure   buildHistogram(lst: TExamples);
                               protected
                               procedure ConnectAtt(prmAtt: TAttribute); override;
                               public
                               constructor Create(prmAtt: TAttribute; prmExamples: TExamples = nil);
                               destructor  Destroy(); override;
                               procedure   BeginUpdate(); override;
                               procedure   RefreshStat(prmExamples: TExamples); override;
                               function    getHTMLResult(prmOption: integer = -1): string; override;
                               property    Median: double read FMedian;
                               end;

        TLstCalcStatDesContinuousPlus = class(TLstCalcStatDesContinuous)
                                        protected
                                        {ajouter une stat des dans la liste}
                                        procedure   AddStatFromAtt(prmAtt: TAttribute); override;
                                        public
                                        function    getHeaderHTML(): string; override;
                                        end;

implementation

uses
        Sysutils, UConstConfiguration, FMath, ULogFile, MATH;

{ TCalcStatDesContPlus }

procedure TCalcStatDesContPlus.BeginUpdate;
begin
 inherited BeginUpdate();
 FSkewness:= 0.0;
 FKurtosis:= 0.0;
end;

procedure TCalcStatDesContPlus.buildHistogram(lst: TExamples);
var range,seuil: double;
    h,id,n: integer;
begin
 range:= (self.Max-self.Min)/(1.0*SDCP_NB_INTV_HISTO);
 id:= 1;
 for h:= 1 to pred(SDCP_NB_INTV_HISTO) do
  begin
   //borne
   seuil:= self.Min+1.0*h*range;
   FTabBornesHisto[h]:= seuil;
   //compter les lments avant seuil
   n:= 0;
   while (self.Attribute.cValue[lst.Number[id]]<seuil) do
    begin
     inc(n);
     inc(id);
    end;
   //rcuprer le nombre d'lments
   FTabEffectifsHisto[h]:= n;
  end;
 //pour le dernier
 FTabEffectifsHisto[SDCP_NB_INTV_HISTO]:= succ(lst.Size-id);
end;

procedure TCalcStatDesContPlus.Compute_O_n(prmExamples: TExamples);
var i: integer;
    power_value,value,average,stddev,n: double;
begin
 FSkewness:= 0.0;
 FKurtosis:= 0.0;
 FEcma:= 0.0;
 //var temp. pour calculs rapide
 average:= self.Average;
 n:= 1.0*prmExamples.Size;
 stddev:= self.StdDev;
 //go...
 for i:= 1 to prmExamples.Size do
  begin
   value:= (self.Attribute.cValue[prmExamples.Number[i]]-average);
   FEcma:= FEcma+abs(value);
   value:= value/stddev;
   power_value:= value*value*value;
   FSkewness:= FSkewness+power_value;
   power_value:= power_value*value;
   FKurtosis:= FKurtosis+power_value;
  end;
 //ecart-moyen
 FEcma:= FEcma/n;
 if (stddev>0)
  then FEcma_stddev:= FEcma/stddev
  else FEcma_stddev:= 0.0;
 //finalement -- formules assez abracadabrantesques du Skewness et du Kurtosis tire d'EXCEL et cohrents avec STATISTICA !!! -- 05/07/2004
 //new -- 30/07/2005 -- ce sont les Skewness et Kurtosis  la sauce FISHER ( ne pas confondre avec ceux de PEARSON) - cf.http://calamar.univ-ag.fr/uag/staps/cours/stat/stat.htm#zg1 
 FSkewness:= n/((n-1.0)*(n-2.0))*FSkewness;
 FKurtosis:= n*(n+1.0)/((n-1.0)*(n-2.0)*(n-3.0))*FKurtosis-3.0*(n-1.0)*(n-1.0)/((n-2.0)*(n-3.0));
 //calcul des cart-types -- cohrents avec STATISTICA
 FStdSkewness:= SQRT( (6.0 * n * (n -1.0)) / ((n - 2.0)* (n + 1.0) * (n + 3.0)) );
 FStdKurtosis:= SQRT( (4.0 * (power(n,2.0) - 1.0) * power(FStdSkewness,2.0)) / ((n - 3.0) * (n + 5.0)) );
 //et. on en profite
 FWidth:= self.Max-self.Min;
end;

procedure TCalcStatDesContPlus.Compute_O_n_log_n(prmExamples: TExamples);
var tmpEx: TExamples;
    qt: integer;
begin
 //trier les individus
 tmpEx:= TExamples.Create(prmExamples.Size);
 tmpEx.Copy(prmExamples);
 tmpEx.QuickSortBy(self.Attribute);
 //la mdiane
 FMedian:= getQuantile(0.5,tmpEx);
 //rcuprer les quantiles
 for qt:= 1 to pred(SDCP_NB_QUANTILES) do
  FTabQuantiles[qt]:= getQuantile((1.0*qt)/(1.0*SDCP_NB_QUANTILES),tmpEx);
 //intervalle interquartile
 FInterQuartileRange:= FTabQuantiles[3]-FTabQuantiles[1];
 //construire l'histogramme des frquences
 if (self.Max>self.Min)
  then buildHistogram(tmpEx);
 //librer
 tmpEx.Free();
end;

procedure TCalcStatDesContPlus.ConnectAtt(prmAtt: TAttribute);
begin
 inherited ConnectAtt(prmAtt);
 //tableaux...
 //les quantiles
 SetLength(FTabQuantiles,succ(SDCP_NB_QUANTILES));
 //les histogrammes
 SetLength(FTabBornesHisto,succ(SDCP_NB_INTV_HISTO));
 SetLength(FTabEffectifsHisto,succ(SDCP_NB_INTV_HISTO));
end;

constructor TCalcStatDesContPlus.Create(prmAtt: TAttribute;
  prmExamples: TExamples);
begin
 //nil d'abord pour prparer les tableaux intermdiaires
 inherited Create(prmAtt,NIL);
 //si demande de calcul formule
 if assigned(prmExamples)
  then self.RefreshStat(prmExamples);
end;

destructor TCalcStatDesContPlus.Destroy;
begin
 //il y a un compteur de rfrences certes mais bon...
 SetLength(FTabQuantiles,0);
 SetLength(FTabBornesHisto,0);
 SetLength(FTabEffectifsHisto,0);
 //suite std...
 inherited Destroy();
end;

function TCalcStatDesContPlus.getHTMLResult(prmOption: integer): string;
var s: string;
    j: integer;
begin
 //nom de l'attribut
 s:= format('<TD>%s</TD>',[self.Attribute.name]);
 //statistiques descriptives
 s:= s+format('<td %s valign="top">',[HTML_BGCOLOR_DATA_BLUE]);
  s:= s+'<TABLE class="BodyStyle" cellspacing=2 cellpadding=2 width="100%">';
  s:= s+HTML_TABLE_COLOR_HEADER_BLUE+'<th colspan="2">Statistics</th></tr>';
  //param. de positionnement
  s:= s+format('<tr %s><td width="200">Average</td><td align="right" width="200">%.4f</td></tr>',[HTML_BGCOLOR_HEADER_GRAY,Average]);
  s:= s+format('<tr %s><td>Median</td><td align="right">%.4f</td></tr>',[HTML_BGCOLOR_HEADER_GRAY,FMedian]);
  //param. d'chelle
  s:= s+format('<tr %s><td>Std dev. [Coef of variation]</td><td align="right">%.4f [%.4f]</td></tr>',[HTML_BGCOLOR_DATA_GRAY,StdDev,CoefVar]);
  s:= s+format('<tr %s><td>MAD [MAD/STDDEV]</td><td align="right">%.4f [%.4f]</td></tr>',[HTML_BGCOLOR_DATA_GRAY,FEcma,FEcma_Stddev]);
  s:= s+format('<tr %s><td>Min * Max [Full range]</td><td align="right">%.2f * %.2f [%.2f]</td></tr>',[HTML_BGCOLOR_DATA_GRAY,Min,Max,FWidth]);
  s:= s+format('<tr %s><td>1st * 3rd quartile [Range]</td><td align="right">%.2f * %.2f [%.2f]</td></tr>',[HTML_BGCOLOR_DATA_GRAY,FTabQuantiles[1],FTabQuantiles[3],FInterQuartileRange]);
  //param. de distribution
  s:= s+format('<tr %s><td>Skewness (std-dev)</td><td align="right">%.4f (%.4f)</td></tr>',[HTML_BGCOLOR_HEADER_GRAY,FSkewness,FStdSkewness]);
  s:= s+format('<tr %s><td>Kurtosis (std-dev)</td><td align="right">%.4f (%.4f)</td></tr>',[HTML_BGCOLOR_HEADER_GRAY,FKurtosis,FStdKurtosis]);
  s:= s+'</table>';
 s:= s+'</td>';
 //histogramme
 s:= s+'<td valign="top">';
  s:= s+HTML_HEADER_TABLE_RESULT+HTML_TABLE_COLOR_HEADER_BLUE+
        '<th width="150">Values</th><th width="50">Count</th><th width="50">Percent</th><th width="180">Histogram</th></tr>';
  //le premier
  s:= s+format(HTML_TABLE_COLOR_DATA_BLUE+'<td>x_<_%.4f</td><td align="right">%d</td><td align="right">%.2f%s</td><td>%s</td></tr>',
               [FTabBornesHisto[1],FTabEffectifsHisto[1],100.0*FTabEffectifsHisto[1]/(1.0*self.NbExamples),'%',getHTMLHistogram(trunc(50.0*FTabEffectifsHisto[1]/(1.0*self.NbExamples)))]);
  //les autres
  for j:= 2 to pred(SDCP_NB_INTV_HISTO) do
   s:= s+format(HTML_TABLE_COLOR_DATA_BLUE+'<td>%.4f_=<_x_<_%.4f</td><td align="right">%d</td><td align="right">%.2f%s</td><td>%s</td></tr>',
                [FTabBornesHisto[pred(j)],FTabBornesHisto[j],FTabEffectifsHisto[j],100.0*FTabEffectifsHisto[j]/(1.0*self.NbExamples),'%',getHTMLHistogram(trunc(50.0*FTabEffectifsHisto[j]/(1.0*self.NbExamples)))]);
  //le dernier
  s:= s+format(HTML_TABLE_COLOR_DATA_BLUE+'<td>x>=_%.4f</td><td align="right">%d</td><td align="right">%.2f%s</td><td>%s</td></tr>',
               [FTabBornesHisto[pred(SDCP_NB_INTV_HISTO)],FTabEffectifsHisto[SDCP_NB_INTV_HISTO],100.0*FTabEffectifsHisto[SDCP_NB_INTV_HISTO]/(1.0*self.NbExamples),'%',getHTMLHistogram(trunc(50.0*FTabEffectifsHisto[SDCP_NB_INTV_HISTO]/(1.0*self.NbExamples)))]);
  s:= s+'</table>';      
 s:= s+'</td>';
 //and then...
 result:= s;
end;

function TCalcStatDesContPlus.getQuantile(prc: double;
  lst: TExamples): double;
var p: integer;
begin
 //position de l'lment  rcuprer
 p:= trunc(prc*lst.Size);
 if (frac(prc*lst.Size)>0)
  then result:= self.Attribute.cValue[lst.Number[succ(p)]]
  else result:= (self.Attribute.cValue[lst.Number[p]]+self.Attribute.cValue[lst.Number[succ(p)]])/2.0;
end;

procedure TCalcStatDesContPlus.RefreshStat(prmExamples: TExamples);
begin
 //calcul des moyennes, min, max, et std-dev
 inherited RefreshStat(prmExamples);
 //les O(n)
 self.Compute_O_n(prmExamples);
 //les O(n log n)
 self.Compute_O_n_log_n(prmExamples);
end;

{ TLstCalcStatDesContinuousPlus }

procedure TLstCalcStatDesContinuousPlus.AddStatFromAtt(prmAtt: TAttribute);
begin
 FLstStat.Add(TCalcStatDesContPlus.Create(prmAtt));
end;

function TLstCalcStatDesContinuousPlus.getHeaderHTML: string;
begin
 result:= HTML_TABLE_COLOR_HEADER_GRAY+'<TH>Attribute</TH><TH width="300">Stats</TH><TH>Histogram</TH></TR>';
end;

end.
