(*********************************************************************)
(* UCompFCDiscDefinition.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(*********************************************************************)

{
@abstract(Composant de base pour la discrtisation)
@author(Ricco)
@created(19/03/2004)
Composant anctre de la discrtisation, toutes les structures sont prpares,
notamment les nouvelles variables discrtes gnres et le mode de classement d'un
nouvel individu.

Les attributs  discrtiser sont dans "INPUT", selon les mthodes, il se peut que des attributs
dans TARGET aient un rle  jouer.
}
unit UCompFCDiscDefinition;

interface

USES
        Contnrs, Forms,
        UCompDefinition,
        UCompFCDefinition,
        UCompManageDataset,
        UOperatorDefinition,
        UDatasetDefinition,
        UDatasetImplementation;

TYPE
        {gnrateur de composant de base de la discrtisation}
        TGenFCDiscBase = class(TMLGenComp)
                         protected
                         procedure   GenCompInitializations(); override;
                         end;

        {composant de base de la discrtisation}
        TMLFCDiscBase = class(TMLCompFC)
                        protected
                        function    getGenericAttName(): string; virtual; abstract;
                        function    GetLogResultDescription(): string; override;
                        end;

        TSetAttCutPoints = class;

        {oprateur gnrique de discrtisation}
        TOpFCDiscBase = class(TOperatorFC)
                        protected
                        {liste des points de discrtisation}
                        FSetAttCutPoints: TSetAttCutPoints;
                        {new -- 20/04/2006 -- vrifier que la liste modifie - gnre - existe dj,
                        dans ce cas pas besoin de la gnrer de nouveau, c'est utile dans la validation croise, etc.
                        et a a un sens uniquement si le nom de l'attribut gnr est compos  partir de l'attribut source
                        cf. bug recens le 19/04/2006}
                        function concorde_generated_attributes(generated, source: TLstAttributes): boolean;
                        {au dpart, il n'y a que des inputs, et ils sont tous continus}
                        function  CheckAttributes(): boolean; override;
                        {excution}
                        function  CoreExecute(): boolean; override;
                        {prparer la liste des variables  discrtiser}
                        procedure AddAttToDiscretize();
                        {calculer les bornes de discrtisation}
                        procedure BuildCutPoints(); virtual; abstract;
                        {appliquer les bornes de discrtisation et gnrer les nouveaux attributs --  crire une seule fois}
                        procedure GenerateDiscAttributes(); virtual;
                        public
                        constructor Create(AOwner: TObject); override;
                        destructor  Destroy; override;
                        function    getHTMLResultsSummary(): string; override;
                        property    SetAttCutPoints: TSetAttCutPoints read FSetAttCutPoints;
                        end;

        {paramtre gnrique de discrtisation}
        TOpPrmFCDiscBase = class(TOperatorPrmFC)
                           protected
                           function    CreateDlgParameters(): TForm; override;
                           end;

        {ensemble de points de discrtisation}
        TSetCutPoints = array of TTypeContinue;

        {points de discrtisation d'un attribut}
        TAttCutPoints = class(TObject)
                        private
                        {attribut  discrtiser}
                        FAttToDisc: TAttContinue;
                        {liste des points de discrtisation}
                        FCutPoints: TSetCutPoints;
                        {nombre de points actuels}
                        FNbCutPoints: integer;
                        {capacit du tableau de discrtisation}
                        FNbCapacity: integer;
                        {appliquer la discrtisation sur un individu}
                        procedure   applyDisc(var att: TAttDiscrete; ex: integer);
                        public
                        constructor create(prmAtt: TAttribute; prmMinValue: TTypeContinue = -1.0e38);
                        destructor  destroy(); override;
                        procedure   addCutPoint(prmValue: TTypeContinue);
                        procedure   produceDiscretizedAtt(var att:TAttDiscrete; mlcDisc: TMLFCDiscBase); overload;
                        function    produceDiscretizedAtt(mlcDisc: TMLFCDiscBase): TAttDiscrete; overload;
                        property    AttToDisc: TAttContinue read FAttToDisc;
                        property    NbCutPoints: integer read FNbCutPoints;
                        property    CutPoints: TSetCutPoints read FCutPoints;
                        end;

        {gestion de la discrtisation d'un ensemble d'attributs}
        TSetAttCutPoints = class(TObject)
                           private
                           FLstAttCutPoints: TObjectList;
                           public
                           constructor  create();
                           destructor   destroy(); override;
                           procedure    clear();
                           procedure    addAttCutPoints(prmAttCP: TAttCutPoints);
                           function     getAttCutPoints(i: integer): TAttCutPoints;
                           function     getNbAttCutPoints(): integer;
                           end;


implementation


USES
        Windows,
        SysUtils, UConstConfiguration, ULogFile;

CONST
        DEFAULT_NB_CUT_POINTS_CAPACITY = 10;
        DELTA_ADD_CUT_POINTS_CAPACITY  = 5;

{ TGenFCDiscBase }

procedure TGenFCDiscBase.GenCompInitializations;
begin
 FMLComp:= mlcFeatureConstruction;
end;

{ TMLFCDiscBase }

function TMLFCDiscBase.GetLogResultDescription: string;
begin
 result:= format('%d discrete attributes generated...',[LocalDataset.Count]);
end;

{ TOpFCDiscBase }

procedure TOpFCDiscBase.AddAttToDiscretize;
var j: integer;
begin
 FSetAttCutPoints.clear();
 //on remplit de nouveau
 for j:= 0 to pred(workdata.LstAtts[asInput].Count) do
  FSetAttCutPoints.addAttCutPoints(TAttCutPoints.create(workdata.LstAtts[asInput].Attribute[j]));
end;

function TOpFCDiscBase.CheckAttributes: boolean;
begin
 result:= (self.WorkData.LstAtts[asInput].Count>0) and (self.WorkData.LstAtts[asInput].isAllCategory(caContinue));
end;

function TOpFCDiscBase.concorde_generated_attributes(generated,
  source: TLstAttributes): boolean;
var attSource, attDest: TAttribute;
    j: integer;
    ok: boolean;
    p: integer;
begin
 //vrifier si le nombre d'attributs concorde
 ok:= (generated.Count = source.Count);
 //si oui, vrifier que le nom de chaque attribut gnr est compos  partir du nom de l'attribut source correspondant
 if ok
  then
   begin
    for j:= 0 to pred(source.Count) do
     begin
      attDest:= generated.Attribute[j];
      attSource:= source.Attribute[j];
      //vrification par le nom, on sait que le nom de l'attribut discrtis comprend le nom de l'attribut  discrtiser
      p:= pos(attSource.Name,attDest.Name);
      //si pas trouv --> souci
      if (p = 0)
       then
        begin
         ok:= false;
         BREAK;
        end;
     end;
   end;
 result:= ok;
end;

function TOpFCDiscBase.CoreExecute: boolean;
var tps: cardinal;
begin
 result:= TRUE;
 TRY
 self.AddAttToDiscretize();
 //chercher les bornes de discrtisation
 tps:= GetTickCount();
 self.BuildCutPoints();
 tps:= GetTickCount()-tps;
 TraceLog.WriteToLogFile(format('[DISC] %d ms for cut points computation',[tps]));
 //coder le nouvel attribut
 tps:= GetTickCount();
 self.GenerateDiscAttributes();
 tps:= GetTickCount()-tps;
 TraceLog.WriteToLogFile(format('[DISC] %d ms for attribute codification',[tps]));
 EXCEPT
 result:= FALSE;
 END;
end;

constructor TOpFCDiscBase.Create(AOwner: TObject);
begin
 inherited Create(AOwner);
 FSetAttCutPoints:= TSetAttCutPoints.create();
end;

destructor TOpFCDiscBase.Destroy;
begin
 FSetAttCutPoints.Free;
 inherited;
end;

procedure TOpFCDiscBase.GenerateDiscAttributes();
var i: integer;
    att: TAttDiscrete;


begin
 //****************************************************************************************************
 //new -- 19/04/2006 -- souci, attention dans le cas o cette procdure est suivie de la discrtisation
 //le fait de supprimer les attributs gnrs pour les re-crr entrane des erreurs en cascade en validation croise
 //tout simplement parce que les pointeurs sont invalids dans la suite !
 //on va donc procder  un test pour voir si la liste concorde, dans ce cas on r-applique les bornes de discrtisation
 //sans re-crer le pointeur d'attribut qui reste valide pour le reste du diagramme
 //********************************************************************************

 {$B-}
 if not(self.concorde_generated_attributes(GenAtts,workdata.LstAtts[asInput]))
  then
   begin
    //******************
    //ancienne procdure
    //******************

    //vider les projections courantes
    GenAtts.Clear();
    //construire les projections pour chaque attribut dont on dispose des bornes
    for i:= 0 to pred(FSetAttCutPoints.getNbAttCutPoints) do
     begin
      att:= FSetAttCutPoints.getAttCutPoints(i).produceDiscretizedAtt(self.MLOwner as TMLFCDiscBase);
      GenAtts.Add(att);
     end;

   end
  else
   begin
    //***********************************************************************************
    //nouvelle procdure -- 19/04/2006 -- rutilisation des pointeurs d'attributs anciens
    //auxquels on applique la discrtisation
    //***********************************************************************************

    //construire les projections pour chaque attribut dont on dispose des bornes
    for i:= 0 to pred(FSetAttCutPoints.getNbAttCutPoints) do
     begin
      att:= GenAtts.Attribute[i] as TAttDiscrete;
      FSetAttCutPoints.getAttCutPoints(i).produceDiscretizedAtt(att,self.MLOwner as TMLFCDiscBase);
     end;
    
   end;
end;

function TOpFCDiscBase.getHTMLResultsSummary: string;
var sTmp,sCp: string;
    cpAtt: TAttCutPoints;
    i,k: integer;
begin
 //dcrire les donnes
 sTmp:= '<P><H3>Data description</H3>';
 sTmp:= sTmp+HTML_HEADER_TABLE_RESULT;
 sTmp:= sTmp+format('%s<td>Attributes discretized</td><td>%d</td></tr>',[HTML_TABLE_COLOR_DATA_BLUE,FSetAttCutPoints.getNbAttCutPoints]);
 sTmp:= sTmp+format('%s<td>Examples</td><td>%d</td></tr>',[HTML_TABLE_COLOR_DATA_BLUE,self.WorkData.Examples.Size]);
 sTmp:= sTmp+'</table>';
 //dcrire les variables construites et le nombre d'intervalles
 sTmp:= sTmp+'<P><H3>Generated attributes</H3>';
 sTmp:= sTmp+HTML_HEADER_TABLE_RESULT+HTML_TABLE_COLOR_HEADER_GRAY+
                   '<TH>Source</TH><TH>New att</TH><TH>Intervals</TH><TH>Cut points</TH></TR>';
 for i:= 0 to pred(FSetAttCutPoints.getNbAttCutPoints) do
  begin
   sTmp:= sTmp+format('%s<td>%s</td><td>%s</td><td align="right">%d</td>',
                      [HTML_TABLE_COLOR_DATA_GRAY,workdata.LstAtts[asInput].Attribute[i].Name,GenAtts.Attribute[i].Name,GenAtts.Attribute[i].nbValues]);
   cpAtt:= FSetAttCutPoints.getAttCutPoints(i);
   sCp:= '';
   for k:= 1 to cpAtt.NbCutPoints do
    sCp:= sCp+format(' %.4f ;',[cpAtt.CutPoints[k]]);
   if (length(sCp)>0)
    then sCp:= copy(sCp,1,pred(length(sCp)));
   sTmp:= sTmp+format('<td>(%s)</td></tr>',[sCp]);
  end;
 sTmp:= sTmp+'</table>';
 result:= sTmp;
end;

{ TAttCutPoints }

procedure TAttCutPoints.addCutPoint(prmValue: TTypeContinue);
begin
 //largir le tableau des cut-points ?
 if (FNbCutPoints=pred(FNbCapacity))
  then
   begin
    inc(FNbCapacity,DELTA_ADD_CUT_POINTS_CAPACITY);
    setLength(FCutPoints,succ(FNbCapacity));
   end;
 //insrer le nouveau cut-point
 inc(FNbCutPoints);
 FCutPoints[FNbCutPoints]:= prmValue;
end;

procedure TAttCutPoints.applyDisc(var att: TAttDiscrete; ex: integer);
var k: TTypeDiscrete;
    value: TTypeContinue;
begin
  value:= FAttToDisc.cValue[ex];
  //mthode simple -- trs efficace jusqu' 20 modalits -- n/2 oprations en moyenne
  //x < cutpoint -> k, x >= cutpoint -> k+1
  k:= 1;
  while (k<=FNbCutPoints) and (value>=FCutPoints[k]) do inc(k);
  //recherche binaire -- thoriquement en log(n) mais bcp de conditions ici !!!
  //non, trop compliqu et pas tranchant du tout dans ce cadre...
  //affecter
  att.dValue[ex]:= k;
end;

constructor TAttCutPoints.create(prmAtt: TAttribute;
  prmMinValue: TTypeContinue);
begin
 inherited Create();
 FAttToDisc:= prmAtt as TAttContinue;
 setLength(FCutPoints,succ(DEFAULT_NB_CUT_POINTS_CAPACITY));
 FCutPoints[0]:= prmMinValue;
 FNbCutPoints:= 0;
 FNbCapacity:= DEFAULT_NB_CUT_POINTS_CAPACITY;
end;

destructor TAttCutPoints.destroy;
begin
 setLength(FCutPoints,0);
 inherited;
end;

procedure TAttCutPoints.produceDiscretizedAtt(var att: TAttDiscrete; mlcDisc: TMLFCDiscBase);
var k,i: integer;
begin
 if not(assigned(att))
  then att:= TAttDiscrete.Create(format('d_%s_%s_%d',[mlcDisc.getGenericAttName(),FAttToDisc.Name,mlcDisc.Number]),FAttToDisc.Size)
  else att.Size:= FAttToDisc.Size;
 //vider les modalits
 att.LstValues.clear();
 //ajouter les modalits
 if (FNbCutPoints>0)
  then
   begin
    att.LstValues.getValue(format('m_<_%.8f',[FCutPoints[1]]));
    for k:= 2 to FNbCutPoints do
     att.LstValues.getValue(format('%.8f_=<_m_<_%.8f',[FCutPoints[pred(k)],FCutPoints[k]]));
    att.LstValues.getValue(format('m_>=_%.8f',[FCutPoints[FNbCutPoints]]))
   end
  else
   //il n'y a pas de cut points, donc pas de modalits
   begin
    att.LstValues.getValue('_const_');
   end;
 //coder les individus
 if (FNbCutPoints = 0)
  then
   begin
    for i:= 1 to FAttToDisc.Size do
     att.dValue[i]:= 1;
   end
  else
   begin
    for i:= 1 to FAttToDisc.Size do
     self.applyDisc(att,i);
   end;
end;

function TAttCutPoints.produceDiscretizedAtt(mlcDisc: TMLFCDiscBase): TAttDiscrete;
var att: TAttDiscrete;
begin
 att:= NIL;
 self.produceDiscretizedAtt(att,mlcDisc);
 result:= att;
end;

{ TSetAttCutPoints }

procedure TSetAttCutPoints.addAttCutPoints(prmAttCP: TAttCutPoints);
begin
 FLstAttCutPoints.Add(prmAttCP);
end;

procedure TSetAttCutPoints.clear;
begin
 FLstAttCutPoints.Clear();
end;

constructor TSetAttCutPoints.create;
begin
 inherited Create();
 //liste propritaire
 FLstAttCutPoints:= TObjectList.Create(TRUE);
end;

destructor TSetAttCutPoints.destroy;
begin
 FLstAttCutPoints.Free;
 inherited;
end;

function TSetAttCutPoints.getAttCutPoints(i: integer): TAttCutPoints;
begin
 result:= FLstAttCutPoints.Items[i] as TAttCutPoints;
end;

function TSetAttCutPoints.getNbAttCutPoints: integer;
begin
 result:= FLstAttCutPoints.Count;
end;

{ TOpPrmFCDiscBase }

function TOpPrmFCDiscBase.CreateDlgParameters: TForm;
begin
 result:= NIL;
end;

end.
