(****************************************************************)
(* UDatasetExamples.pas - Copyright (c) 2004 Ricco RAKOTOMALALA *)
(****************************************************************)

{
@abstract(Vecteur d'individus - Sera utilis pour discerner les training, les tests et les illustratifs)
@author(Ricco)
@created(12/01/2004)
Gestion d'un vecteur d'individus, tout doit tre accs sur la rapidit, il y a bcp de choses  tirer, cette
unit sera souvent mise  contribution dans les calculs !!!
}
unit UDatasetExamples;

interface

USES
        Contnrs,
        UDatasetDefinition,
        UCalcRndGenerator;

CONST
        {taille mmoire pour mmoriser un numro d'individu}
        SIZE_NUMBER_EXAMPLE = sizeof(integer);

TYPE

        {classe de poids des individus}
        TWeightExamples = class
                          private
                          {taille du tableau}
                          FSize: integer;
                          {tableau de poids}
                          FTabWeight: array of double;
                          {rcuprer un poids}
                          function    getWeight(i: integer): double;
                          {assigner un poids}
                          procedure   setWeight(i: integer; prmValue: double);
                          public
                          constructor Create(prmSize: integer);
                          destructor  Destroy; override;
                          {copier le contenu du tableau}
                          procedure   Copy(prmSource: TWeightExamples);
                          {poids uniforme - 1.0}
                          procedure   Initialize();
                          {normaliser de manire  ce que la somme des poids soit gal  1.0}
                          procedure   Normalize();
                          {construire les poids cumuls, le dernier est  zro}
                          procedure   CalcCumulated();
                          {rechercher l'index d'un individu, attention la liste doit tre cumule pour cela}
                          function    IndexOf(proba: double): integer;
                          {accs aux poids}
                          property    Weight[i: integer]: double read getWeight write setWeight;
                          end;

        {Tableau interne pour la gestion des numros}
        TTabExamples = array[1..MAX_NB_EXAMPLES] of integer;
        {Pointeur sur le tableau}
        PTabExamples = ^TTabExamples;

        {classe gestion d'un vecteur d'individus}
        TExamples = class
                    private
                    {nombre d'individus dans la liste}
                    FSize: Integer;
                    {tableau interne}
                    FTab: PTabExamples;
                    {Nombre d'ajout dans le tableau}
                    FNbAdd: Integer;
                    {modifier la taille du vecteur}
                    procedure SetSize(prmSize: integer);
                    {rcuprer un numro}
                    function  GetNumber(i: Integer): integer;
                    {placer un numro}
                    procedure SetNumber(i: Integer; prmValue: integer);
                    {procdures internes pour le tri  plusieurs critres}
                     procedure FCompare(const LstVar: array of TAttribute; a,b: Integer; Ascending: Boolean; Var RetVal: Integer);
                     procedure qSortHelp(const LstVar: array of TAttribute; pivotP: Integer; nElem: Integer;Const Ascending: Boolean);
                     procedure FSwap(a,b: Integer);
                    public
                    {construire et initialiser le tableau}
                    constructor Create(size: integer);
                    {dtruire le tableau interne}
                    destructor  Destroy; override;
                    {initialisation avec des numros successifs}
                    procedure Initialize();
                    {copier le contenu d'une autre liste}
                    procedure Copy(prmSource: TExamples);
                    {lancer la procdure d'ajout}
                    procedure BeginAdd();
                    {finir la procdure d'ajout}
                    procedure EndAdd();
                    {Ajouter un individu}
                    procedure AddExample(index: Integer);
                    {ajouter une srie d'individus puis trier}
                    procedure AddExamples(prmSource: TExamples);
                    {Echantillonnage simple, sur une proportion, en O(N), renvoie la taille de l'chantillon}
                    function Sampling(prmProportion: single; var prmEx: TExamples; seedMode: TStartSeed = seedRandom): integer; overload;
                    function Sampling(prmSize: integer; var prmEx: TExamples; seedMode: TStartSeed = seedRandom): integer; overload;
                    {chantillonnage reprsentatif - associ  un attribut}
                    function SamplingRepresentative(prmAtt: TAttribute; prmSize: integer; var prmEx: TExamples): integer; overload;
                    function SamplingRepresentative(prmAtt: TAttribute; prmProportion: single; var prmEx: TExamples): integer; overload;
                    {chantillonnage quilibr}
                    function SamplingBalanced(prmAtt: TAttribute; prmSize: integer; var prmEx: TExamples): integer; overload;
                    function SamplingBalanced(prmAtt: TAttribute; prmProportion: single; var prmEx: TExamples): integer; overload;
                    {Echantillonnage sur le mode subdivision app-test}
                    procedure SamplingSplitting(prmProportion: single; train,test: TExamples; seedMode: TStartSeed = seedRandom);
                    {Tirage avec remise}
                    procedure SampleReplicate(var prmExamples: TExamples);
                    procedure SampleReplicateWeighted(var prmExamples: TExamples; prmWeights: TWeightExamples);
                    {tri HeapSort cf. Numerical Recipes}
                    procedure HeapSort();
                    {tri QuickSort cf. Numerical Recipes}
                    procedure QuickSort();
                    {tri selon une variable, adaptation du HeapSort avec un critre, Numerical Recipes}
                    procedure HeapSortBy(prmAtt: TAttribute);
                    {idem mais from QuickSort}
                    procedure QuickSortBy(prmAtt: TAttribute);
                    {trier selon un ensemble d'attributs}
                    Procedure SortByMultipleAttributes(Const LstVar: array of TAttribute; Ascending: Boolean);
                    {dispatcher les individus selon les modalits d'une variable forcment discrte}
                    function  DispatchExamples(prmAttDisp: TAttribute): TObjectList;
                    {modifier alatoire l'ordre des individus en renvoyant une nouvelle liste}
                    function  funcRandomizeExamples(seedMode: TStartSeed): TExamples;
                    {modifier l'ordre de la liste courante -- new -- 11/05/2006 -- on peut contrler la squence des valeurs "seedStart}
                    procedure procRandomizeExamples(seedMode: TStartSeed ; seedStart : integer = 0);
                    {rcuprer le complmentaire par rapport  une taille d'ensemble d'individus donne}
                    function  getComplementaire(prmTaille: integer): TExamples;
                    {remplir avec des valeurs 1,2,3,...}
                    procedure fillValues();
                    {accs aux numros}
                    property  Number[i: Integer]: integer read GetNumber write SetNumber;
                    {accs  la taille du vecteur}
                    property  Size: Integer read FSize write SetSize;
                    end;

implementation

uses
        SysUtils,
        ULogFile;

CONST
        {Taille accroissement du tableau d'exemples pour les ajouts}
        DELTA_ADD_EXAMPLE = 10000;

{ TExamples }

procedure TExamples.AddExample(index: Integer);
begin
 inc(FNbAdd);
 if (FNbAdd>Size)
  then Size:= FNbAdd+DELTA_ADD_EXAMPLE;
 self.FTab^[FNbAdd]:= index;
end;

procedure TExamples.AddExamples(prmSource: TExamples);
var oldSize: integer;
begin
 oldSize:= self.Size;
 self.Size:= oldSize + prmSource.Size;
 move(prmSource.FTab^,self.FTab^[succ(oldSize)],prmSource.Size*SIZE_NUMBER_EXAMPLE);
 //pas de tri qui ralentirait sur les gros fichiers
 //comme il n'y a pas de cache  grer de toute manire pour l'instant
 //self.Sort();
end;

procedure TExamples.BeginAdd;
begin
 FNbAdd:= 0;
end;

procedure TExamples.Copy(prmSource: TExamples);
begin
 if (self.Size<>prmSource.Size)
  then self.Size:= prmSource.Size;
 //copie mmoire brute
 move(prmSource.FTab^,FTab^,self.Size*SIZE_NUMBER_EXAMPLE);
end;

constructor TExamples.Create(size: integer);
begin
 inherited Create();
 FSize:= -1;
 self.SetSize(size);
end;

destructor TExamples.Destroy;
begin
 ReAllocMem(FTab,0);
 inherited Destroy;
end;

function TExamples.DispatchExamples(prmAttDisp: TAttribute): TObjectList;
var lst: TObjectList;
    k,i: integer;
    ex: TExamples;
    example: integer;
begin
 //la liste est propritaire
 lst:= TObjectList.Create(TRUE);
 //pour chaque modalit de la variable
 for k:= 1 to prmAttDisp.nbValues do
  begin
   ex:= TExamples.Create(self.Size);
   ex.BeginAdd();
   lst.Add(ex);
  end;
 //puis lister maintenant
 for i:= 1 to self.Size do
  begin
   example:= self.Number[i];
   k:= pred(prmAttDisp.dValue[example]);
   (lst.Items[k] as TExamples).AddExample(example);
  end;
 //cloturer
 for k:= 0 to pred(lst.Count) do
  (lst.Items[k] as TExamples).EndAdd();
 //renvoyer la liste
 result:= lst;
end;

procedure TExamples.EndAdd;
begin
 Size:= FNbAdd;
 FNbAdd:= 0;
end;

function TExamples.GetNumber(i: Integer): integer;
begin
 result:= FTab^[i];
end;

procedure TExamples.Initialize;
var i: Integer;
begin
 for i:= 1 to self.Size do
  self.Number[i]:= i;
end;

function TExamples.Sampling(prmProportion: single;
  var prmEx: TExamples; seedMode: TStartSeed = seedRandom): integer;
var pn: integer;
begin
 pn:= TRUNC(1.0*prmProportion*self.Size);
 result:= self.Sampling(pn,prmEx,seedMode);
end;

function TExamples.Sampling(prmSize: integer;
  var prmEx: TExamples; seedMode: TStartSeed = seedRandom): integer;
var GN,pn: Integer;
    i: integer;
    rnd: TRndGenerator;
begin
 GN:= self.Size;
 //cration interne au cas o la liste n'est pas initialise
 //ce cas sera trs rare mais possible
 if not(assigned(prmEx))
  then prmEx:= TExamples.Create(GN);
 //dterminer la taille de l'chantillon
 pn:= prmSize;
 //tester la taille
 if (GN<=pn)
  then prmEx.Copy(self)
  else
   begin
     i:= 0;
     prmEx.BeginAdd();
     rnd:= TRndGenerator.Create(seedMode,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2);
     while (pn>0) do
      begin
       inc(i);
       if (rnd.RanMar()*GN>pn)
        then dec(GN)
        else
         begin
          prmEx.AddExample(self.Number[i]);
          dec(pn);
          dec(GN);
         end;
      end;
     rnd.Free();
     prmEx.EndAdd();
   end;
 result:= prmEx.Size;
end;

function TExamples.SamplingRepresentative(prmAtt: TAttribute;
  prmSize: integer; var prmEx: TExamples): integer;
var lstEx: TObjectList;
    exSize: array of integer;
    tmpEx: TExamples;
    i,n,s: integer;
    portion: double;
    rnd: TRndGenerator;
begin
 if not(assigned(prmEx))
  then prmEx:= TExamples.Create(0)
  else prmEx.Size:= 0;
 //crer les listes intermdiaires
 lstEx:= self.DispatchExamples(prmAtt);
 //nombre de modalits de la variable
 n:= lstEx.Count;
 //proportion dans chaque tas
 portion:= (1.0*prmSize)/(1.0*self.Size);
 //tableau pour les tailles d'chantillons
 setLength(exSize,n);
 //dterminer les tailles d'chantillons
 s:= 0;
 for i:= 0 to pred(n) do
  begin
   //reprsentatif
   exSize[i]:= TRUNC(portion*(lstEx.Items[i] as TExamples).Size);
   s:= s+exSize[i];
  end;
 //complter de manire alatoire - ce dispatch devrait tre ralis avec des probas ingales
 // voir plus tard si on veut vraiment sophistiquer
 rnd:= TRndGenerator.Create(STD_SEED_START,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2);
 while (s<prmSize) do
  begin
   i:= rnd.IRanMarRange(n);
   inc(exSize[i]);
   inc(s);
  end;
 rnd.Free();
 //chantillonner dans chaque portion
 tmpEx:= NIL;
 for i:= 0 to pred(n) do
  begin
   (lstEx.Items[i] as TExamples).Sampling(exSize[i],tmpEx);
   prmEx.AddExamples(tmpEx);
  end;
 //trier au final - a peut toujours servir....
 prmEx.QuickSort();
 //librer les var. temp.
 setLength(exSize,0);
 lstEx.Free;
 tmpEx.Free;
 //zoo...
 result:= prmEx.Size;
end;

function TExamples.SamplingBalanced(prmAtt: TAttribute; prmSize: integer;
  var prmEx: TExamples): integer;
var lstEx: TObjectList;
    exSize: array of integer;
    tmpEx: TExamples;
    i,n,s: integer;
    rnd: TRndGenerator;
begin
 //rduire  zro la taille pour les ajouts successifs
 if not(assigned(prmEx))
  then prmEx:= TExamples.Create(0)
  else prmEx.Size:= 0;
 //crer les listes intermdiaires
 lstEx:= self.DispatchExamples(prmAtt);
 //nombre de modalits de la variable
 n:= lstEx.Count;
 //tableau pour les tailles d'chantillons
 setLength(exSize,n);
 //dterminer les tailles d'chantillons
 s:= 0;
 for i:= 0 to pred(n) do
  begin
   //quilibr
   exSize[i]:= prmSize div n;
   s:= s+exSize[i];
  end;
 //complter de manire alatoire - le dispatch est bon ici
 rnd:= TRndGenerator.Create(STD_SEED_START,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2); 
 while (s<prmSize) do
  begin
   i:= rnd.IRanMarRange(n);
   inc(exSize[i]);
   inc(s);
  end;
 rnd.Free();
 //chantillonner dans chaque portion
 tmpEx:= NIL;
 for i:= 0 to pred(n) do
  begin
   (lstEx.Items[i] as TExamples).Sampling(exSize[i],tmpEx);
   prmEx.AddExamples(tmpEx);
  end;
 //trier au final - a peut toujours servir....
 prmEx.QuickSort();
 //librer les var. temp.
 setLength(exSize,0);
 lstEx.Free;
 tmpEx.Free;
 //zoo...
 result:= prmEx.Size;
end;

function TExamples.SamplingBalanced(prmAtt: TAttribute;
  prmProportion: single; var prmEx: TExamples): integer;
var pn: integer;
begin
 pn:= TRUNC(1.0*prmProportion*self.Size);
 result:= self.SamplingBalanced(prmAtt,pn,prmEx);
end;

function TExamples.SamplingRepresentative(prmAtt: TAttribute;
  prmProportion: single; var prmEx: TExamples): integer;
var pn: integer;
begin
 pn:= TRUNC(1.0*prmProportion*self.Size);
 result:= self.SamplingRepresentative(prmAtt,pn,prmEx);
end;

procedure TExamples.SamplingSplitting(prmProportion: single; train,
  test: TExamples; seedMode: TStartSeed = seedRandom);
var GN,pn: Integer;
    i: integer;
    rnd: TRndGenerator;
begin
 //dterminer la taille de l'chantillon
 GN:= self.Size;
 pn:= TRUNC(1.0*prmProportion*GN);
 //TraceLog.WriteToLogFile(format('SS >> (begin) sample splitting, %d train demand',[pn]));
 //commencer le partitionnement
 i:= 0;
 train.BeginAdd();
 test.BeginAdd();
 rnd:= TRndGenerator.Create(seedMode,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2);
 while (pn>0) or (gn>0) do
  begin
   inc(i);
   if (rnd.RanMar()*GN>pn)
    then
     begin
      dec(GN);
      test.AddExample(self.number[i]);
     end
    else
     begin
      train.AddExample(self.Number[i]);
      dec(pn);
      dec(GN);
     end
  end;
 rnd.Free();
 train.EndAdd();
 test.EndAdd();
 TraceLog.WriteToLogFile(format('SS >> sample splitting, %d train and %d test construits',[train.size,test.Size]));
end;

procedure TExamples.SetNumber(i, prmValue: integer);
begin
 FTab^[i]:= prmValue;
end;

procedure TExamples.SetSize(prmSize: integer);
begin
 if (prmSize<>FSize)
  then
   begin
    FSize:= prmSize;
    ReAllocMem(FTab,FSize*SIZE_NUMBER_EXAMPLE);
   end;
end;

procedure TExamples.HeapSort();
LABEL 99;
VAR
   l,j,ir,i,n: integer;
   rra: integer;
BEGIN
   //taille du tableau
   n:= self.Size;

   l := (n DIV 2)+1;
   ir := n;
   WHILE true DO BEGIN
      IF (l > 1) THEN BEGIN
         l := l-1;
         rra := Number[l]
      END ELSE BEGIN
         rra := Number[ir];
         Number[ir] := Number[1];
         ir := ir-1;
         IF (ir = 1) THEN BEGIN
            Number[1] := rra;
            GOTO 99
         END
      END;
      i := l;
      j := l+l;
      WHILE (j <= ir) DO BEGIN
         IF (j < ir) THEN
            IF (Number[j] < Number[j+1]) THEN j := j+1;
         IF (rra < Number[j]) THEN BEGIN
            Number[i] := Number[j];
            i := j;
            j := j+j
         END ELSE
            j := ir+1
      END;
      Number[i] := rra
   END;
99:
END;

procedure TExamples.HeapSortBy(prmAtt: TAttribute);
(*
PROCEDURE sort(n: integer; VAR ra: glsarray);
Program using routine SORT must define the type
TYPE
   glsarray = ARRAY [1..np] OF real;
in the main routine, with np >= n.   *)
LABEL 99;
VAR
   l,j,ir,i,n: integer;

   ira: integer;
   rra: TTypeContinue;

   rnd: TRndGenerator;
BEGIN
   rnd:= TRndGenerator.Create(STD_SEED_START,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2);
   //taille du tableau
   n:= self.Size;

   l := (n DIV 2)+1;
   ir := n;
   
   WHILE true DO
    BEGIN
    
      IF (l > 1) THEN
       BEGIN
         l := l-1;

         ira := Number[l];
         rra := prmAtt.cValue[Number[l]]
       END
      ELSE
       BEGIN
         ira := Number[ir];
         rra := prmAtt.cValue[Number[ir]];

         Number[ir] := Number[1];

         ir := ir-1;
         IF (ir = 1) THEN
          BEGIN
            Number[1] := ira;
            GOTO 99
          END
       END;
       
      i := l;
      j := l+l;

      WHILE (j <= ir) DO
       BEGIN
         IF (j < ir)
          THEN
           //ajout d'une cond. supplmentaire pour la gestion des ex-aequo
           IF (prmAtt.cValue[Number[j]] < prmAtt.cValue[Number[j+1]]) or ((prmAtt.cValue[Number[j]] = prmAtt.cValue[Number[j+1]]) and (rnd.RanMar()<0.5))
            THEN j := j+1;

         //idem, gestion des ex-aequo
         IF (rra < prmAtt.cValue[Number[j]]) or ((rra = prmAtt.cValue[Number[j]]) and (rnd.RanMar()<0.5))
          THEN
           BEGIN
            Number[i] := Number[j];
            
            i := j;
            j := j+j
           END
          ELSE
           j := ir+1
       END;

      Number[i] := ira
   END;
99:
   rnd.Free();
END;

procedure TExamples.QuickSortBy(prmAtt: TAttribute);
LABEL 11,21,22,30,99;
CONST
   m=7;
   fm=7875;
   fa=211.0;
   fc=1663.0;
   nstack=500;
VAR
   l,jstack,j,ir,iq,i: integer;
   fx,fmi,a: TTypeContinue;
   istack: ARRAY[1..nstack] OF integer;
   //new...
   i_a: integer;
   n: integer;
BEGIN
   //initialisations
   n:= self.Size; 
   //calculs
   fmi := 1.0/fm;
   jstack := 0;
   l := 1;
   ir := n;
   fx := 0.0;
   WHILE true DO BEGIN
      IF ((ir-l) < m) THEN BEGIN
         FOR j := l+1 TO ir DO BEGIN
            //a := arr[j];
            i_a:= self.Number[j];
            a:= prmAtt.cValue[i_a];
            //

            FOR i := j-1 DOWNTO 1 DO BEGIN
               //IF (arr[i] <= a) THEN GOTO 11;
               if (prmAtt.cValue[self.Number[i]] <= a) THEN GOTO 11;
               //
               //arr[i+1] := arr[i]
               self.Number[i+1]:= self.Number[i];
               //
            END;
            i := 0;
11:            //arr[i+1] := a
               self.Number[i+1]:= i_a;  
         END;
         IF (jstack = 0) THEN GOTO 99;
         ir := istack[jstack];
         l := istack[jstack-1];
         jstack := jstack-2
      END ELSE BEGIN
         i := l;
         j := ir;
         fx := (fx*fa+fc)/fm;
         fx := fx-trunc(fx);
         iq := l+(ir-l+1)*trunc(fx*fmi);
         //a := arr[iq];
         //arr[iq] := arr[l];
         i_a:= self.Number[iq];
         a:= prmAtt.cValue[i_a];
         self.Number[iq]:= self.Number[l];
         //
         
21:         IF (j > 0) THEN BEGIN
            //IF (a < arr[j]) THEN BEGIN
            If (a < prmAtt.cValue[self.Number[j]]) THEN BEGIN
            //
               j := j-1;
               GOTO 21
            END
         END;
         IF (j <= i) THEN BEGIN
            //arr[i] := a;
            self.Number[i]:= i_a;
            //
            GOTO 30
         END;
         //arr[i] := arr[j];
         self.Number[i]:= self.Number[j];
         //
         i := i+1;
22:         //IF (i <= n) THEN IF (a > arr[i]) THEN BEGIN
            IF (i <= n) THEN IF (a > prmAtt.cValue[self.Number[i]]) THEN BEGIN
            //    
            i := i+1;
            GOTO 22
         END;
         IF (j <= i) THEN BEGIN
            //arr[j] := a;
            self.Number[j]:= i_a;
            i := j;
            GOTO 30
         END;
         //arr[j] := arr[i];
         self.Number[j]:= self.Number[i];
         //
         j := j-1;
         GOTO 21;
30:         jstack := jstack+2;
         IF (jstack > nstack) THEN BEGIN
            //writeln('pause in QCKSRT - NSTACK must be made larger'); readln
            RAISE EXCEPTION.Create('pile trop petite dans quicksort...');
         END;
         IF ((ir-i) >= (i-l)) THEN BEGIN
            istack[jstack] := ir;
            istack[jstack-1] := i+1;
            ir := i-1
         END ELSE BEGIN
            istack[jstack] := i-1;
            istack[jstack-1] := l;
            l := i+1
         END
      END
   END;
99:   END;

function TExamples.funcRandomizeExamples(seedMode: TStartSeed): TExamples;
var ex: TExamples;
begin
 ex:= TExamples.Create(self.Size);
 ex.Copy(self);
 ex.procRandomizeExamples(seedMode);
 result:= ex;
end;

procedure TExamples.procRandomizeExamples(seedMode: TStartSeed ; seedStart : integer = 0);
var i,numA,numB,n: integer;
    v: integer;
    rnd: TRndGenerator;
begin
 //utiliser une squence du gnrateur
 //rnd:= TRndGenerator.Create(seedMode,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2);
 rnd:= TRndGenerator.Create(seedMode,seedStart,DEFAULT_SEED_VALUE_2);
 //mlanger les donnes
 n:= self.Size;
 for i:= 1 to (n div 2) do
  begin
   numA:= succ(rnd.IRanMarRange(n));
   numB:= succ(rnd.IRanMarRange(n));
   v:= self.Number[numA];
   self.Number[numA]:= self.Number[numB];
   self.Number[numB]:= v;
  end;
 //librer
 rnd.Free;
end;

procedure TExamples.SampleReplicate(var prmExamples: TExamples);
var i,idEx,n: integer;
    rnd: TRndGenerator;    
begin
 if not(assigned(prmExamples))
  then prmExamples:= TExamples.Create(self.Size);
 prmExamples.Size:= self.Size;
 n:= self.Size;//ce sera plus rapide
 rnd:= TRndGenerator.Create(STD_SEED_START,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2);
 for i:= 1 to n do
  begin
   idEx:= succ(rnd.IRanMarRange(n));
   prmExamples.Number[i]:= self.Number[idEx];
  end;
 rnd.Free();
end;

procedure TExamples.SampleReplicateWeighted(var prmExamples: TExamples;
  prmWeights: TWeightExamples);
var i,idEx: integer;
    rnd: TRndGenerator;
begin
 if not(assigned(prmExamples))
  then prmExamples:= TExamples.Create(self.Size);
 prmExamples.Size:= self.Size;
 rnd:= TRndGenerator.Create(STD_SEED_START,DEFAULT_SEED_VALUE_1,DEFAULT_SEED_VALUE_2);
 for i:= 1 to self.Size do
  begin
   idEx:= prmWeights.IndexOf(rnd.RanMar());
   prmExamples.Number[i]:= self.Number[idEx];
  end;
 rnd.Free();
end;

procedure TExamples.QuickSort();
LABEL 11,21,22,30,99;
CONST
   m=7;
   nstack=500;
   fm=7875;
   fa=211.0;
   fc=1663.0;
VAR
   l,jstack,j,ir,iq,i: integer;
   fx,fmi: double;
   a: integer;
   istack: ARRAY[1..nstack] OF integer;
   n: integer;
BEGIN
   //nombre d'individus
   n:= self.Size;
   //suite NRPAS13
   fmi := 1.0/fm;
   jstack := 0;
   l := 1;
   ir := n;
   fx := 0.0;
   WHILE true DO BEGIN
      IF ((ir-l) < m) THEN BEGIN
         FOR j := l+1 TO ir DO BEGIN
            a := Number[j];
            FOR i := j-1 DOWNTO 1 DO BEGIN
               IF (Number[i] <= a) THEN GOTO 11;
               Number[i+1] := Number[i]
            END;
            i := 0;
11:            Number[i+1] := a
         END;
         IF (jstack = 0) THEN GOTO 99;
         ir := istack[jstack];
         l := istack[jstack-1];
         jstack := jstack-2
      END ELSE BEGIN
         i := l;
         j := ir;
         fx := (fx*fa+fc)/fm;
         fx := fx-trunc(fx);
         iq := l+(ir-l+1)*trunc(fx*fmi);
         a := Number[iq];
         Number[iq] := Number[l];
21:         IF (j > 0) THEN BEGIN
            IF (a < Number[j]) THEN BEGIN
               j := j-1;
               GOTO 21
            END
         END;
         IF (j <= i) THEN BEGIN
            Number[i] := a;
            GOTO 30
         END;
         Number[i] := Number[j];
         i := i+1;
22:         IF (i <= n) THEN IF (a > Number[i]) THEN BEGIN
            i := i+1;
            GOTO 22
         END;
         IF (j <= i) THEN BEGIN
            Number[j] := a;
            i := j;
            GOTO 30
         END;
         Number[j] := Number[i];
         j := j-1;
         GOTO 21;
30:         jstack := jstack+2;
         IF (jstack > nstack) THEN BEGIN
            //writeln('pause in QCKSRT - NSTACK must be made larger'); readln
            RAISE Exception.Create('pause in QCKSRT - NSTACK must be made larger');
         END;
         IF ((ir-i) >= (i-l)) THEN BEGIN
            istack[jstack] := ir;
            istack[jstack-1] := i+1;
            ir := i-1
         END ELSE BEGIN
            istack[jstack] := i-1;
            istack[jstack-1] := l;
            l := i+1
         END
      END
   END;
99:   END;

procedure TExamples.FCompare(const LstVar: array of TAttribute; a, b: Integer;
  Ascending: Boolean; var RetVal: Integer);
Var i: Integer;
    att: TAttribute;
Begin
 RetVal:= 0;{galit parfaite}
 {Passer les cascades de niveaux}
 For i:= low(LstVar) To high(LstVar) Do
  Begin
   att:= LstVar[i];
   If (att.cValue[self.Number[a]]<att.cValue[self.Number[b]])
    Then
     Begin
      RetVal:= -1;
      Break;
     End
    Else If (att.cValue[self.Number[a]]>att.cValue[self.Number[b]])
     Then
      Begin
       RetVal:= +1;
       Break;
      End;
  End;
 {Si tri descendant}
 If Not(Ascending)
  Then RetVal:= (-1)*RetVal;
End;

procedure TExamples.FSwap(a, b: Integer);
Var Value: Integer;
Begin
 Value:= self.Number[a];
 self.Number[a]:= self.Number[b];
 self.Number[b]:= Value;
End;

procedure TExamples.qSortHelp(const LstVar: array of TAttribute; pivotP,
  nElem: Integer; const Ascending: Boolean);
label
 TailRecursion,
 qBreak;
var
 leftP, rightP, pivotEnd, pivotTemp, leftTemp: Integer;
 lNum: LongInt;
 retval: integer;
begin
 retval := 0;
 TailRecursion:
 if (nElem <= 2)
  then
   begin
    if (nElem = 2)
     then
      begin
       rightP := pivotP +1;
       FCompare(LstVar,pivotP,rightP,Ascending,retval);
       if (retval > 0) then Fswap(pivotP,rightP);
      end;
    exit;
   end;
 rightP := (nElem -1) + pivotP;
 leftP :=  (nElem shr 1) + pivotP;
 { sort pivot, left, and right elements for "median of 3" }
 FCompare(LstVar,leftP,rightP,Ascending,retval);
 if (retval > 0) then Fswap(leftP, rightP);
 FCompare(LstVar,leftP,pivotP,Ascending,retval);
 if (retval > 0)
  then Fswap(leftP, pivotP)
  else
   begin
    FCompare(LstVar,pivotP,rightP,Ascending,retval);
    if retval > 0 then Fswap(pivotP, rightP);
   end;
 if (nElem = 3)
  then
   begin
    Fswap(pivotP, leftP);
    exit;
   end;
 { now for the classic Horae algorithm }
 pivotEnd := pivotP + 1;
 leftP := pivotEnd;
 repeat
  FCompare(LstVar,leftP,pivotP,Ascending,retval);
  while (retval <= 0) do
   begin
    if (retval = 0)
     then
      begin
       Fswap(leftP, pivotEnd);
       Inc(pivotEnd);
      end;
    if (leftP < rightP)
     then Inc(leftP)
     else goto qBreak;
    FCompare(LstVar,leftP,pivotP,Ascending,retval);
   end; {while}
  while (leftP < rightP) do
   begin
    FCompare(LstVar,pivotP, rightP,Ascending,retval);
    if (retval < 0)
     then Dec(rightP)
     else
      begin
       FSwap(leftP, rightP);
       if (retval <> 0)
        then
         begin
          Inc(leftP);
          Dec(rightP);
         end;
       break;
      end;
   end; {while}
 until (leftP >= rightP);
qBreak:
 FCompare(LstVar,leftP,pivotP,Ascending,retval);
 if (retval <= 0) then Inc(leftP);
 leftTemp := leftP -1;
 pivotTemp := pivotP;
 while ((pivotTemp < pivotEnd) and (leftTemp >= pivotEnd)) do
  begin
   Fswap(pivotTemp, leftTemp);
   Inc(pivotTemp);
   Dec(leftTemp);
  end; {while}
 lNum := (leftP - pivotEnd);
 nElem := ((nElem + pivotP) -leftP);
 if (nElem < lNum)
  then
   begin
    qSortHelp(LstVar,leftP,nElem,Ascending);
    nElem := lNum;
   end
  else
   begin
    qSortHelp(LstVar,pivotP,lNum,Ascending);
    pivotP := leftP;
   end;
 goto TailRecursion;
end; {qSortHelp }

procedure TExamples.SortByMultipleAttributes(const LstVar: array of TAttribute;
  Ascending: Boolean);
begin
 qSortHelp(LstVar,1,self.Size,Ascending);
end;

function TExamples.getComplementaire(prmTaille: integer): TExamples;
var exIn,exOut: TExamples;
    i,e: integer;
    //new -- 08/03/2006 -- quick and fast ???
    ex_in_size, ex_in_number_e: integer;
    //tps: cardinal;


begin
 //trier les exemples en entre
 exIn:= TExamples.Create(self.Size);
 exIn.Copy(self);
 //>>
 //tps:= GetTickCount();
 //exIn.QuickSort();
 //>>> new -- 08/03/2006 --> mieux vaut le HEAPSORT : bien souvent les donnes sont dj "presque" tries, et QUICKSORT n'aime pas du tout ce cas !!!
 exIn.HeapSort();
 //tps:= GetTickCount() - tps;
 //TraceLog.WriteToLogFile(format('[TExamples --> getComplementaire] -- sorting = %d ms.',[tps]));
 //>>ne change jamais, autant en faire une var. temporaire
 ex_in_size:= exIn.Size;
 //prparer la sortie
 exOut:= TExamples.Create(prmTaille);
 exOut.BeginAdd();
 e:= 1;
 ex_in_number_e:= exIn.Number[e];
 //pour chaque individu potentiel
 for i:= 1 to prmTaille do
  begin
   {$B-}
   //if (e <= exIn.Size) and (i = exIn.Number[e])
   if (e <= ex_in_size) and (i = ex_in_number_e)
    then
     begin
      inc(e);
      ex_in_number_e:= exIn.Number[e];
     end
    else exOut.AddExample(i);
  end;
 exOut.EndAdd();
 exIn.Free;
 //and then ... donc l'appelant n'a pas besoin de crer l'instance !!!
 result:= exOut;
end;

procedure TExamples.fillValues;
var i: integer;
begin
 for i:= 1 to self.Size do
  self.Number[i]:= i;
end;

{ TWeightExamples }

procedure TWeightExamples.Copy(prmSource: TWeightExamples);
begin
 //plus efficace que copy car pas de cration de nouvelle instance (donc pas d'allocation mmoire)
 Move(prmSource.FTabWeight[0],FTabWeight[0],FSize*sizeof(double));
end;

constructor TWeightExamples.Create(prmSize: integer);
begin
 inherited Create();
 FSize:= prmSize;
 setLength(FTabWeight,succ(FSize));
end;

destructor TWeightExamples.Destroy;
begin
 setLength(FTabWeight,0);
 inherited Destroy;
end;

function TWeightExamples.getWeight(i: integer): double;
begin
 result:= FTabWeight[i];
end;

function TWeightExamples.IndexOf(proba: double): integer;
Var n: Integer;
    lower,upper: Integer;
begin
 lower:= 1;
 upper:= FSize;
 repeat
  n:= (lower+upper) div 2;
  if (FTabWeight[n]<proba)
   then lower:= n
   else upper:= n;
 until (lower=upper) or (lower=pred(upper));
 if (proba<=FTabWeight[lower])
  then result:= lower
  else result:= upper;
end;

procedure TWeightExamples.Initialize;
var i: integer;
begin
 for i:= 1 to FSize do
  FTabWeight[i]:= 1.0;
end;

procedure TWeightExamples.CalcCumulated();
var i: integer;
    s,v: double;
begin
 //cumul
 FTabWeight[0]:= 0.0;
 for i:= 1 to FSize do
  FTabWeight[i]:= FTabWeight[i]+FTabWeight[pred(i)];
 //division
 s:= FTabWeight[FSize];
 if (s>0)
  then
   begin
     for i:= 1 to FSize do
      FTabWeight[i]:= FTabWeight[i]/s;
     //et la marge
     FTabWeight[0]:= 1.0;
   end
  //sinon on met des poids constants
  else
   begin
    v:= 1.0/(1.0*FSize);
    for i:= 1 to FSize do
     FTabWeight[i]:= v*(1.0*i);
    FTabWeight[0]:= 1.0;
   end;
end;

procedure TWeightExamples.setWeight(i: integer; prmValue: double);
begin
 FTabWeight[i]:= prmValue;
end;

procedure TWeightExamples.Normalize;
var i: integer;
    s: double;
begin
 s:= 0.0;
 for i:= 1 to FSize do
  s:= s+FTabWeight[i];
 If (s>0)
  then
   for i:= 1 to FSize do
    FTabWeight[i]:= FTabWeight[i]/s;
end;

end.

