(*************************************************************************)
(* UCompSpvMultinomialLogReg.pas - Copyright (c) 2005 Ricco RAKOTOMALALA *)
(*************************************************************************)

{
@abstract(Multinomial logistic regression)
@author(Ricco)
@created(18/04/2005)

Adapted from the Logistic.java class
>> revision 1.32, WEKA 3-4-4

 BEGIN >> documentation taken from the original source code

 * Second implementation for building and using a multinomial logistic
 * regression model with a ridge estimator.  <p>
 *
 * There are some modifications, however, compared to the paper of le
 * Cessie and van Houwelingen(1992): <br>
 *
 * If there are k classes for n instances with m attributes, the
 * parameter matrix B to be calculated will be an m*(k-1) matrix.<br>
 *
 * The probability for class j except the last class is <br>
 * Pj(Xi) = exp(XiBj)/((sum[j=1..(k-1)]exp(Xi*Bj))+1) <br>
 * The last class has probability <br>
 * 1-(sum[j=1..(k-1)]Pj(Xi)) = 1/((sum[j=1..(k-1)]exp(Xi*Bj))+1) <br>
 *
 * The (negative) multinomial log-likelihood is thus: <br>
 * L = -sum[i=1..n]
 * sum[j=1..(k-1)](Yij * ln(Pj(Xi))) +
 * (1 - (sum[j=1..(k-1)]Yij)) * ln(1 - sum[j=1..(k-1)]Pj(Xi))
 *  + ridge * (B^2) <br>
 *
 * In order to find the matrix B for which L is minimised, a
 * Quasi-Newton Method is used to search for the optimized values of
 * the m*(k-1) variables.  Note that before we use the optimization
 * procedure, we "squeeze" the matrix B into a m*(k-1) vector.  For
 * details of the optimization procedure, please check
 * weka.core.Optimization class. <p>
 *
 * Although original Logistic Regression does not deal with instance
 * weights, we modify the algorithm a little bit to handle the
 * instance weights. <p>
 *
 * Reference: le Cessie, S. and van Houwelingen, J.C. (1992). <i>
 * Ridge Estimators in Logistic Regression.</i> Applied Statistics,
 * Vol. 41, No. 1, pp. 191-201. <p>
 *
 * Missing values are replaced using a ReplaceMissingValuesFilter, and
 * nominal attributes are transformed into numeric attributes using a
 * NominalToBinaryFilter.<p>
 *
 * Valid options are:<p>
 *
 * -D <br>
 * Turn on debugging output.<p>
 *
 * -R <ridge> <br>
 * Set the ridge parameter for the log-likelihood.<p>
 *
 * -M <number of iterations> <br> Set the maximum number of iterations
 * (default -1, iterates until convergence).<p>
 *
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.32 $ */

 << END

}

unit UCompSpvMultinomialLogReg;

interface

USES
       Classes, Forms, IniFiles,
       UCompDefinition,
       UCompSpvLDefinition,
       UOperatorDefinition,
       UDatasetImplementation,
       UCalcSpvMutlinomialLogReg;

TYPE
       // Generator component for the multinomial logistic regression learner.
       // This is the class registered with RegisterClass in the unit's
       // initialization section.
       TMLGCompSpvMLR = class(TMLGenCompSpvLearning)
                        protected
                        // Sets FMLComp to mlcSpvLearning.
                        procedure   GenCompInitializations(); override;
                        public
                        // Returns the component class produced by this generator (TMLCompSpvMLR).
                        function    GetClassMLComponent: TClassMLComponent; override;
                        end;

       // Component for the multinomial logistic regression learner.
       TMLCompSpvMLR = class(TMLCompSpvLearning)
                       protected
                       // Returns the operator class attached to this component (TOpSpvMLR).
                       function    getClassOperator: TClassOperator; override;
                       // Returns the log line text, which embeds the component's Description.
                       function    GetLogResultDescription(): string; override;
                       end;

       // Operator for the multinomial logistic regression learner.
       TOpSpvMLR = class(TOpSpvLearningContinuous)
                   protected
                   // Returns the parameter class used by this operator (TOpPrmSpvMLR).
                   function    getClassParameter: TClassOperatorParameter; override;
                   // Returns the learning-algorithm class used by this operator (TLogistic).
                   function    getClassSpvLearning(): TClassCalcSpvLearning; override;
                   end;

       // Parameter set of the multinomial logistic regression operator:
       // iteration cap, ridge value and optimization method, with HTML
       // rendering and stream / INI persistence.
       TOpPrmSpvMLR = class(TOpPrmSpvLearning)
                      private
                      // maximum number of iterations -- stopping rule
                      FMaxIteration: integer;
                      // ridge parameter
                      FRidge: double;
                      // optimization method
                      FOptMethod: TEnumOptLogistic;
                      protected
                      // Creates the parameter dialog (TdlgOpPrmSpvMLogReg) for this object.
                      function    CreateDlgParameters(): TForm; override;
                      // Defaults: 50 iterations, ridge 1.0e-8, optMLR_BFGSGradient.
                      procedure   SetDefaultParameters(); override;
                      public
                      // display: parameters rendered as an HTML table
                      function    getHTMLParameters(): string; override;
                      // I/O: binary stream persistence (raw field bytes, in declaration order)
                      procedure   LoadFromStream(prmStream: TStream); override;
                      procedure   SaveToStream(prmStream: TStream); override;
                      // I/O: INI persistence (keys 'max_iterations', 'ridge', 'optimization')
                      procedure   LoadFromINI(prmSection: string; prmINI: TMemIniFile); override;
                      procedure   SaveToINI(prmSection: string; prmINI: TMemIniFile); override;
                      // Direct read/write access to the three parameter fields.
                      property    MaxIteration: integer read FMaxIteration write FMaxIteration;
                      property    Ridge: double read FRidge write FRidge;
                      property    OptMethod: TEnumOptLogistic read FOptMethod write FOptMethod;
                      end;

implementation

USES
       Sysutils, UConstConfiguration, UDlgOpPrmSpvMLogReg;


{ TMLGCompSpvMLR }

{ Tags this generator with the mlcSpvLearning component kind. }
procedure TMLGCompSpvMLR.GenCompInitializations;
begin
  Self.FMLComp := mlcSpvLearning;
end;

{ Returns the component class that this generator instantiates. }
function TMLGCompSpvMLR.GetClassMLComponent: TClassMLComponent;
begin
  Result := TMLCompSpvMLR;
end;

{ TMLCompSpvMLR }

{ Returns the operator class bound to this component. }
function TMLCompSpvMLR.getClassOperator: TClassOperator;
begin
  Result := TOpSpvMLR;
end;

{ Builds the log message for this component; the component's Description
  is inserted between the trailing square brackets. }
function TMLCompSpvMLR.GetLogResultDescription: string;
const
  LOG_TEMPLATE = '[Multinomial Logistic Regression] result generated [%s]';
begin
  Result := Format(LOG_TEMPLATE, [Self.Description]);
end;

{ TOpSpvMLR }

{ Returns the parameter class used by this operator. }
function TOpSpvMLR.getClassParameter: TClassOperatorParameter;
begin
  Result := TOpPrmSpvMLR;
end;

{ Returns the learning-algorithm class used by this operator. }
function TOpSpvMLR.getClassSpvLearning: TClassCalcSpvLearning;
begin
  Result := TLogistic;
end;

{ TOpPrmSpvMLR }

{ Creates the parameter-editing dialog, passing this parameter object to its
  CreateFromOpPrm constructor. The caller receives the new form. }
function TOpPrmSpvMLR.CreateDlgParameters: TForm;
begin
  Result := TdlgOpPrmSpvMLogReg.CreateFromOpPrm(Self);
end;

{ Renders the current parameter values as an HTML table: one header row
  followed by one row each for the optimization method, the iteration cap
  and the ridge value (scientific notation, two decimals). }
function TOpPrmSpvMLR.getHTMLParameters: string;
var
  rowMethod, rowIterations, rowRidge: string;
begin
  // one data row per parameter
  rowMethod := HTML_TABLE_COLOR_DATA_GRAY
    + Format('<TD>Optimization method</TD><TD align="right">%s</TD></TR>',
             [STR_OPT_LOGISTIC[FOptMethod]]);
  rowIterations := HTML_TABLE_COLOR_DATA_GRAY
    + Format('<TD>Max. iterations</TD><TD align="right">%d</TD></TR>',
             [FMaxIteration]);
  rowRidge := HTML_TABLE_COLOR_DATA_GRAY
    + Format('<TD>Ridge</TD><TD align="right">%.2e</TD></TR>',
             [FRidge]);

  // table opening, header row, data rows, table closing
  Result := HTML_HEADER_TABLE_RESULT
    + HTML_TABLE_COLOR_HEADER_GRAY + '<TH colspan=2>Parameters</TH></TR>'
    + rowMethod
    + rowIterations
    + rowRidge
    + '</table>';
end;

{ Reads the parameters from section prmSection of prmINI.
  Keys read: 'max_iterations', 'ridge' and 'optimization'. When a key is
  absent the current field value is kept (it is passed as the default).
  The 'optimization' entry is stored as the ordinal of TEnumOptLogistic; a
  value outside the enumeration's range (hand-edited or corrupted file) is
  ignored and the current method is kept, because an out-of-range enum would
  later be used as an index into STR_OPT_LOGISTIC. }
procedure TOpPrmSpvMLR.LoadFromINI(prmSection: string;
  prmINI: TMemIniFile);
var
  optOrdinal: integer;
begin
  FMaxIteration:= prmINI.ReadInteger(prmSection,'max_iterations',FMaxIteration);
  FRidge:= prmINI.ReadFloat(prmSection,'ridge',FRidge);
  optOrdinal:= prmINI.ReadInteger(prmSection,'optimization',ord(FOptMethod));
  // only accept ordinals that map onto a declared enumeration value
  if (optOrdinal >= ord(Low(TEnumOptLogistic))) and
     (optOrdinal <= ord(High(TEnumOptLogistic))) then
    FOptMethod:= TEnumOptLogistic(optOrdinal);
end;

{ Restores the three parameter fields from prmStream as raw bytes.
  The read order (iteration cap, ridge, optimization method) must mirror
  the write order used by SaveToStream. }
procedure TOpPrmSpvMLR.LoadFromStream(prmStream: TStream);
begin
  prmStream.ReadBuffer(Self.FMaxIteration, SizeOf(Self.FMaxIteration));
  prmStream.ReadBuffer(Self.FRidge,        SizeOf(Self.FRidge));
  prmStream.ReadBuffer(Self.FOptMethod,    SizeOf(Self.FOptMethod));
end;

{ Writes the parameters into section prmSection of prmINI under the keys
  'max_iterations', 'ridge' and 'optimization'; the optimization method is
  stored as its ordinal value. }
procedure TOpPrmSpvMLR.SaveToINI(prmSection: string; prmINI: TMemIniFile);
var
  optOrdinal: integer;
begin
  optOrdinal := Ord(Self.FOptMethod);
  prmINI.WriteInteger(prmSection, 'max_iterations', Self.FMaxIteration);
  prmINI.WriteFloat(prmSection, 'ridge', Self.FRidge);
  prmINI.WriteInteger(prmSection, 'optimization', optOrdinal);
end;

{ Dumps the three parameter fields to prmStream as raw bytes, in the order
  iteration cap, ridge, optimization method (the order LoadFromStream reads). }
procedure TOpPrmSpvMLR.SaveToStream(prmStream: TStream);
begin
  prmStream.WriteBuffer(Self.FMaxIteration, SizeOf(Self.FMaxIteration));
  prmStream.WriteBuffer(Self.FRidge,        SizeOf(Self.FRidge));
  prmStream.WriteBuffer(Self.FOptMethod,    SizeOf(Self.FOptMethod));
end;

{ Resets the parameters to their defaults: 50 iterations, a ridge of
  1.0e-8 and the optMLR_BFGSGradient optimization method. }
procedure TOpPrmSpvMLR.SetDefaultParameters;
const
  DEFAULT_MAX_ITERATION = 50;
  DEFAULT_RIDGE         = 1.0e-8;
begin
  Self.FMaxIteration := DEFAULT_MAX_ITERATION;
  Self.FRidge        := DEFAULT_RIDGE;
  Self.FOptMethod    := optMLR_BFGSGradient;
end;

initialization
  // Register the generator component class with the Classes unit's
  // class registry when the unit is loaded.
  RegisterClasses([TMLGCompSpvMLR]);
end.
