Installation et importation des fonctions¶

In [ ]:
# Python version -> at least version 3.10 is needed
import sys
sys.version
Out[ ]:
'3.10.10 | packaged by conda-forge | (main, Mar 24 2023, 20:00:38) [MSC v.1934 64 bit (AMD64)]'
In [ ]:
# The library -> version 0.0.3 (as of this writing)
# NOTE(review): if this is ever re-enabled, prefer `%pip` over `!pip` so the
# install targets the kernel's environment, and pin the version.
#!pip install scientisttools
In [ ]:
# Beware of the dependencies -- several have to be installed by hand
# WARNING: the commands below are specific to my configuration
#!pip install mapply
#!pip install adjustText
#!pip install factor_analyzer
# ... and others depending on your configuration... see the environment file (yaml)
In [ ]:
#les fonctions
from scientisttools.decomposition import FAMD
from scientisttools.extractfactor import get_eig,get_famd_ind,get_famd_col,get_famd_var,get_famd_mod,summaryFAMD
from scientisttools.pyplot import plotFAMD,plot_eigenvalues,plot_cosines,plot_contrib,plot_correlation_circle

Importation des données¶

In [ ]:
# Data folder: override with the FAMD_DATA_DIR environment variable; the
# default is the original author's local folder. The file is read through an
# explicit path rather than os.chdir(), so the process-wide working directory
# is left untouched and the cell can be re-run safely.
import os
from pathlib import Path
DATA_DIR = Path(os.environ.get("FAMD_DATA_DIR", "C:/Users/ricco/Desktop/demo"))

# Load the data; the first column of the sheet is used as the row index
import pandas
D = pandas.read_excel(DATA_DIR / "Tennis_Players_AFDM.xlsx", index_col=0)
D
Out[ ]:
Taille Lateralite MainsRevers Titres Finales TitresGC RolandGarros BestClassDouble
Joueur
Agassi 180 droitier deux 60 30 8 vainqueur 123
Becker 191 droitier une 49 28 6 demi 6
Borg 180 droitier deux 64 25 11 vainqueur 890
Connors 178 gaucher deux 109 52 8 demi 370
Courier 185 droitier deux 23 13 4 vainqueur 20
Edberg 187 droitier une 41 36 6 finale 1
Kafelnikov 190 droitier deux 26 20 2 vainqueur 4
Kuerten 190 droitier une 20 9 3 vainqueur 38
Lendl 187 droitier une 94 50 8 vainqueur 20
McEnroe 180 gaucher une 77 31 7 finale 1
Nastase 180 droitier une 58 38 2 vainqueur 59
Rafter 185 droitier une 11 14 2 demi 6
Safin 193 droitier deux 15 12 2 demi 71
Sampras 185 droitier une 64 24 14 demi 27
Vilas 180 gaucher une 62 40 4 vainqueur 175
Wilander 182 droitier deux 33 27 7 vainqueur 3
Djokovic 188 droitier deux 79 34 17 vainqueur 114
Federer 185 droitier une 103 54 20 vainqueur 24
Murray 191 droitier deux 46 22 3 finale 51
Nadal 185 gaucher deux 85 37 19 vainqueur 26

AFDM avec scientisttools¶

In [ ]:
# Documentation of the FAMD class
help(FAMD)
Help on class FAMD in module scientisttools.decomposition:

class FAMD(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin)
 |  FAMD(normalize=True, n_components=None, row_labels=None, quanti_labels=None, quali_labels=None, row_sup_labels=None, quanti_sup_labels=None, quali_sup_labels=None, graph=False, figsize=None)
 |  
 |  Factor Analysis of Mixed Data
 |  
 |  Performs Factor Analysis of Mixed Data (FAMD) with supplementary 
 |  individuals, supplementary quantitative variables and supplementary
 |  categorical variables.
 |  
 |  Parameters:
 |  -----------
 |  see scientisttools.decomposition.PCA and scientisttools.decomposition.MCA
 |  
 |  Method resolution order:
 |      FAMD
 |      sklearn.base.BaseEstimator
 |      sklearn.base.TransformerMixin
 |      sklearn.utils._set_output._SetOutputMixin
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, normalize=True, n_components=None, row_labels=None, quanti_labels=None, quali_labels=None, row_sup_labels=None, quanti_sup_labels=None, quali_sup_labels=None, graph=False, figsize=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  fit(self, X)
 |  
 |  fit_transform(self, X, y=None)
 |      Fit the model with X and apply the dimensionality reduction on X.
 |      
 |      Parameters
 |      ----------
 |      X : pd.DataFrame, shape (n_samples, n_features)
 |          New data, where n_samples in the number of samples
 |          and n_features is the number of features.
 |      
 |      y : None
 |          y is ignored
 |      
 |      Returns
 |      -------
 |      X_new : array-like, shape (n_samples, n_components)
 |  
 |  transform(self, X)
 |      Apply the dimensionality reduction on X
 |      
 |      X is projected on the first axes previous extracted from a training set.
 |      
 |      Parameters
 |      ----------
 |      X : DataFrame, shape (n_rows_sup, n_columns)
 |          New data, where n_row_sup is the number of supplementary
 |          row points and n_columns is the number of columns
 |          X rows correspond to supplementary row points that are 
 |          projected on the axes
 |          X is a table containing numeric values
 |      
 |      y : None
 |          y is ignored
 |      
 |      Returns
 |      -------
 |      X_new : DataFrame of float, shape (n_rows_sup, n_components_)
 |              X_new : coordinates of the projections of the supplementary
 |              row points on the axes.
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from sklearn.base.BaseEstimator:
 |  
 |  __getstate__(self)
 |  
 |  __repr__(self, N_CHAR_MAX=700)
 |      Return repr(self).
 |  
 |  __setstate__(self, state)
 |  
 |  get_params(self, deep=True)
 |      Get parameters for this estimator.
 |      
 |      Parameters
 |      ----------
 |      deep : bool, default=True
 |          If True, will return the parameters for this estimator and
 |          contained subobjects that are estimators.
 |      
 |      Returns
 |      -------
 |      params : dict
 |          Parameter names mapped to their values.
 |  
 |  set_params(self, **params)
 |      Set the parameters of this estimator.
 |      
 |      The method works on simple estimators as well as on nested objects
 |      (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
 |      parameters of the form ``<component>__<parameter>`` so that it's
 |      possible to update each component of a nested object.
 |      
 |      Parameters
 |      ----------
 |      **params : dict
 |          Estimator parameters.
 |      
 |      Returns
 |      -------
 |      self : estimator instance
 |          Estimator instance.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from sklearn.base.BaseEstimator:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from sklearn.utils._set_output._SetOutputMixin:
 |  
 |  set_output(self, *, transform=None)
 |      Set output container.
 |      
 |      See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`
 |      for an example on how to use the API.
 |      
 |      Parameters
 |      ----------
 |      transform : {"default", "pandas"}, default=None
 |          Configure output of `transform` and `fit_transform`.
 |      
 |          - `"default"`: Default output format of a transformer
 |          - `"pandas"`: DataFrame output
 |          - `None`: Transform configuration is unchanged
 |      
 |      Returns
 |      -------
 |      self : estimator instance
 |          Estimator instance.
 |  
 |  ----------------------------------------------------------------------
 |  Class methods inherited from sklearn.utils._set_output._SetOutputMixin:
 |  
 |  __init_subclass__(auto_wrap_output_keys=('transform',), **kwargs) from builtins.type
 |      This method is called when a class is subclassed.
 |      
 |      The default implementation does nothing. It may be
 |      overridden to extend subclasses.

In [ ]:
# More information on the parameters: the FAMD help refers to PCA (and MCA)
# for the description of its parameters
from scientisttools.decomposition import PCA
help(PCA)
Help on class PCA in module scientisttools.decomposition:

class PCA(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin)
 |  PCA(normalize=True, n_components=None, row_labels=None, col_labels=None, row_sup_labels=None, quanti_sup_labels=None, quali_sup_labels=None, graph=False, figsize=None)
 |  
 |  Principal Component Analysis
 |  
 |  This class inherits from sklearn BaseEstimator and TransformerMixin class
 |  
 |  This is a standard Principal Component Analysis implementation
 |  bases on the Singular Value Decomposition
 |  
 |  Performs Principal Component Analysis (PCA) with supplementary 
 |  individuals, supplementary quantitative variables and supplementary
 |  categorical variables.
 |  
 |  Parameters
 |  ----------
 |  normalize : bool, default = True
 |      - If True : the data are scaled to unit variance.
 |      - If False : the data are not scaled to unit variance.
 |  
 |  n_components : int, float or None, default = None
 |      Number of components to keep.
 |      - If n_components is None, keep all the components.
 |      - If 0 <= n_components < 1, select the number of components such
 |          that the amount of variance that needs to be explained is
 |          greater than the percentage specified by n_components.
 |      - If 1 <= n_components :
 |          - If n_components is int, select a number of components
 |              equal to n_components
 |          - If n_components is float, select the higher number of 
 |              components lower than n_components
 |  
 |  row_labels : array of strings or None, default = None
 |      - If row_labels is an array of strings : this array provides the
 |        row labels.
 |            If the shape of the array doesn't match with the number of
 |            rows : labels are automatically computed for each row.
 |      - If row_labels is None : labels are automatically computed for
 |        each row.
 |   
 |  col_labels : array of strings or None, default = None
 |      - If col_labels is an array of strings : this array provides the
 |        column labels.
 |            If the shape of the array doesn't match with the number of 
 |            columns : labels are automatically computed for each
 |            column.
 |      - If col_labels is None : labels are automatically computed for
 |        each column.
 |  
 |  row_sup_labels : array of strings or None, defulat = None
 |      This array provides the supplementary individuals labels
 |  
 |  quanti_sup_labels : arrays of strings or None, default = None
 |      This array provides the quantitative supplementary variables labels
 |  
 |  quali_sup_labels : array of strings or None, default = None
 |      This array provides the categorical supplementary variables labels
 |  
 |  graph : boolean
 |  
 |  figsize : tuple or None
 |  
 |  Attributes
 |  ----------
 |  n_components_ : int
 |      The estimated number of components.
 |  
 |  row_labels_ : array of strings
 |      Labels for the rows.
 |  
 |  col_labels_ : array of strings
 |      Labels for the columns.
 |  
 |  row_sup_labels_ : array of strings or None
 |      Labels of supplementary individuals labels
 |  
 |  quanti_sup_labels_ : arrays f strings or None
 |      Labels of quantitative supplementary variables
 |  
 |  quali_sup_labels_ :
 |  
 |  mod_sup_labels_ : list of strings
 |                      labels for the categories supplementary
 |  
 |  short_sup_labels_ : list of strings
 |                      Short labels for the categories supplementary 
 |  
 |  eig_ : array of float
 |      A 4 x n_components_ matrix containing all the eigenvalues
 |      (1st row), difference (2nd row) the percentage of variance (3rd row) and the
 |      cumulative percentage of variance (4th row).
 |  
 |  eigen_vectors_ : array of float
 |      Eigen vectors extracted from the Principal Components Analysis.
 |  
 |  row_coord_ : ndarray of shape (n_rows,n_components_)
 |      A n_rows x n_components_ matrix containing the row coordinates.
 |  
 |  col_coord_ : ndarray of shape (n_columns,n_components_)
 |      A n_columns x n_components_ matrix containing the column
 |      coordinates.
 |      
 |  row_contrib_ : ndarray of shape (n_rows,n_components_)
 |      A n_rows x n_components_ matrix containing the row
 |      contributions.
 |  
 |  col_contrib_ : ndarray of shape (n_columns,n_components_)
 |      A n_columns x n_components_ matrix containing the column
 |      contributions.
 |  
 |  row_cos2_ : ndarray of shape (n_rows,n_components_)
 |      A n_rows x n_components_ matrix containing the row cosines.
 |  
 |  col_cos2_ : ndarray of shape (n_columns,n_components_)
 |      A n_columns x n_components_ matrix containing the column
 |      cosines.
 |  
 |  col_cor_ : ndarray of shape (n_columns,n_components_)
 |      A n_columns x n_components_ matrix containing the correlations
 |      between variables (= columns) and axes.
 |  
 |  means_ : ndarray of shape (n_columns,)
 |      The mean for each variable (= for each column).
 |  
 |  std_ : ndarray of shape (n_columns,)
 |      The standard deviation for each variable (= for each column).
 |  
 |  ss_col_coord_ : ndarray of shape (n_columns,)
 |      The sum of squared of columns coordinates.
 |  
 |  model_ : string
 |      The model fitted = 'pca'
 |  
 |  Method resolution order:
 |      PCA
 |      sklearn.base.BaseEstimator
 |      sklearn.base.TransformerMixin
 |      sklearn.utils._set_output._SetOutputMixin
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, normalize=True, n_components=None, row_labels=None, col_labels=None, row_sup_labels=None, quanti_sup_labels=None, quali_sup_labels=None, graph=False, figsize=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  fit(self, X, y=None)
 |      Fit the model to X
 |      
 |      Parameters
 |      ----------
 |      X : DataFrame of float, shape (n_rows, n_columns)
 |      
 |      y : None
 |          y is ignored
 |      
 |      Returns:
 |      --------
 |      self : object
 |              Returns the instance itself
 |  
 |  fit_transform(self, X, y=None)
 |      Fit the model with X and apply the dimensionality reduction on X.
 |      
 |      Parameters
 |      ----------
 |      X : pd.DataFrame, shape (n_samples, n_features)
 |          New data, where n_samples in the number of samples
 |          and n_features is the number of features.
 |      
 |      Returns
 |      -------
 |      X_new : array-like, shape (n_samples, n_components)
 |  
 |  transform(self, X, y=None)
 |      Apply the dimensionality reduction on X
 |      
 |      X is projected on the first axes previous extracted from a training set.
 |      
 |      Parameters
 |      ----------
 |      X : DataFrame of float, shape (n_rows_sup, n_columns)
 |          New data, where n_row_sup is the number of supplementary
 |          row points and n_columns is the number of columns
 |          X rows correspond to supplementary row points that are 
 |          projected on the axes
 |          X is a table containing numeric values
 |      
 |      y : None
 |          y is ignored
 |      
 |      Returns
 |      -------
 |      X_new : DataFrame of float, shape (n_rows_sup, n_components_)
 |              X_new : coordinates of the projections of the supplementary
 |              row points on the axes.
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from sklearn.base.BaseEstimator:
 |  
 |  __getstate__(self)
 |  
 |  __repr__(self, N_CHAR_MAX=700)
 |      Return repr(self).
 |  
 |  __setstate__(self, state)
 |  
 |  get_params(self, deep=True)
 |      Get parameters for this estimator.
 |      
 |      Parameters
 |      ----------
 |      deep : bool, default=True
 |          If True, will return the parameters for this estimator and
 |          contained subobjects that are estimators.
 |      
 |      Returns
 |      -------
 |      params : dict
 |          Parameter names mapped to their values.
 |  
 |  set_params(self, **params)
 |      Set the parameters of this estimator.
 |      
 |      The method works on simple estimators as well as on nested objects
 |      (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
 |      parameters of the form ``<component>__<parameter>`` so that it's
 |      possible to update each component of a nested object.
 |      
 |      Parameters
 |      ----------
 |      **params : dict
 |          Estimator parameters.
 |      
 |      Returns
 |      -------
 |      self : estimator instance
 |          Estimator instance.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from sklearn.base.BaseEstimator:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from sklearn.utils._set_output._SetOutputMixin:
 |  
 |  set_output(self, *, transform=None)
 |      Set output container.
 |      
 |      See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`
 |      for an example on how to use the API.
 |      
 |      Parameters
 |      ----------
 |      transform : {"default", "pandas"}, default=None
 |          Configure output of `transform` and `fit_transform`.
 |      
 |          - `"default"`: Default output format of a transformer
 |          - `"pandas"`: DataFrame output
 |          - `None`: Transform configuration is unchanged
 |      
 |      Returns
 |      -------
 |      self : estimator instance
 |          Estimator instance.
 |  
 |  ----------------------------------------------------------------------
 |  Class methods inherited from sklearn.utils._set_output._SetOutputMixin:
 |  
 |  __init_subclass__(auto_wrap_output_keys=('transform',), **kwargs) from builtins.type
 |      This method is called when a class is subclassed.
 |      
 |      The default implementation does nothing. It may be
 |      overridden to extend subclasses.

Lancement des calculs¶

In [ ]:
# Instantiation.
# The first 16 rows (up to Wilander) are the active individuals; the rows from
# Djokovic onwards are declared as supplementary individuals.
n_active = 16
famd_options = dict(
    n_components=2,
    row_labels=list(D.index[:n_active]),
    row_sup_labels=list(D.index[n_active:]),
    quanti_labels=['Taille', 'Titres', 'Finales', 'TitresGC'],
    quanti_sup_labels=['BestClassDouble'],
    quali_labels=['Lateralite', 'MainsRevers', 'RolandGarros'],
)
afdm = FAMD(**famd_options)

# Training
afdm.fit(D)
Out[ ]:
FAMD(n_components=2, quali_labels=['Lateralite', 'MainsRevers', 'RolandGarros'],
     quanti_labels=['Taille', 'Titres', 'Finales', 'TitresGC'],
     quanti_sup_labels=['BestClassDouble'],
     row_labels=['Agassi', 'Becker', 'Borg', 'Connors', 'Courier', 'Edberg',
                 'Kafelnikov', 'Kuerten', 'Lendl', 'McEnroe', 'Nastase',
                 'Rafter', 'Safin', 'Sampras', 'Vilas', 'Wilander'],
     row_sup_labels=['Djokovic', 'Federer', 'Murray', 'Nadal'])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
FAMD(n_components=2, quali_labels=['Lateralite', 'MainsRevers', 'RolandGarros'],
     quanti_labels=['Taille', 'Titres', 'Finales', 'TitresGC'],
     quanti_sup_labels=['BestClassDouble'],
     row_labels=['Agassi', 'Becker', 'Borg', 'Connors', 'Courier', 'Edberg',
                 'Kafelnikov', 'Kuerten', 'Lendl', 'McEnroe', 'Nastase',
                 'Rafter', 'Safin', 'Sampras', 'Vilas', 'Wilander'],
     row_sup_labels=['Djokovic', 'Federer', 'Murray', 'Nadal'])
In [ ]:
# Eigenvalues (with difference, proportion and cumulative proportion)
print(get_eig(afdm))
       eigenvalue  difference  proportion  cumulative
Dim.1    3.125964    1.701771   39.074556   39.074556
Dim.2    1.424193    0.286425   17.802418   56.876974

Inspection des résultats¶

Résultats globaux¶

In [ ]:
# Summary of the results, printed as markdown tables
summaryFAMD(afdm,to_markdown=True)
                     Factor Analysis of Mixed Data - Results                     

Importance of components
|                         |   Dim.1 |   Dim.2 |
|:------------------------|--------:|--------:|
| Variance                |   3.126 |   1.424 |
| Difference              |   1.702 |   0.286 |
| % of var.               |  39.075 |  17.802 |
| Cumulative of % of var. |  39.075 |  56.877 |

Individuals (the 10 first)

|            |   d(i,G) |   p(i) |   I(i,G) |   Dim.1 |    ctr |   cos2 |   Dim.2 |    ctr |   cos2 |
|:-----------|---------:|-------:|---------:|--------:|-------:|-------:|--------:|-------:|-------:|
| Agassi     |    1.954 |  0.062 |    0.239 |  -0.555 |  0.616 |  0.081 |  -1.73  | 13.129 |  0.783 |
| Becker     |    2.275 |  0.062 |    0.323 |   0.835 |  1.395 |  0.135 |   1.241 |  6.763 |  0.298 |
| Borg       |    2.423 |  0.062 |    0.367 |  -0.716 |  1.025 |  0.087 |  -1.875 | 15.431 |  0.599 |
| Connors    |    4.292 |  0.062 |    1.152 |  -3.52  | 24.775 |  0.673 |  -0.681 |  2.035 |  0.025 |
| Courier    |    2.245 |  0.062 |    0.315 |   1.66  |  5.51  |  0.547 |  -1.165 |  5.96  |  0.269 |
| Edberg     |    2.968 |  0.062 |    0.551 |  -0.23  |  0.106 |  0.006 |   2.088 | 19.138 |  0.495 |
| Kafelnikov |    2.489 |  0.062 |    0.387 |   1.985 |  7.879 |  0.636 |  -0.77  |  2.601 |  0.096 |
| Kuerten    |    2.727 |  0.062 |    0.465 |   2.269 | 10.297 |  0.693 |   0.52  |  1.186 |  0.036 |
| Lendl      |    2.834 |  0.062 |    0.502 |  -1.497 |  4.484 |  0.279 |  -0.038 |  0.006 |  0     |
| McEnroe    |    3.768 |  0.062 |    0.887 |  -2.504 | 12.537 |  0.442 |   1.949 | 16.67  |  0.268 |

Supplementary individuals

|          |   Dist |   Dim.1 |   cos2 |   Dim.2 |   cos2 |
|:---------|-------:|--------:|-------:|--------:|-------:|
| Djokovic |  3.864 |  -1.162 |  0.09  |  -1.678 |  0.189 |
| Federer  |  5.206 |  -3.131 |  0.362 |  -0.717 |  0.019 |
| Murray   |  3.385 |   1.042 |  0.095 |   1.127 |  0.111 |
| Nadal    |  4.847 |  -2.89  |  0.355 |  -1.543 |  0.101 |

Continuous variables

|          |   Dim.1 |    ctr |   cos2 |   Dim.2 |   ctr |   cos2 |
|:---------|--------:|-------:|-------:|--------:|------:|-------:|
| Taille   |   0.763 | 18.621 |  0.582 |   0.34  | 8.097 |  0.115 |
| Titres   |  -0.936 | 28.023 |  0.876 |  -0.074 | 0.385 |  0.005 |
| Finales  |  -0.864 | 23.86  |  0.746 |   0.007 | 0.003 |  0     |
| TitresGC |  -0.557 |  9.943 |  0.311 |  -0.181 | 2.301 |  0.033 |

Supplementary continuous variable

|                 |   Dim.1 |   cos2 |   Dim.2 |   cos2 |
|:----------------|--------:|-------:|--------:|-------:|
| BestClassDouble |  -0.328 |  0.108 |   -0.49 |   0.24 |

Categories

|                        |   d(k,G) |   p(k) |   I(k,G) |   Dim.1 |    ctr |   cos2 |   vtest |   Dim.2 |    ctr |   cos2 |   vtest |
|:-----------------------|---------:|-------:|---------:|--------:|-------:|-------:|--------:|--------:|-------:|-------:|--------:|
| Lateralite_droitier    |    0.48  |  0.116 |    0.027 |   0.602 |  3.016 |  1.572 |   2.747 |  -0.113 |  0.509 |  0.055 |  -0.761 |
| Lateralite_gaucher     |    2.082 |  0.027 |    0.116 |  -2.61  | 13.071 |  1.572 |  -2.747 |   0.488 |  2.205 |  0.055 |   0.761 |
| MainsRevers_deux       |    1.134 |  0.062 |    0.08  |   0.294 |  0.386 |  0.067 |   0.567 |  -1.062 | 24.335 |  0.877 |  -3.04  |
| MainsRevers_une        |    0.882 |  0.08  |    0.062 |  -0.228 |  0.3   |  0.067 |  -0.567 |   0.826 | 18.927 |  0.877 |   3.04  |
| RolandGarros_demi      |    1.483 |  0.045 |    0.098 |   0.294 |  0.276 |  0.039 |   0.434 |   0.49  |  3.706 |  0.109 |   1.073 |
| RolandGarros_finale    |    2.646 |  0.018 |    0.125 |  -1.367 |  2.391 |  0.267 |  -1.132 |   2.019 | 25.113 |  0.582 |   2.476 |
| RolandGarros_vainqueur |    0.882 |  0.08  |    0.062 |   0.14  |  0.114 |  0.025 |   0.349 |  -0.721 | 14.419 |  0.668 |  -2.653 |

Categorical variables

|              |   eta2.1 |   cos2.1 |   eta2.2 |   cos2.2 |
|:-------------|---------:|---------:|---------:|---------:|
| Lateralite   |    0.503 |    0.503 |    0.039 |    0.039 |
| MainsRevers  |    0.021 |    0.021 |    0.616 |    0.616 |
| RolandGarros |    0.087 |    0.043 |    0.616 |    0.308 |

Informations sur les variables¶

In [ ]:
# Position of the variables (importance of the variables)
plotFAMD(afdm,choice='var',repel=True)

Variables quantitatives¶

In [ ]:
# Retrieve the information about the quantitative variables
# (a dict with keys such as 'corr', 'coord', 'cos2', 'contrib', 'quanti_sup')
info_var = get_famd_col(afdm)
info_var
Out[ ]:
{'corr':             Taille    Titres   Finales  TitresGC
 Taille    1.000000 -0.592010 -0.529501 -0.364681
 Titres   -0.592010  1.000000  0.873995  0.601113
 Finales  -0.529501  0.873995  1.000000  0.333035
 TitresGC -0.364681  0.601113  0.333035  1.000000,
 'pcorr':             Taille    Titres   Finales  TitresGC
 Taille    1.000000 -0.215440 -0.043335 -0.033367
 Titres   -0.215440  1.000000  0.863026  0.653212
 Finales  -0.043335  0.863026  1.000000 -0.495977
 TitresGC -0.033367  0.653212 -0.495977  1.000000,
 'coord':              Dim.1     Dim.2
 Taille    0.762939  0.339587
 Titres   -0.935937 -0.074027
 Finales  -0.863628  0.006709
 TitresGC -0.557497 -0.181030,
 'cos2':              Dim.1     Dim.2
 Taille    0.582077  0.115319
 Titres    0.875979  0.005480
 Finales   0.745854  0.000045
 TitresGC  0.310803  0.032772,
 'contrib':               Dim.1     Dim.2
 Taille    18.620702  8.097177
 Titres    28.022668  0.384778
 Finales   23.859953  0.003161
 TitresGC   9.942619  2.301077,
 'ftest':              Dim.1     Dim.2
 Taille    3.617129  1.275016
 Titres   -6.144733 -0.267397
 Finales  -4.714070  0.024191
 TitresGC -2.268591 -0.659986,
 'quanti_sup': {'corr':                    Taille    Titres   Finales  TitresGC
  BestClassDouble -0.442624  0.350269  0.160395   0.40567,
  'coord':                     Dim.1     Dim.2
  BestClassDouble -0.328395 -0.490059,
  'cos2':                     Dim.1     Dim.2
  BestClassDouble  0.107843  0.240158,
  'ftest':                     Dim.1     Dim.2
  BestClassDouble -1.229594 -1.933074}}
In [ ]:
# Coordinates --> correlations of the variables with the factors
info_var['coord']
Out[ ]:
Dim.1 Dim.2
Taille 0.762939 0.339587
Titres -0.935937 -0.074027
Finales -0.863628 0.006709
TitresGC -0.557497 -0.181030
In [ ]:
# Correlation circle, drawn on a square 6x6 figure
import matplotlib.pyplot as plt
fig,axe = plt.subplots(figsize=(6,6))
plot_correlation_circle(afdm,ax=axe)
plt.show()
In [ ]:
# COS2 - quality of representation
info_var['cos2']
Out[ ]:
Dim.1 Dim.2
Taille 0.582077 0.115319
Titres 0.875979 0.005480
Finales 0.745854 0.000045
TitresGC 0.310803 0.032772
In [ ]:
# Contributions of the quantitative variables
# NOTE(review): axis=0 presumably selects the first dimension -- confirm
plot_contrib(afdm,choice='var',axis=0)

Variables qualitatives¶

In [ ]:
# Coordinates of the categories, labelled by category (rows) and dimension (columns)
pandas.DataFrame(afdm.mod_coord_,index=afdm.mod_labels_,columns=afdm.dim_index_)
Out[ ]:
Dim.1 Dim.2
Lateralite_droitier 0.602308 -0.112717
Lateralite_gaucher -2.610001 0.488441
MainsRevers_deux 0.293516 -1.062163
MainsRevers_une -0.228291 0.826127
RolandGarros_demi 0.293940 0.490470
RolandGarros_finale -1.367098 2.018646
RolandGarros_vainqueur 0.140499 -0.721071
In [ ]:
# Positions of the categories in the first factorial plane.
# The axis limits are (-3, 3) on both axes: Lateralite_gaucher has a Dim.1
# coordinate of about -2.61, which fell outside the former (-2.5, 2.5) window.
plotFAMD(afdm,choice='mod',repel=True, xlim=(-3,3), ylim=(-3,3))
In [ ]:
# Contributions of the categories
# NOTE(review): axis=0 presumably selects the first dimension -- confirm
plot_contrib(afdm,choice='mod',axis=0)

Informations sur les individus¶

In [ ]:
# Go straight to the graphical representation of the individuals
# (ind_sup=True is passed for the supplementary individuals)
plotFAMD(afdm,choice='ind',repel=True,ind_sup=True)
In [ ]:
# Contributions of the individuals (top 5)
# NOTE(review): axis=0 presumably selects the first dimension -- confirm
plot_contrib(afdm,choice='ind',axis=0,top_contrib=5)