Configuration, vérification
# Load the R-to-Python bridge
library(reticulate)
# Bind reticulate to the interpreter of Anaconda's "base" environment.
# use_python() is documented as taking the path of a Python binary, so the
# executable is named explicitly instead of its parent directory.
# required = TRUE makes a missing interpreter an error instead of a mere
# hint that lets reticulate keep scanning for another Python installation.
reticulate::use_python("C:/Users/ricco/anaconda3/python.exe", required = TRUE)
# Check which interpreter and NumPy build are actually in use
reticulate::py_config()
## python: C:/Users/ricco/anaconda3/python.exe
## libpython: C:/Users/ricco/anaconda3/python312.dll
## pythonhome: C:/Users/ricco/anaconda3
## version: 3.12.7 | packaged by Anaconda, Inc. | (main, Oct 4 2024, 13:17:27) [MSC v.1929 64 bit (AMD64)]
## Architecture: 64bit
## numpy: C:/Users/ricco/anaconda3/Lib/site-packages/numpy
## numpy_version: 1.26.4
##
## NOTE: Python version was forced by use_python() function
# Smoke test: evaluate a Python expression from R
reticulate::py_eval("3**3")
## [1] 27
Traitements sous R
Chargement des données
# Attach the Excel reader, then import the workbook
library(readxl)
df <- read_excel(path = "heart_disease_male.xlsx")
# Compact overview of the columns and their types
str(object = df)
## tibble [209 × 7] (S3: tbl_df/tbl/data.frame)
## $ age : num [1:209] 43 39 39 42 49 50 59 54 59 56 ...
## $ chest_pain : chr [1:209] "asympt" "atyp_angina" "non_anginal" "non_anginal" ...
## $ rest_bpress : num [1:209] 140 120 160 160 140 140 140 200 130 170 ...
## $ blood_sugar : chr [1:209] "f" "f" "t" "f" ...
## $ max_heart_rate : num [1:209] 135 160 160 146 130 135 119 142 125 122 ...
## $ exercice_angina: chr [1:209] "yes" "yes" "no" "no" ...
## $ disease : chr [1:209] "positive" "negative" "negative" "negative" ...
Analyse factorielle des données mixtes sous R
# Keep the first six columns as descriptors (the seventh, disease, is the
# class label that gets re-attached further down)
X <- df[, 1:6]
names(X)
## [1] "age" "chest_pain" "rest_bpress" "blood_sugar"
## [5] "max_heart_rate" "exercice_angina"
# Factor analysis of mixed data, keeping two components, no automatic plots
library(FactoMineR)
res <- FactoMineR::FAMD(X, ncp = 2, graph = FALSE)
# Eigenvalues and share of variance per component
print(res[["eig"]])
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 2.311369 28.89211 28.89211
## comp 2 1.119654 13.99568 42.88779
# Coordinates of the individuals on the retained components
head(res[["ind"]][["coord"]])
## Dim.1 Dim.2
## 1 0.9535241 -1.0043685
## 2 -1.0365063 -0.9633966
## 3 -0.5403707 3.2596526
## 4 -0.8021856 1.0455921
## 5 0.3646007 -0.2912144
## 6 0.3096903 -0.2311542
# Scatter plot of the individuals in the first factorial plane
plot(res[["ind"]][["coord"]])

# Copy the coordinates into a data frame
factCoord <- as.data.frame(res[["ind"]][["coord"]])
head(factCoord)
# Append the class label as a third column
factCoord[["disease"]] <- df[["disease"]]
head(factCoord)
Régression logistique sous Python
# Pull the factor-coordinates data frame over from the R session
dfR = r.factCoord
print(type(dfR))
## <class 'pandas.core.frame.DataFrame'>
# Column names, dtypes and non-null counts
dfR.info()
## <class 'pandas.core.frame.DataFrame'>
## Index: 209 entries, 1 to 209
## Data columns (total 3 columns):
## # Column Non-Null Count Dtype
## --- ------ -------------- -----
## 0 Dim.1 209 non-null float64
## 1 Dim.2 209 non-null float64
## 2 disease 209 non-null object
## dtypes: float64(2), object(1)
## memory usage: 6.5+ KB
# First rows
dfR.head()
## Dim.1 Dim.2 disease
## 1 0.953524 -1.004369 positive
## 2 -1.036506 -0.963397 negative
## 3 -0.540371 3.259653 negative
## 4 -0.802186 1.045592 negative
## 5 0.364601 -0.291214 negative
# Logistic regression estimator, default constructor arguments
from sklearn.linear_model import LogisticRegression
modele = LogisticRegression()
# Fit: every column but the last as predictors, "disease" as the target
modele.fit(dfR.iloc[:, :-1], dfR["disease"])
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Fitted coefficients (one per factor axis)
modele.coef_
## array([[ 0.9742213 , -0.36294673]])
# Fitted intercept
modele.intercept_
## array([-0.32582659])
# Gather both into one flat NumPy vector: the two coefficients, then the intercept
import numpy
coefs = numpy.concatenate([modele.coef_.ravel(), modele.intercept_])
print(coefs)
## [ 0.9742213 -0.36294673 -0.32582659]
Graphique dans R
# Fetch the coefficient vector computed on the Python side
RCoefs <- py$coefs
print(RCoefs)
## [1] 0.9742213 -0.3629467 -0.3258266
# The boundary satisfies RCoefs[1]*x + RCoefs[2]*y + RCoefs[3] = 0, hence
# y = a + b*x with a the intercept and b the slope of the line
a <- -RCoefs[3] / RCoefs[2]
b <- -RCoefs[1] / RCoefs[2]
# Individuals coloured by class, with the decision boundary drawn on top
y <- factor(factCoord$disease)
plot(factCoord[, 1], factCoord[, 2], col = c("red", "blue")[y])
legend(-3, 3, legend = levels(y), fill = c("red", "blue"), cex = 0.8)
abline(a = a, b = b, lty = 6, lwd = 3)

Matrice de confusion sous Python
# Confusion matrix of the fitted model, evaluated on the data it was trained on
import matplotlib.pyplot as plt
from sklearn import metrics
metrics.ConfusionMatrixDisplay.from_estimator(
    estimator=modele, X=dfR.iloc[:, :-1], y=dfR["disease"]
)
## <sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay object at 0x000001DAD7D4B8F0>
plt.show()
