library(readxl)
D <- readxl::read_excel("heart_afdm_clustering.xlsx",sheet="var actives")
str(D)
## tibble [270 × 11] (S3: tbl_df/tbl/data.frame)
## $ age : num [1:270] 70 67 57 64 74 65 56 59 60 63 ...
## $ sexe : chr [1:270] "masculin" "feminin" "masculin" "masculin" ...
## $ type_douleur: chr [1:270] "D" "C" "B" "D" ...
## $ pression : num [1:270] 130 115 124 128 120 120 130 110 140 150 ...
## $ cholester : num [1:270] 322 564 261 263 269 177 256 239 293 407 ...
## $ sucre : chr [1:270] "A" "A" "A" "A" ...
## $ taux_max : num [1:270] 109 160 141 105 121 140 142 142 170 154 ...
## $ angine : chr [1:270] "non" "non" "non" "oui" ...
## $ depression : num [1:270] 24 16 3 2 2 4 6 12 12 40 ...
## $ pic : num [1:270] 2 2 1 2 1 1 2 2 2 2 ...
## $ vaisseau : chr [1:270] "D" "A" "A" "B" ...
#premières lignes
head(D)
#factorminer
library(FactoMineR)
#analyse factorielle des données mixtes
afdm <- FAMD(D,ncp=5)
## Warning: ggrepel: 181 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 181 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
summary(afdm)
##
## Call:
## FAMD(base = D, ncp = 5)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Variance 2.857 1.546 1.350 1.228 1.129
## % of var. 19.043 10.308 9.001 8.190 7.525
## Cumulative % of var. 19.043 29.351 38.353 46.542 54.068
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3
## 1 | 5.029 | 2.924 1.108 0.338 | 0.827 0.164 0.027 | -0.776
## 2 | 6.829 | 0.275 0.010 0.002 | 3.486 2.911 0.261 | -2.131
## 3 | 3.006 | -1.482 0.285 0.243 | -0.157 0.006 0.003 | -0.672
## 4 | 3.658 | 2.185 0.619 0.357 | -1.284 0.395 0.123 | -1.673
## 5 | 4.645 | 0.417 0.023 0.008 | 0.372 0.033 0.006 | -2.939
## 6 | 2.841 | -0.422 0.023 0.022 | -0.912 0.199 0.103 | -0.307
## 7 | 3.887 | 0.911 0.108 0.055 | -0.026 0.000 0.000 | -0.077
## 8 | 3.122 | 1.464 0.278 0.220 | -1.916 0.879 0.377 | -1.041
## 9 | 3.461 | 0.889 0.103 0.066 | 0.823 0.162 0.057 | -0.318
## 10 | 5.959 | 2.631 0.897 0.195 | 2.850 1.945 0.229 | -0.645
## ctr cos2
## 1 0.165 0.024 |
## 2 1.245 0.097 |
## 3 0.124 0.050 |
## 4 0.768 0.209 |
## 5 2.369 0.400 |
## 6 0.026 0.012 |
## 7 0.002 0.000 |
## 8 0.297 0.111 |
## 9 0.028 0.008 |
## 10 0.114 0.012 |
##
## Continuous variables
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
## age | 0.537 10.082 0.288 | 0.462 13.818 0.214 | -0.257 4.889
## pression | 0.318 3.541 0.101 | 0.553 19.767 0.306 | 0.246 4.478
## cholester | 0.174 1.063 0.030 | 0.505 16.525 0.256 | -0.426 13.459
## taux_max | -0.711 17.696 0.505 | 0.178 2.044 0.032 | 0.195 2.814
## depression | 0.705 17.420 0.498 | -0.026 0.042 0.001 | 0.378 10.597
## pic | 0.646 14.595 0.417 | -0.075 0.360 0.006 | 0.399 11.768
## cos2
## age 0.066 |
## pression 0.060 |
## cholester 0.182 |
## taux_max 0.038 |
## depression 0.143 |
## pic 0.159 |
##
## Categories (the 10 first)
## Dim.1 ctr cos2 v.test Dim.2 ctr cos2 v.test
## feminin | -0.378 0.564 0.059 -2.528 | 0.933 11.743 0.360 8.489 |
## masculin | 0.180 0.268 0.059 2.528 | -0.444 5.583 0.360 -8.489 |
## A | 0.191 0.033 0.003 0.525 | 0.822 2.091 0.050 3.065 |
## B | -1.702 5.520 0.422 -7.087 | 0.094 0.057 0.001 0.530 |
## C | -0.730 1.913 0.186 -4.558 | 0.378 1.749 0.050 3.207 |
## D | 0.972 5.528 0.566 9.018 | -0.389 3.030 0.091 -4.912 |
## A | -0.051 0.027 0.014 -1.191 | -0.200 1.424 0.211 -6.323 |
## B | 0.294 0.157 0.014 1.191 | 1.149 8.188 0.211 6.323 |
## non | -0.699 4.012 0.630 -9.671 | 0.294 2.422 0.111 5.528 |
## oui | 1.421 8.159 0.630 9.671 | -0.598 4.925 0.111 -5.528 |
## Dim.3 ctr cos2 v.test
## feminin -0.415 3.049 0.071 -4.042 |
## masculin 0.197 1.450 0.071 4.042 |
## A 2.462 24.626 0.448 9.828 |
## B -0.373 1.189 0.020 -2.262 |
## C 0.211 0.711 0.015 1.911 |
## D -0.389 3.967 0.091 -5.253 |
## A -0.100 0.470 0.053 -3.396 |
## B 0.577 2.704 0.053 3.396 |
## non 0.134 0.660 0.023 2.698 |
## oui -0.273 1.343 0.023 -2.698 |
#coordonnées des 10 premiers individus
head(afdm$ind$coord,10)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## 1 2.9236614 0.82727957 -0.77636193 -0.7658230 -1.2464301
## 2 0.2746058 3.48618681 -2.13077617 0.8115838 -2.2266582
## 3 -1.4820922 -0.15669953 -0.67202499 0.3646365 0.5057802
## 4 2.1850086 -1.28415069 -1.67272630 -0.9667281 0.6056009
## 5 0.4167746 0.37219755 -2.93907227 -0.5645674 0.6458562
## 6 -0.4223041 -0.91159880 -0.30716132 -0.2766911 0.3131758
## 7 0.9108194 -0.02589533 -0.07672574 -2.7319840 0.9436402
## 8 1.4644450 -1.91575936 -1.04091107 -0.6474286 0.4982558
## 9 0.8892931 0.82306108 -0.31807057 1.5288849 1.0831128
## 10 2.6305767 2.84977754 -0.64497428 0.7304879 -2.2293733
#nuages par paires de variables
pairs(afdm$ind$coord,cex=0.75)
#matrice des distances à partir des 5 premiers facteur
md <- dist(afdm$ind$coord)
#cah
cah <- hclust(md,method="ward.D2")
plot(cah)
#découpage en 2 groupes
groupes <- cutree(cah,k=2)
table(groupes)
## groupes
## 1 2
## 77 193
#nuage avec les groupes d'appartenance
pairs(afdm$ind$coord,col=c('cornflowerblue','yellowgreen')[groupes],cex=0.75)
#moyennes marginales var. quanti
sapply(D[sapply(D,is.numeric)],mean)
## age pression cholester taux_max depression pic
## 54.433333 131.344444 249.659259 149.677778 10.500000 1.585185
#moyennes conditionnelles var. quanti
lapply(D[sapply(D,is.numeric)],function(x){tapply(x,groupes,mean)})
## $age
## 1 2
## 58.97403 52.62176
##
## $pression
## 1 2
## 134.4416 130.1088
##
## $cholester
## 1 2
## 258.4675 246.1451
##
## $taux_max
## 1 2
## 125.7273 159.2332
##
## $depression
## 1 2
## 19.896104 6.751295
##
## $pic
## 1 2
## 2.025974 1.409326
#proportions marginales var. qualitatives
sapply(D[sapply(D,is.character)],function(x){prop.table(table(x))})
## $sexe
## x
## feminin masculin
## 0.3222222 0.6777778
##
## $type_douleur
## x
## A B C D
## 0.07407407 0.15555556 0.29259259 0.47777778
##
## $sucre
## x
## A B
## 0.8518519 0.1481481
##
## $angine
## x
## non oui
## 0.6703704 0.3296296
##
## $vaisseau
## x
## A B C D
## 0.59259259 0.21481481 0.12222222 0.07037037
#proportions dans les classes
sapply(D[sapply(D,is.character)],function(x){prop.table(table(groupes,x),1)})
## $sexe
## x
## groupes feminin masculin
## 1 0.2597403 0.7402597
## 2 0.3471503 0.6528497
##
## $type_douleur
## x
## groupes A B C D
## 1 0.00000000 0.03896104 0.09090909 0.87012987
## 2 0.10362694 0.20207254 0.37305699 0.32124352
##
## $sucre
## x
## groupes A B
## 1 0.93506494 0.06493506
## 2 0.81865285 0.18134715
##
## $angine
## x
## groupes non oui
## 1 0.2727273 0.7272727
## 2 0.8290155 0.1709845
##
## $vaisseau
## x
## groupes A B C D
## 1 0.29870130 0.32467532 0.19480519 0.18181818
## 2 0.70984456 0.17098446 0.09326425 0.02590674
#chargement variable illustrative
DIllus <- readxl::read_excel("heart_afdm_clustering.xlsx",sheet="var illustrative")
head(DIllus)
#proportions des modalités
prop.table(table(DIllus$coeur))
##
## absence presence
## 0.5555556 0.4444444
#croisement entre les classes réelles et les groupes clustering
table(DIllus$coeur,groupes)
## groupes
## 1 2
## absence 9 141
## presence 68 52