Importing and preparing the data
# Q.0 - import the training sample
# located in the first sheet of the workbook
library(xlsx)
DTrain <- read.xlsx("waveform.xlsx",sheetIndex=1,stringsAsFactors=TRUE,encoding="UTF-8")
# description of the data
str(DTrain)
## 'data.frame': 500 obs. of 8 variables:
## $ V05 : num 1.54 0.95 2.48 4.35 4.04 3.89 2.7 2.53 2.68 1.44 ...
## $ V07 : num 4.48 2.71 3.16 6.64 4.34 3.67 3.66 3.51 3.67 2.59 ...
## $ V10 : num 1.51 2.09 2.06 3.57 2.61 2.62 3 2.12 2.91 2.11 ...
## $ V11 : num 1.09 1.5 2.24 1.47 0.62 2.73 3.7 2.23 1.45 1.58 ...
## $ V12 : num 1.32 1.09 2.87 0.95 1.81 1.61 3.19 2.81 1.18 -0.72 ...
## $ V15 : num 2.25 2.85 3.42 1.13 -0.14 2.21 4.26 2.92 1.24 3.66 ...
## $ V18 : num 0.53 1.87 1.58 -1.43 -0.77 0.2 1.95 3.11 0.51 0.36 ...
## $ classe: Factor w/ 3 levels "A","B","C": 1 1 1 1 1 1 1 1 1 1 ...
# Q.1 - descriptive statistics
# no, the explanatory variables are not centered (means != 0)
# 173 observations of class "A", 157 of "B" and 170 of "C"
print(summary(DTrain))
## V05 V07 V10 V11
## Min. :-2.600 Min. :-2.670 Min. :-1.790 Min. :-0.600
## 1st Qu.:-0.095 1st Qu.: 1.150 1st Qu.: 1.877 1st Qu.: 2.185
## Median : 1.090 Median : 2.400 Median : 2.915 Median : 3.195
## Mean : 1.255 Mean : 2.598 Mean : 2.938 Mean : 3.323
## 3rd Qu.: 2.520 3rd Qu.: 4.135 3rd Qu.: 3.935 3rd Qu.: 4.532
## Max. : 5.920 Max. : 7.690 Max. : 6.770 Max. : 8.050
## V12 V15 V18 classe
## Min. :-0.780 Min. :-2.140 Min. :-2.740 A:173
## 1st Qu.: 1.935 1st Qu.: 1.278 1st Qu.: 0.150 B:157
## Median : 2.985 Median : 2.735 Median : 1.105 C:170
## Mean : 3.028 Mean : 2.764 Mean : 1.112
## 3rd Qu.: 4.072 3rd Qu.: 4.263 3rd Qu.: 2.013
## Max. : 7.300 Max. : 7.960 Max. : 5.260
# Q.2 - pairwise scatter plots
# the classes separate fairly well
# depending on the combination of variables
pairs(DTrain[-ncol(DTrain)],col=c('black','red','green')[DTrain$classe],pch=19,cex=0.5)
# Q.3 + Q.4 -- center and scale the explanatory variables
ZTrain <- scale(DTrain[-ncol(DTrain)],center=TRUE,scale=TRUE)
# summary of the standardized data
print(summary(ZTrain))
## V05 V07 V10 V11
## Min. :-2.28698 Min. :-2.7023 Min. :-3.14061 Min. :-2.4250
## 1st Qu.:-0.80080 1st Qu.:-0.7430 1st Qu.:-0.70449 1st Qu.:-0.7033
## Median :-0.09776 Median :-0.1018 Median :-0.01533 Median :-0.0789
## Mean : 0.00000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.75064 3rd Qu.: 0.7881 3rd Qu.: 0.66220 3rd Qu.: 0.7480
## Max. : 2.76780 Max. : 2.6116 Max. : 2.54534 Max. : 2.9225
## V12 V15 V18
## Min. :-2.53965 Min. :-2.47443 Min. :-2.844481
## 1st Qu.:-0.72896 1st Qu.:-0.75022 1st Qu.:-0.710315
## Median :-0.02869 Median :-0.01488 Median :-0.005081
## Mean : 0.00000 Mean : 0.00000 Mean : 0.000000
## 3rd Qu.: 0.69659 3rd Qu.: 0.75577 3rd Qu.: 0.665077
## Max. : 2.84907 Max. : 2.62124 Max. : 3.063244
# Q.5 -- additional properties of the object
# returned by scale()
print(attributes(ZTrain))
## $dim
## [1] 500 7
##
## $dimnames
## $dimnames[[1]]
## NULL
##
## $dimnames[[2]]
## [1] "V05" "V07" "V10" "V11" "V12" "V15" "V18"
##
##
## $`scaled:center`
## V05 V07 V10 V11 V12 V15 V18
## 1.25478 2.59846 2.93808 3.32262 3.02802 2.76450 1.11188
##
## $`scaled:scale`
## V05 V07 V10 V11 V12 V15 V18
## 1.685532 1.949606 1.505464 1.617583 1.499429 1.982077 1.354159
# Q.6
# access the scaled:center attribute
# holding the means used for standardization
print(attr(ZTrain,'scaled:center'))
## V05 V07 V10 V11 V12 V15 V18
## 1.25478 2.59846 2.93808 3.32262 3.02802 2.76450 1.11188
# and the standard deviations
print(attr(ZTrain,'scaled:scale'))
## V05 V07 V10 V11 V12 V15 V18
## 1.685532 1.949606 1.505464 1.617583 1.499429 1.982077 1.354159
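# Extra check (a minimal sketch, not one of the exercise questions): de-standardizing
# ZTrain with the stored attributes should recover the original explanatory variables;
# XRecon is an ad hoc name.
XRecon <- sweep(sweep(ZTrain, 2, attr(ZTrain,'scaled:scale'), '*'),
                2, attr(ZTrain,'scaled:center'), '+')
# maximum absolute deviation, expected to be numerically zero
print(max(abs(XRecon - as.matrix(DTrain[-ncol(DTrain)]))))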
Logistic regression with “nnet”
# Q.7 -- nnet library
library(nnet)
# Q.8 - fit the model (internally a single-layer perceptron / softmax network,
# estimated by numerical optimization)
# we must work with the standardized data because the optimization heuristic
# can be hampered by scale differences between the variables
# we obtain the estimated coefficients, the deviance and the AIC criterion
# note that we only get 2 equations for 3 classes: the first level ("A") serves as the reference
mNet <- nnet::multinom(classe ~ ., data = cbind(ZTrain,DTrain['classe']))
## # weights: 27 (16 variable)
## initial value 549.306144
## iter 10 value 206.319054
## iter 20 value 179.322581
## final value 178.812650
## converged
print(mNet)
## Call:
## nnet::multinom(formula = classe ~ ., data = cbind(ZTrain, DTrain["classe"]))
##
## Coefficients:
## (Intercept) V05 V07 V10 V11 V12 V15
## B -0.6740575 -0.2869636 0.7071511 1.344863 1.791235 1.052232 -1.73265071
## C -0.5643332 -2.1829750 -1.2259345 0.814202 1.397061 1.367325 -0.07096102
## V18
## B -1.0414078
## C -0.2278076
##
## Residual Deviance: 357.6253
## AIC: 389.6253
# Q.9 - attributes of the object
print(attributes(mNet))
## $names
## [1] "n" "nunits" "nconn" "conn"
## [5] "nsunits" "decay" "entropy" "softmax"
## [9] "censored" "value" "wts" "convergence"
## [13] "fitted.values" "residuals" "lev" "call"
## [17] "terms" "weights" "deviance" "rank"
## [21] "lab" "coefnames" "vcoefnames" "xlevels"
## [25] "edf" "AIC"
##
## $class
## [1] "multinom" "nnet"
# Q.10 - summary
sNet <- summary(mNet)
print(sNet)
## Call:
## nnet::multinom(formula = classe ~ ., data = cbind(ZTrain, DTrain["classe"]))
##
## Coefficients:
## (Intercept) V05 V07 V10 V11 V12 V15
## B -0.6740575 -0.2869636 0.7071511 1.344863 1.791235 1.052232 -1.73265071
## C -0.5643332 -2.1829750 -1.2259345 0.814202 1.397061 1.367325 -0.07096102
## V18
## B -1.0414078
## C -0.2278076
##
## Std. Errors:
## (Intercept) V05 V07 V10 V11 V12 V15
## B 0.3789441 0.3154739 0.3281692 0.3012618 0.3080071 0.3228184 0.359432
## C 0.3713243 0.3358100 0.3547562 0.2813144 0.2992985 0.2988285 0.314818
## V18
## B 0.2765070
## C 0.2326525
##
## Residual Deviance: 357.6253
## AIC: 389.6253
# Q.11 - attributes of the summary object
print(attributes(sNet))
## $names
## [1] "n" "nunits" "nconn" "conn"
## [5] "nsunits" "decay" "entropy" "softmax"
## [9] "censored" "value" "wts" "convergence"
## [13] "fitted.values" "residuals" "lev" "call"
## [17] "terms" "weights" "deviance" "rank"
## [21] "lab" "coefnames" "vcoefnames" "xlevels"
## [25] "edf" "AIC" "is.binomial" "digits"
## [29] "coefficients" "standard.errors"
##
## $class
## [1] "summary.multinom"
# Q.12 - coefficients
print(sNet$coefficients)
## (Intercept) V05 V07 V10 V11 V12 V15
## B -0.6740575 -0.2869636 0.7071511 1.344863 1.791235 1.052232 -1.73265071
## C -0.5643332 -2.1829750 -1.2259345 0.814202 1.397061 1.367325 -0.07096102
## V18
## B -1.0414078
## C -0.2278076
# class of the object
print(class(sNet$coefficients))
## [1] "matrix" "array"
# Q.13 - fitted class-membership probabilities
# we have the values for all 3 classes!
head(sNet$fitted.values)
## A B C
## 1 0.9816813 0.016439970 1.878687e-03
## 2 0.9803203 0.004538032 1.514166e-02
## 3 0.9613791 0.023623419 1.499748e-02
## 4 0.3310797 0.668901640 1.870174e-05
## 5 0.6616134 0.338247655 1.389403e-04
## 6 0.8140623 0.184657167 1.280535e-03
# dimensions (nb. of training obs. x nb. of classes)
print(dim(sNet$fitted.values))
## [1] 500 3
# Q.14 - check that each row sum is indeed 1
# it is, fortunately!
print(rowSums(sNet$fitted.values))
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Q.15
# let us check the computation for the first individual
# lecture notes -- page 5
# Q.15a + Q.15b - logit for level B, then exp()
eB <- exp(sum(c(1,ZTrain[1,]) * sNet$coefficients["B",]))
print(eB)
## [1] 0.01674675
# level C
eC <- exp(sum(c(1,ZTrain[1,]) * sNet$coefficients["C",]))
print(eC)
## [1] 0.001913744
# Q.15c - probability of B
pB <- eB/(1.0 + (eB + eC))
print(pB)
## [1] 0.01643997
# probability of C
pC <- eC/(1.0 + (eB + eC))
print(pC)
## [1] 0.001878687
# Q.15d - probability of A (by complement)
pA <- 1.0 - (pB + pC)
print(pA)
## [1] 0.9816813
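# Extra sketch: the same softmax computation, vectorized for all 500 training
# individuals, to be compared with sNet$fitted.values (scores and probs are ad hoc names).
scores <- cbind(1, ZTrain) %*% t(sNet$coefficients)          # 500 x 2 matrix of logits (B and C)
probs <- cbind(1, exp(scores)) / (1 + rowSums(exp(scores)))  # columns A, B, C
# maximum absolute gap with the fitted probabilities, expected to be close to 0
print(max(abs(probs - sNet$fitted.values)))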
# Q.16
# assess the overall significance of the model
# with the likelihood ratio test
# Q.16a - null model (intercept only)
mNull <- nnet::multinom(classe ~ 1, data = DTrain)
## # weights: 6 (2 variable)
## initial value 549.306144
## final value 548.868278
## converged
print(mNull)
## Call:
## nnet::multinom(formula = classe ~ 1, data = DTrain)
##
## Coefficients:
## (Intercept)
## B -0.09704616
## C -0.01749279
##
## Residual Deviance: 1097.737
## AIC: 1101.737
# Q.16b - likelihood ratio test statistic
LRGlob <- mNull$deviance - mNet$deviance
print(paste('Stat de test =',LRGlob))
## [1] "Stat de test = 740.111254720694"
# Q.16c - degrees of freedom
ddlGlob <- mNet$edf - mNull$edf
print(paste('degrés de liberté =',ddlGlob))
## [1] "degrés de liberté = 14"
# Q.16c + Q.16d - p-value
print(paste('p-value =',pchisq(LRGlob,ddlGlob,lower.tail=FALSE)))
## [1] "p-value = 7.01830654766149e-149"
# Q.17
# assess the relevance of each variable
# with the likelihood ratio test for nullity of the associated coefficients
# (a single-call alternative is sketched after the loop output below)
for (j in 1:ncol(ZTrain)){
  # regression without the variable being tested
  mTemp <- nnet::multinom(classe ~ ., data = cbind(ZTrain[,-j],DTrain['classe']))
  # likelihood ratio statistic
  LR <- mTemp$deviance - mNet$deviance
  # degrees of freedom
  ddl <- mNet$edf - mTemp$edf
  # p-value
  pval <- pchisq(LR,ddl,lower.tail=FALSE)
  # display
  print(paste(colnames(ZTrain)[j],": "))
  print(c(LR,ddl,pval))
}
## # weights: 24 (14 variable)
## initial value 549.306144
## iter 10 value 214.223489
## iter 20 value 210.713376
## final value 210.697144
## converged
## [1] "V05 : "
## [1] 6.376899e+01 2.000000e+00 1.421478e-14
## # weights: 24 (14 variable)
## initial value 549.306144
## iter 10 value 196.794337
## iter 20 value 193.740664
## final value 193.713567
## converged
## [1] "V07 : "
## [1] 2.980183e+01 2.000000e+00 3.377645e-07
## # weights: 24 (14 variable)
## initial value 549.306144
## iter 10 value 211.492946
## iter 20 value 190.459968
## final value 190.406889
## converged
## [1] "V10 : "
## [1] 2.318848e+01 2.000000e+00 9.219050e-06
## # weights: 24 (14 variable)
## initial value 549.306144
## iter 10 value 213.658486
## iter 20 value 204.523704
## final value 204.465619
## converged
## [1] "V11 : "
## [1] 5.130594e+01 2.000000e+00 7.228651e-12
## # weights: 24 (14 variable)
## initial value 549.306144
## iter 10 value 202.867700
## iter 20 value 192.335120
## final value 191.974427
## converged
## [1] "V12 : "
## [1] 2.632355e+01 2.000000e+00 1.922706e-06
## # weights: 24 (14 variable)
## initial value 549.306144
## iter 10 value 201.525247
## iter 20 value 195.922767
## final value 195.899833
## converged
## [1] "V15 : "
## [1] 3.417437e+01 2.000000e+00 3.794293e-08
## # weights: 24 (14 variable)
## initial value 549.306144
## iter 10 value 189.567924
## iter 20 value 187.063187
## final value 187.044944
## converged
## [1] "V18 : "
## [1] 1.646459e+01 2.000000e+00 2.659256e-04
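# Extra sketch: the same variable-level likelihood ratio tests can be obtained
# in a single call, assuming the "car" package is installed (its Anova() function
# has a method for "multinom" objects and reports Type II LR tests by default).
library(car)
print(car::Anova(mNet))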
Prediction and evaluation on the test sample
# Q.18
# load the test sample
DTest <- read.xlsx("waveform.xlsx",sheetIndex=2,stringsAsFactors=TRUE,encoding="UTF-8")
# structure
str(DTest)
## 'data.frame': 5000 obs. of 8 variables:
## $ V05 : num 0.6 2.3 2.42 1.13 2.66 -1.28 0.84 -1.92 0.53 0.47 ...
## $ V07 : num 0.85 5.52 4.94 2.37 2.69 1.6 3.67 1.91 0.24 0.86 ...
## $ V10 : num 0.89 2.22 2.07 4.84 3.53 4.76 5.2 2.02 1.75 4.5 ...
## $ V11 : num 1.08 2.81 0.51 4.65 4.82 5.55 8.16 3.63 3.92 6.83 ...
## $ V12 : num 4.2 1.61 1.45 4.05 4.79 4.3 3.29 3.91 5.68 6.94 ...
## $ V15 : num 4.59 1.88 1.41 1.24 1.73 2.37 0.4 4.89 3.81 1.08 ...
## $ V18 : num 3.32 1.41 0.62 -1.43 0.13 0.69 0.66 -0.66 1.51 -0.41 ...
## $ classe: Factor w/ 3 levels "A","B","C": 3 2 1 2 2 3 2 3 3 3 ...
# Q.19 -- summary statistics
# to be compared with the statistics computed
# on the training sample
print(summary(DTest))
## V05 V07 V10 V11
## Min. :-3.4800 Min. :-3.320 Min. :-1.790 Min. :-1.480
## 1st Qu.: 0.0375 1st Qu.: 1.110 1st Qu.: 1.880 1st Qu.: 2.040
## Median : 1.1200 Median : 2.500 Median : 3.000 Median : 3.170
## Mean : 1.3109 Mean : 2.662 Mean : 2.989 Mean : 3.337
## 3rd Qu.: 2.5400 3rd Qu.: 4.210 3rd Qu.: 4.080 3rd Qu.: 4.550
## Max. : 6.5000 Max. : 8.760 Max. : 7.630 Max. : 9.060
## V12 V15 V18 classe
## Min. :-1.690 Min. :-2.560 Min. :-4.080 A:1657
## 1st Qu.: 1.920 1st Qu.: 1.120 1st Qu.:-0.010 B:1647
## Median : 3.000 Median : 2.490 Median : 0.940 C:1696
## Mean : 3.014 Mean : 2.648 Mean : 1.001
## 3rd Qu.: 4.082 3rd Qu.: 4.183 3rd Qu.: 1.960
## Max. : 7.400 Max. : 8.720 Max. : 6.200
# Q.20 + Q.21
# center and scale
# /!\ using the parameters (means, standard deviations)
# computed on the training sample,
# because the individuals of the test sample
# represent the deployment population:
# they must be handled individually, not collectively,
# and no parameter may be estimated on this sample
# cf. the center and scale arguments of the scale(.) function
ZTest <- scale(DTest[-ncol(DTest)],center=attr(ZTrain,'scaled:center'),scale=attr(ZTrain,'scaled:scale'))
# descriptive statistics
print(summary(ZTest))
## V05 V07 V10 V11
## Min. :-2.80907 Min. :-3.03572 Min. :-3.14061 Min. :-2.969009
## 1st Qu.:-0.72219 1st Qu.:-0.76347 1st Qu.:-0.70283 1st Qu.:-0.792924
## Median :-0.07996 Median :-0.05050 Median : 0.04113 Median :-0.094351
## Mean : 0.03329 Mean : 0.03249 Mean : 0.03360 Mean : 0.008644
## 3rd Qu.: 0.76250 3rd Qu.: 0.82660 3rd Qu.: 0.75852 3rd Qu.: 0.758774
## Max. : 3.11191 Max. : 3.16040 Max. : 3.11659 Max. : 3.546884
## V12 V15 V18
## Min. :-3.146545 Min. :-2.68632 Min. :-3.83402
## 1st Qu.:-0.738962 1st Qu.:-0.82969 1st Qu.:-0.82847
## Median :-0.018687 Median :-0.13849 Median :-0.12693
## Mean :-0.009608 Mean :-0.05894 Mean :-0.08216
## 3rd Qu.: 0.703255 3rd Qu.: 0.71541 3rd Qu.: 0.62631
## Max. : 2.915764 Max. : 3.00468 Max. : 3.75740
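# Extra sketch: the equivalent explicit computation, (x - training mean) / training sd,
# applied column by column with sweep(); ZTest2 is an ad hoc name.
ZTest2 <- sweep(sweep(as.matrix(DTest[-ncol(DTest)]), 2, attr(ZTrain,'scaled:center'), '-'),
                2, attr(ZTrain,'scaled:scale'), '/')
# maximum absolute deviation from ZTest, expected to be numerically zero
print(max(abs(ZTest2 - ZTest)))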
# Q.22 -- prediction
predNet <- predict(mNet,newdata=ZTest,type="class")
# Q.23 -- distribution of the predicted classes
print(table(predNet))
## predNet
## A B C
## 1655 1721 1624
# Q.24 - build the confusion matrix
mcNet <- table(DTest$classe,predNet)
print(mcNet)
## predNet
## A B C
## A 1323 199 135
## B 143 1379 125
## C 189 143 1364
# Q.24a - compute the accuracy (recognition rate) from the confusion matrix
accNet <- sum(diag(mcNet))/sum(mcNet)
print(accNet)
## [1] 0.8132
# Q.24b - compute the error rate from the confusion matrix
errNet <- 1 - accNet
print(errNet)
## [1] 0.1868
# Q.24c - compute the per-class recalls
rappelNet <- diag(mcNet)/rowSums(mcNet)
print(rappelNet)
## A B C
## 0.7984309 0.8372799 0.8042453
# Q.24d - compute the per-class precisions
prNet <- diag(mcNet)/colSums(mcNet)
print(prNet)
## A B C
## 0.7993958 0.8012783 0.8399015
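# Extra sketch: per-class F1-scores, the harmonic mean of the recalls and
# precisions computed above (f1Net is an ad hoc name).
f1Net <- 2 * rappelNet * prNet / (rappelNet + prNet)
print(f1Net)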
# Q.25
# cross-check with the "caret" library
library(caret)
## Le chargement a nécessité le package : lattice
## Le chargement a nécessité le package : ggplot2
# comparison of the results -- OK
print(caret::confusionMatrix(data=predNet,reference=DTest$classe))
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C
## A 1323 143 189
## B 199 1379 143
## C 135 125 1364
##
## Overall Statistics
##
## Accuracy : 0.8132
## 95% CI : (0.8021, 0.8239)
## No Information Rate : 0.3392
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7198
##
## Mcnemar's Test P-Value : 0.0002283
##
## Statistics by Class:
##
## Class: A Class: B Class: C
## Sensitivity 0.7984 0.8373 0.8042
## Specificity 0.9007 0.8980 0.9213
## Pos Pred Value 0.7994 0.8013 0.8399
## Neg Pred Value 0.9001 0.9183 0.9017
## Prevalence 0.3314 0.3294 0.3392
## Detection Rate 0.2646 0.2758 0.2728
## Detection Prevalence 0.3310 0.3442 0.3248
## Balanced Accuracy 0.8496 0.8676 0.8628
Processing with the “VGAM” package
# Q.26 - load the library
library(VGAM)
## Le chargement a nécessité le package : stats4
## Le chargement a nécessité le package : splines
##
## Attachement du package : 'VGAM'
## L'objet suivant est masqué depuis 'package:caret':
##
## predictors
# Q.27 - model fitting
# pay close attention to the parameter settings (reference level "A")
mVg <- VGAM::vglm(classe ~ ., data = cbind(ZTrain,DTrain['classe']), family=multinomial(refLevel = "A"))
print(mVg)
##
## Call:
## VGAM::vglm(formula = classe ~ ., family = multinomial(refLevel = "A"),
## data = cbind(ZTrain, DTrain["classe"]))
##
##
## Coefficients:
## (Intercept):1 (Intercept):2 V05:1 V05:2 V07:1
## -0.67395734 -0.56426635 -0.28696650 -2.18294744 0.70713336
## V07:2 V10:1 V10:2 V11:1 V11:2
## -1.22590914 1.34485051 0.81421167 1.79124390 1.39708662
## V12:1 V12:2 V15:1 V15:2 V18:1
## 1.05221109 1.36733030 -1.73263912 -0.07098318 -1.04142264
## V18:2
## -0.22780723
##
## Degrees of Freedom: 1000 Total; 984 Residual
## Residual deviance: 357.6253
## Log-likelihood: -178.8127
##
## This is a multinomial logit model with 3 levels
# Q.28 - detailed output
# we get the significance tests of the coefficients,
# but equation by equation, which is hard to exploit (see the reshaping sketch after the output)
sVg <- summary(mVg)
print(sVg)
##
## Call:
## VGAM::vglm(formula = classe ~ ., family = multinomial(refLevel = "A"),
## data = cbind(ZTrain, DTrain["classe"]))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.67396 0.37893 -1.779 0.075312 .
## (Intercept):2 -0.56427 0.37132 -1.520 0.128604
## V05:1 -0.28697 0.31547 -0.910 0.363009
## V05:2 -2.18295 0.33580 -6.501 8.00e-11 ***
## V07:1 0.70713 0.32816 2.155 0.031175 *
## V07:2 -1.22591 0.35475 -3.456 0.000549 ***
## V10:1 1.34485 0.30126 4.464 8.04e-06 ***
## V10:2 0.81421 0.28131 2.894 0.003800 **
## V11:1 1.79124 0.30800 5.816 6.04e-09 ***
## V11:2 1.39709 0.29930 4.668 3.04e-06 ***
## V12:1 1.05221 0.32282 3.259 0.001116 **
## V12:2 1.36733 0.29883 4.576 4.75e-06 ***
## V15:1 -1.73264 0.35943 -4.821 1.43e-06 ***
## V15:2 -0.07098 0.31482 -0.225 0.821610
## V18:1 -1.04142 0.27650 -3.766 0.000166 ***
## V18:2 -0.22781 0.23265 -0.979 0.327491
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1])
##
## Residual deviance: 357.6253 on 984 degrees of freedom
##
## Log-likelihood: -178.8127 on 984 degrees of freedom
##
## Number of Fisher scoring iterations: 7
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Reference group is level 1 of the response
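# Extra sketch: the coefficients reshaped as a matrix (one column per logit),
# which is easier to compare with the nnet::multinom output; this assumes the
# "matrix" argument of coef() for "vglm" objects.
print(coef(mVg, matrix = TRUE))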
# Q.29 - prediction on the test sample
# type = "response" is the option we are interested in
predVglm <- predictvglm(object=mVg,newdata=as.data.frame(ZTest),type="response")
# Q.30 - first rows
# we get the class-membership probabilities
print(head(predVglm))
## A B C
## 1 0.7366413241 0.0002518254 0.2631068504
## 2 0.8085895247 0.1891902382 0.0022202371
## 3 0.9690689180 0.0305145662 0.0004165158
## 4 0.0015288560 0.9557480685 0.0427230755
## 5 0.0141765521 0.9480264368 0.0377970111
## 6 0.0005085881 0.0794625956 0.9200288163
# Q.31 - convert into predicted classes
predClassVglm <- factor(c("A","B","C")[apply(predVglm,1,which.max)])
print(head(predClassVglm))
## [1] A A A B B C
## Levels: A B C
# Q.32 - distribution of the predictions
print(table(predClassVglm))
## predClassVglm
## A B C
## 1655 1721 1624
# Q.33 - comparison with the observed classes
print(caret::confusionMatrix(data=predClassVglm,reference=DTest$classe))
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C
## A 1323 143 189
## B 199 1379 143
## C 135 125 1364
##
## Overall Statistics
##
## Accuracy : 0.8132
## 95% CI : (0.8021, 0.8239)
## No Information Rate : 0.3392
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7198
##
## Mcnemar's Test P-Value : 0.0002283
##
## Statistics by Class:
##
## Class: A Class: B Class: C
## Sensitivity 0.7984 0.8373 0.8042
## Specificity 0.9007 0.8980 0.9213
## Pos Pred Value 0.7994 0.8013 0.8399
## Neg Pred Value 0.9001 0.9183 0.9017
## Prevalence 0.3314 0.3294 0.3392
## Detection Rate 0.2646 0.2758 0.2728
## Detection Prevalence 0.3310 0.3442 0.3248
## Balanced Accuracy 0.8496 0.8676 0.8628
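# Extra sketch: direct check that the VGAM and nnet predictions coincide on the
# test sample (proportion of identical predictions and cross-tabulation).
print(mean(predClassVglm == predNet))
print(table(vglm = predClassVglm, nnet = predNet))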
Comparison with a decision tree
# Q.34 - decision tree with rpart
library(rpart)
# Q.35
# we can work directly with the original variables:
# using the centered and scaled data
# would give exactly the same result
mArbre <- rpart(classe ~ ., data = DTrain)
print(mArbre)
## n= 500
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 500 327 A (0.34600000 0.31400000 0.34000000)
## 2) V05>=0.38 323 173 A (0.46439628 0.42414861 0.11145511)
## 4) V10< 2.775 145 30 A (0.79310345 0.11034483 0.09655172)
## 8) V18>=-0.235 130 20 A (0.84615385 0.05384615 0.10000000) *
## 9) V18< -0.235 15 6 B (0.33333333 0.60000000 0.06666667) *
## 5) V10>=2.775 178 57 B (0.19662921 0.67977528 0.12359551)
## 10) V15< 2.115 126 21 B (0.13492063 0.83333333 0.03174603) *
## 11) V15>=2.115 52 34 A (0.34615385 0.30769231 0.34615385)
## 22) V12< 3.37 27 9 A (0.66666667 0.18518519 0.14814815) *
## 23) V12>=3.37 25 11 C (0.00000000 0.44000000 0.56000000)
## 46) V18< 0.76 14 4 B (0.00000000 0.71428571 0.28571429) *
## 47) V18>=0.76 11 1 C (0.00000000 0.09090909 0.90909091) *
## 3) V05< 0.38 177 43 C (0.12994350 0.11299435 0.75706215)
## 6) V11< 1.705 12 2 A (0.83333333 0.00000000 0.16666667) *
## 7) V11>=1.705 165 33 C (0.07878788 0.12121212 0.80000000)
## 14) V15< 2.435 30 15 B (0.00000000 0.50000000 0.50000000)
## 28) V05>=-0.26 10 1 B (0.00000000 0.90000000 0.10000000) *
## 29) V05< -0.26 20 6 C (0.00000000 0.30000000 0.70000000) *
## 15) V15>=2.435 135 18 C (0.09629630 0.03703704 0.86666667) *
# Q.37 - graphical display
library(rpart.plot)
rpart.plot(mArbre)
# Q.38 - list of the attributes of the tree
print(attributes(mArbre))
## $names
## [1] "frame" "where" "call"
## [4] "terms" "cptable" "method"
## [7] "parms" "control" "functions"
## [10] "numresp" "splits" "variable.importance"
## [13] "y" "ordered"
##
## $xlevels
## named list()
##
## $ylevels
## [1] "A" "B" "C"
##
## $class
## [1] "rpart"
# structure of the tree
print(mArbre$frame[c('var','n','yval')])
## var n yval
## 1 V05 500 1
## 2 V10 323 1
## 4 V18 145 1
## 8 <leaf> 130 1
## 9 <leaf> 15 2
## 5 V15 178 2
## 10 <leaf> 126 2
## 11 V12 52 1
## 22 <leaf> 27 1
## 23 V18 25 3
## 46 <leaf> 14 2
## 47 <leaf> 11 3
## 3 V11 177 3
## 6 <leaf> 12 1
## 7 V15 165 3
## 14 V05 30 2
## 28 <leaf> 10 2
## 29 <leaf> 20 3
## 15 <leaf> 135 3
# Q.39
# list of the variables actually used in the tree
# V07 is not among them, even though it is significant in the regression
print(setdiff(unique(mArbre$frame$'var'),c('<leaf>')))
## [1] "V05" "V10" "V18" "V15" "V12" "V11"
# Q.40
# details and competing variables at each node of the tree
# see in particular: << Primary Splits >>
# on the root node, for instance, V07 came 3rd
# in the competition for splitting the node
print(summary(mArbre))
## Call:
## rpart(formula = classe ~ ., data = DTrain)
## n= 500
##
## CP nsplit rel error xerror xstd
## 1 0.33944954 0 1.0000000 1.0122324 0.03234630
## 2 0.26299694 1 0.6605505 0.7094801 0.03410190
## 3 0.02446483 2 0.3975535 0.4403670 0.03096517
## 4 0.01834862 5 0.3241590 0.3944954 0.02991914
## 5 0.01223242 6 0.3058104 0.4189602 0.03049867
## 6 0.01000000 9 0.2691131 0.4097859 0.03028727
##
## Variable importance
## V05 V10 V15 V11 V18 V07 V12
## 22 17 17 13 12 11 9
##
## Node number 1: 500 observations, complexity param=0.3394495
## predicted class=A expected loss=0.654 P(node) =1
## class counts: 173 157 170
## probabilities: 0.346 0.314 0.340
## left son=2 (323 obs) right son=3 (177 obs)
## Primary splits:
## V05 < 0.38 to the right, improve=71.51910, (0 missing)
## V15 < 1.93 to the left, improve=69.90867, (0 missing)
## V07 < 2.58 to the right, improve=61.33074, (0 missing)
## V11 < 2.92 to the left, improve=52.97582, (0 missing)
## V10 < 2.775 to the left, improve=50.01740, (0 missing)
## Surrogate splits:
## V07 < 2.3 to the right, agree=0.734, adj=0.249, (0 split)
## V12 < 3.5 to the left, agree=0.722, adj=0.215, (0 split)
## V15 < 2.96 to the left, agree=0.700, adj=0.153, (0 split)
## V11 < 5.985 to the left, agree=0.670, adj=0.068, (0 split)
## V18 < 1.875 to the left, agree=0.664, adj=0.051, (0 split)
##
## Node number 2: 323 observations, complexity param=0.2629969
## predicted class=A expected loss=0.5356037 P(node) =0.646
## class counts: 150 137 36
## probabilities: 0.464 0.424 0.111
## left son=4 (145 obs) right son=5 (178 obs)
## Primary splits:
## V10 < 2.775 to the left, improve=54.39788, (0 missing)
## V15 < 2.205 to the right, improve=47.54085, (0 missing)
## V11 < 3.45 to the left, improve=41.59162, (0 missing)
## V18 < 0.795 to the right, improve=37.19672, (0 missing)
## V12 < 3.365 to the left, improve=18.48477, (0 missing)
## Surrogate splits:
## V15 < 2.535 to the right, agree=0.743, adj=0.428, (0 split)
## V11 < 2.885 to the left, agree=0.721, adj=0.379, (0 split)
## V18 < 0.905 to the right, agree=0.700, adj=0.331, (0 split)
## V07 < 1.355 to the left, agree=0.619, adj=0.152, (0 split)
## V12 < 2.765 to the left, agree=0.588, adj=0.083, (0 split)
##
## Node number 3: 177 observations, complexity param=0.02446483
## predicted class=C expected loss=0.2429379 P(node) =0.354
## class counts: 23 20 134
## probabilities: 0.130 0.113 0.757
## left son=6 (12 obs) right son=7 (165 obs)
## Primary splits:
## V11 < 1.705 to the left, improve=11.020240, (0 missing)
## V12 < 2.855 to the left, improve= 8.911673, (0 missing)
## V15 < 2.435 to the left, improve= 8.332924, (0 missing)
## V07 < 2.83 to the right, improve= 7.413908, (0 missing)
## V18 < -0.045 to the left, improve= 6.303253, (0 missing)
## Surrogate splits:
## V10 < -0.4 to the left, agree=0.949, adj=0.250, (0 split)
## V18 < 4.205 to the right, agree=0.938, adj=0.083, (0 split)
##
## Node number 4: 145 observations, complexity param=0.01223242
## predicted class=A expected loss=0.2068966 P(node) =0.29
## class counts: 115 16 14
## probabilities: 0.793 0.110 0.097
## left son=8 (130 obs) right son=9 (15 obs)
## Primary splits:
## V18 < -0.235 to the right, improve=7.563042, (0 missing)
## V15 < 1.335 to the right, improve=6.238181, (0 missing)
## V11 < 3.96 to the left, improve=6.221483, (0 missing)
## V07 < 0.135 to the right, improve=4.691366, (0 missing)
## V12 < 3.505 to the left, improve=3.521881, (0 missing)
## Surrogate splits:
## V15 < -0.065 to the right, agree=0.917, adj=0.200, (0 split)
## V05 < 4.34 to the left, agree=0.903, adj=0.067, (0 split)
## V11 < 4.46 to the left, agree=0.903, adj=0.067, (0 split)
##
## Node number 5: 178 observations, complexity param=0.02446483
## predicted class=B expected loss=0.3202247 P(node) =0.356
## class counts: 35 121 22
## probabilities: 0.197 0.680 0.124
## left son=10 (126 obs) right son=11 (52 obs)
## Primary splits:
## V15 < 2.115 to the left, improve=15.451320, (0 missing)
## V07 < 2.475 to the right, improve=11.976110, (0 missing)
## V11 < 2.745 to the left, improve=11.261400, (0 missing)
## V18 < 0.86 to the right, improve=10.659630, (0 missing)
## V12 < 1.4 to the left, improve= 6.268115, (0 missing)
## Surrogate splits:
## V07 < 2.21 to the right, agree=0.820, adj=0.385, (0 split)
## V18 < 1.06 to the left, agree=0.764, adj=0.192, (0 split)
## V05 < 0.695 to the right, agree=0.736, adj=0.096, (0 split)
## V12 < 4.845 to the left, agree=0.725, adj=0.058, (0 split)
## V10 < 2.79 to the right, agree=0.713, adj=0.019, (0 split)
##
## Node number 6: 12 observations
## predicted class=A expected loss=0.1666667 P(node) =0.024
## class counts: 10 0 2
## probabilities: 0.833 0.000 0.167
##
## Node number 7: 165 observations, complexity param=0.01223242
## predicted class=C expected loss=0.2 P(node) =0.33
## class counts: 13 20 132
## probabilities: 0.079 0.121 0.800
## left son=14 (30 obs) right son=15 (135 obs)
## Primary splits:
## V15 < 2.435 to the left, improve=8.788552, (0 missing)
## V07 < 2.83 to the right, improve=7.929258, (0 missing)
## V18 < -0.045 to the left, improve=6.572547, (0 missing)
## V10 < 3.925 to the right, improve=6.372385, (0 missing)
## V11 < 5.415 to the right, improve=4.838432, (0 missing)
## Surrogate splits:
## V10 < 4.82 to the right, agree=0.830, adj=0.067, (0 split)
## V11 < 6.86 to the right, agree=0.830, adj=0.067, (0 split)
## V18 < -1.355 to the left, agree=0.824, adj=0.033, (0 split)
##
## Node number 8: 130 observations
## predicted class=A expected loss=0.1538462 P(node) =0.26
## class counts: 110 7 13
## probabilities: 0.846 0.054 0.100
##
## Node number 9: 15 observations
## predicted class=B expected loss=0.4 P(node) =0.03
## class counts: 5 9 1
## probabilities: 0.333 0.600 0.067
##
## Node number 10: 126 observations
## predicted class=B expected loss=0.1666667 P(node) =0.252
## class counts: 17 105 4
## probabilities: 0.135 0.833 0.032
##
## Node number 11: 52 observations, complexity param=0.02446483
## predicted class=A expected loss=0.6538462 P(node) =0.104
## class counts: 18 16 18
## probabilities: 0.346 0.308 0.346
## left son=22 (27 obs) right son=23 (25 obs)
## Primary splits:
## V12 < 3.37 to the left, improve=8.813903, (0 missing)
## V11 < 3.37 to the left, improve=7.556914, (0 missing)
## V18 < 1.125 to the right, improve=6.325014, (0 missing)
## V07 < 1.46 to the right, improve=5.961790, (0 missing)
## V05 < 1.57 to the right, improve=5.602051, (0 missing)
## Surrogate splits:
## V11 < 3.705 to the left, agree=0.865, adj=0.72, (0 split)
## V05 < 1.125 to the right, agree=0.731, adj=0.44, (0 split)
## V07 < 1.89 to the right, agree=0.712, adj=0.40, (0 split)
## V18 < -0.085 to the right, agree=0.712, adj=0.40, (0 split)
## V10 < 3.855 to the left, agree=0.692, adj=0.36, (0 split)
##
## Node number 14: 30 observations, complexity param=0.01223242
## predicted class=B expected loss=0.5 P(node) =0.06
## class counts: 0 15 15
## probabilities: 0.000 0.500 0.500
## left son=28 (10 obs) right son=29 (20 obs)
## Primary splits:
## V05 < -0.26 to the right, improve=4.800000, (0 missing)
## V07 < 2.83 to the right, improve=4.565217, (0 missing)
## V11 < 5.275 to the right, improve=3.516746, (0 missing)
## V18 < 0.24 to the left, improve=3.516746, (0 missing)
## V10 < 3.77 to the right, improve=1.984127, (0 missing)
## Surrogate splits:
## V12 < 3.915 to the left, agree=0.800, adj=0.4, (0 split)
## V11 < 5.63 to the right, agree=0.733, adj=0.2, (0 split)
## V18 < -0.175 to the left, agree=0.733, adj=0.2, (0 split)
## V07 < 2.73 to the right, agree=0.700, adj=0.1, (0 split)
## V15 < 1.395 to the left, agree=0.700, adj=0.1, (0 split)
##
## Node number 15: 135 observations
## predicted class=C expected loss=0.1333333 P(node) =0.27
## class counts: 13 5 117
## probabilities: 0.096 0.037 0.867
##
## Node number 22: 27 observations
## predicted class=A expected loss=0.3333333 P(node) =0.054
## class counts: 18 5 4
## probabilities: 0.667 0.185 0.148
##
## Node number 23: 25 observations, complexity param=0.01834862
## predicted class=C expected loss=0.44 P(node) =0.05
## class counts: 0 11 14
## probabilities: 0.000 0.440 0.560
## left son=46 (14 obs) right son=47 (11 obs)
## Primary splits:
## V18 < 0.76 to the left, improve=4.787532, (0 missing)
## V15 < 3.245 to the left, improve=3.853333, (0 missing)
## V12 < 3.975 to the left, improve=3.383492, (0 missing)
## V07 < 1.56 to the right, improve=2.371282, (0 missing)
## V05 < 0.65 to the left, improve=1.462857, (0 missing)
## Surrogate splits:
## V07 < 0.85 to the right, agree=0.84, adj=0.636, (0 split)
## V10 < 3.76 to the right, agree=0.72, adj=0.364, (0 split)
## V11 < 5.085 to the left, agree=0.72, adj=0.364, (0 split)
## V12 < 4.11 to the left, agree=0.72, adj=0.364, (0 split)
## V15 < 3.89 to the left, agree=0.72, adj=0.364, (0 split)
##
## Node number 28: 10 observations
## predicted class=B expected loss=0.1 P(node) =0.02
## class counts: 0 9 1
## probabilities: 0.000 0.900 0.100
##
## Node number 29: 20 observations
## predicted class=C expected loss=0.3 P(node) =0.04
## class counts: 0 6 14
## probabilities: 0.000 0.300 0.700
##
## Node number 46: 14 observations
## predicted class=B expected loss=0.2857143 P(node) =0.028
## class counts: 0 10 4
## probabilities: 0.000 0.714 0.286
##
## Node number 47: 11 observations
## predicted class=C expected loss=0.09090909 P(node) =0.022
## class counts: 0 1 10
## probabilities: 0.000 0.091 0.909
##
## n= 500
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 500 327 A (0.34600000 0.31400000 0.34000000)
## 2) V05>=0.38 323 173 A (0.46439628 0.42414861 0.11145511)
## 4) V10< 2.775 145 30 A (0.79310345 0.11034483 0.09655172)
## 8) V18>=-0.235 130 20 A (0.84615385 0.05384615 0.10000000) *
## 9) V18< -0.235 15 6 B (0.33333333 0.60000000 0.06666667) *
## 5) V10>=2.775 178 57 B (0.19662921 0.67977528 0.12359551)
## 10) V15< 2.115 126 21 B (0.13492063 0.83333333 0.03174603) *
## 11) V15>=2.115 52 34 A (0.34615385 0.30769231 0.34615385)
## 22) V12< 3.37 27 9 A (0.66666667 0.18518519 0.14814815) *
## 23) V12>=3.37 25 11 C (0.00000000 0.44000000 0.56000000)
## 46) V18< 0.76 14 4 B (0.00000000 0.71428571 0.28571429) *
## 47) V18>=0.76 11 1 C (0.00000000 0.09090909 0.90909091) *
## 3) V05< 0.38 177 43 C (0.12994350 0.11299435 0.75706215)
## 6) V11< 1.705 12 2 A (0.83333333 0.00000000 0.16666667) *
## 7) V11>=1.705 165 33 C (0.07878788 0.12121212 0.80000000)
## 14) V15< 2.435 30 15 B (0.00000000 0.50000000 0.50000000)
## 28) V05>=-0.26 10 1 B (0.00000000 0.90000000 0.10000000) *
## 29) V05< -0.26 20 6 C (0.00000000 0.30000000 0.70000000) *
## 15) V15>=2.435 135 18 C (0.09629630 0.03703704 0.86666667) *
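# Extra sketch (usual rpart post-processing, a sketch rather than part of the questions):
# pick the complexity parameter with the smallest cross-validated error (xerror)
# in the cptable and prune the tree with it; cpOpt and mArbrePrune are ad hoc names.
cpOpt <- mArbre$cptable[which.min(mArbre$cptable[, "xerror"]), "CP"]
mArbrePrune <- prune(mArbre, cp = cpOpt)
print(mArbrePrune$cptable)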
# Q.41 - prediction
predArbre <- predict(mArbre,newdata=DTest,type="class")
print(table(predArbre))
## predArbre
## A B C
## 1735 1855 1410
# Q.42 - performance - clearly worse than logistic regression
# on these data
print(caret::confusionMatrix(data=predArbre,reference=DTest$classe))
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C
## A 1156 203 376
## B 346 1283 226
## C 155 161 1094
##
## Overall Statistics
##
## Accuracy : 0.7066
## 95% CI : (0.6938, 0.7192)
## No Information Rate : 0.3392
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5602
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: A Class: B Class: C
## Sensitivity 0.6976 0.7790 0.6450
## Specificity 0.8268 0.8294 0.9044
## Pos Pred Value 0.6663 0.6916 0.7759
## Neg Pred Value 0.8466 0.8843 0.8323
## Prevalence 0.3314 0.3294 0.3392
## Detection Rate 0.2312 0.2566 0.2188
## Detection Prevalence 0.3470 0.3710 0.2820
## Balanced Accuracy 0.7622 0.8042 0.7747
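# Extra sketch: the two test accuracies side by side (the tree's accuracy is
# recomputed from its own confusion matrix; mcArbre and accArbre are ad hoc names).
mcArbre <- table(DTest$classe, predArbre)
accArbre <- sum(diag(mcArbre)) / sum(mcArbre)
print(c(logistic = accNet, tree = accArbre))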