# Q.0 - Importation des données
#charger le package xlsx pour lire les fichiers Excel
library(xlsx)
# Load the data from the first sheet of the workbook.
# Mind the stringsAsFactors option (text columns are read as factors)
# and pay attention to the encoding as well.
donnees <- read.xlsx(
  file = "etude_ronflement.xlsx",
  sheetIndex = 1,
  header = TRUE, # spelled out: T is an ordinary variable that can be reassigned
  stringsAsFactors = TRUE,
  encoding = "UTF-8"
)
# List the variables and their types
str(donnees)
## 'data.frame': 100 obs. of 7 variables:
## $ AGE : num 56 51 61 38 41 57 41 74 53 44 ...
## $ POIDS : num 58 91 98 74 57 99 61 108 104 91 ...
## $ TAILLE: num 164 195 188 161 163 189 167 194 194 180 ...
## $ ALCOOL: num 7 2 0 8 6 4 6 5 5 10 ...
## $ SEXE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ TABAC : num 1 0 1 0 1 0 0 0 0 0 ...
## $ RONFLE: Factor w/ 2 levels "non","oui": 2 2 2 2 2 2 2 2 2 2 ...
# Summary - descriptive statistics for every column of the dataset
print(summary(object = donnees))
## AGE POIDS TAILLE ALCOOL
## Min. :23.00 Min. : 42.00 Min. :158.0 Min. : 0.00
## 1st Qu.:43.00 1st Qu.: 77.00 1st Qu.:166.0 1st Qu.: 0.00
## Median :52.00 Median : 95.00 Median :186.0 Median : 2.00
## Mean :52.27 Mean : 90.41 Mean :181.1 Mean : 2.95
## 3rd Qu.:62.25 3rd Qu.:107.00 3rd Qu.:194.0 3rd Qu.: 4.25
## Max. :74.00 Max. :120.00 Max. :208.0 Max. :15.00
## SEXE TABAC RONFLE
## Min. :0.00 Min. :0.00 non:65
## 1st Qu.:0.00 1st Qu.:0.00 oui:35
## Median :0.00 Median :0.00
## Mean :0.25 Mean :0.36
## 3rd Qu.:0.25 3rd Qu.:1.00
## Max. :1.00 Max. :1.00
# Q.1 - logistic regression of RONFLE on all the other variables
modele <- glm(formula = RONFLE ~ ., family = binomial, data = donnees)
# Detailed description of the results; the summary object is kept because
# its components are used in later steps
res.modele <- summary(modele)
print(res.modele)
##
## Call:
## glm(formula = RONFLE ~ ., family = binomial, data = donnees)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5911 -0.8516 -0.5317 1.0415 2.3542
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.35016 5.99539 -1.059 0.28952
## AGE 0.06213 0.02330 2.666 0.00767 **
## POIDS -0.01543 0.03319 -0.465 0.64195
## TAILLE 0.01510 0.04754 0.318 0.75079
## ALCOOL 0.23654 0.08611 2.747 0.00601 **
## SEXE -0.65218 0.67369 -0.968 0.33301
## TABAC 1.20057 0.55798 2.152 0.03143 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 109.42 on 93 degrees of freedom
## AIC: 123.42
##
## Number of Fisher Scoring iterations: 4
# List the attributes (component names and class) of the object returned
# by summary(); these components are used later in the script
print(attributes(res.modele))
## $names
## [1] "call" "terms" "family" "deviance"
## [5] "aic" "contrasts" "df.residual" "null.deviance"
## [9] "df.null" "iter" "deviance.resid" "coefficients"
## [13] "aliased" "dispersion" "df" "cov.unscaled"
## [17] "cov.scaled"
##
## $class
## [1] "summary.glm"
# Q.2 - same regression with POIDS (weight) left out
modele2 <- glm(
  formula = RONFLE ~ AGE + TAILLE + ALCOOL + SEXE + TABAC,
  family = binomial,
  data = donnees
)
res.modele2 <- summary(modele2)
print(res.modele2)
##
## Call:
## glm(formula = RONFLE ~ AGE + TAILLE + ALCOOL + SEXE + TABAC,
## family = binomial, data = donnees)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6255 -0.8575 -0.5232 1.0644 2.3344
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.088805 3.441777 -1.188 0.23484
## AGE 0.063029 0.023296 2.706 0.00682 **
## TAILLE -0.005343 0.017993 -0.297 0.76651
## ALCOOL 0.237349 0.086062 2.758 0.00582 **
## SEXE -0.631755 0.672293 -0.940 0.34737
## TABAC 1.182496 0.554754 2.132 0.03304 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 109.63 on 94 degrees of freedom
## AIC: 121.63
##
## Number of Fisher Scoring iterations: 4
# Likelihood-ratio test: model without POIDS against the full model
# Test statistic = difference between the two residual deviances
LR <- res.modele2$deviance - res.modele$deviance
# Degrees of freedom = difference in residual df
# (1 here, because a single variable was removed)
DF <- res.modele2$df.residual - res.modele$df.residual
# p-value: upper tail of the chi-squared distribution
# (FALSE spelled out: F is an ordinary variable that can be reassigned)
PValue <- pchisq(LR, DF, lower.tail = FALSE)
# Display
print(paste("LR =", LR))
## [1] "LR = 0.21577510900778"
print(paste("DDL =", DF))
## [1] "DDL = 1"
print(paste("p-value =", PValue))
## [1] "p-value = 0.642278102987281"
# Q.3 - test for removing POIDS and TAILLE simultaneously
# Regression without POIDS and TAILLE
modele3 <- glm(
  formula = RONFLE ~ AGE + ALCOOL + SEXE + TABAC,
  family = binomial,
  data = donnees
)
res.modele3 <- summary(modele3)
print(res.modele3)
##
## Call:
## glm(formula = RONFLE ~ AGE + ALCOOL + SEXE + TABAC, family = binomial,
## data = donnees)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5979 -0.8658 -0.5205 1.0697 2.3331
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.01747 1.47967 -3.391 0.000697 ***
## AGE 0.06258 0.02320 2.698 0.006978 **
## ALCOOL 0.23373 0.08503 2.749 0.005979 **
## SEXE -0.64018 0.67270 -0.952 0.341274
## TABAC 1.17352 0.55272 2.123 0.033740 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 109.72 on 95 degrees of freedom
## AIC: 119.72
##
## Number of Fisher Scoring iterations: 4
# Likelihood-ratio test: model without POIDS and TAILLE against the full model
# Test statistic = difference between the two residual deviances
LR <- res.modele3$deviance - res.modele$deviance
# Degrees of freedom = difference in residual df (2 here: two variables removed)
DF <- res.modele3$df.residual - res.modele$df.residual
# p-value: upper tail of the chi-squared distribution
# (FALSE spelled out: F is an ordinary variable that can be reassigned)
PValue <- pchisq(LR, DF, lower.tail = FALSE)
# Display
print(paste("LR =", LR))
## [1] "LR = 0.304053898982573"
print(paste("DDL =", DF))
## [1] "DDL = 2"
print(paste("p-value =", PValue))
## [1] "p-value = 0.858965131755864"
# Q.4 - test of SEXE taken on its own (single-predictor regression)
print(summary(
  glm(formula = RONFLE ~ SEXE, family = binomial, data = donnees)
))
##
## Call:
## glm(formula = RONFLE ~ SEXE, family = binomial, data = donnees)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0108 -1.0108 -0.6681 1.3537 1.7941
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4055 0.2357 -1.720 0.0854 .
## SEXE -0.9808 0.5528 -1.774 0.0760 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 125.97 on 98 degrees of freedom
## AIC: 129.97
##
## Number of Fisher Scoring iterations: 4
#attention interprétation : SEXE = 1, femme ; SEXE = 0, homme
# Q.5 - effect of drinking (at all) on snoring
# Recode ALCOOL as a 0/1 indicator: 1 when ALCOOL is strictly positive
alcool.bin <- with(donnees, ifelse(ALCOOL > 0, 1, 0))
# Regression of RONFLE on the binary indicator
print(summary(
  glm(formula = donnees$RONFLE ~ alcool.bin, family = binomial)
))
##
## Call:
## glm(formula = donnees$RONFLE ~ alcool.bin, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0906 -1.0906 -0.6945 1.2668 1.7552
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2993 0.3761 -3.455 0.00055 ***
## alcool.bin 1.0916 0.4595 2.376 0.01751 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 123.43 on 98 degrees of freedom
## AIC: 127.43
##
## Number of Fisher Scoring iterations: 4
# Q.6 - alcohol x tobacco interaction, main effects written out explicitly
# (alcool.bin is a standalone vector, not a column of donnees)
print(summary(
  glm(
    formula = RONFLE ~ TABAC + alcool.bin + TABAC * alcool.bin,
    family = binomial,
    data = donnees
  )
))
##
## Call:
## glm(formula = RONFLE ~ TABAC + alcool.bin + TABAC * alcool.bin,
## family = binomial, data = donnees)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2435 -1.0474 -0.3288 1.3132 2.4267
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.890 1.027 -2.813 0.0049 **
## TABAC 2.262 1.117 2.025 0.0428 *
## alcool.bin 2.577 1.071 2.406 0.0161 *
## TABAC:alcool.bin -1.794 1.284 -1.398 0.1623
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 116.79 on 96 degrees of freedom
## AIC: 124.79
##
## Number of Fisher Scoring iterations: 5
# Equivalent shorter form: with the * operator R adds the lower-order
# terms (main effects) by itself
# cf. the notion of a "hierarchically well-formulated" model
print(summary(
  glm(
    formula = RONFLE ~ TABAC * alcool.bin,
    family = binomial,
    data = donnees
  )
))
##
## Call:
## glm(formula = RONFLE ~ TABAC * alcool.bin, family = binomial,
## data = donnees)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2435 -1.0474 -0.3288 1.3132 2.4267
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.890 1.027 -2.813 0.0049 **
## TABAC 2.262 1.117 2.025 0.0428 *
## alcool.bin 2.577 1.071 2.406 0.0161 *
## TABAC:alcool.bin -1.794 1.284 -1.398 0.1623
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 116.79 on 96 degrees of freedom
## AIC: 124.79
##
## Number of Fisher Scoring iterations: 5
# Q.7 - graded alcohol levels
# Level 0 (reference): people who do not drink
# Nested coding to express a gradation in alcohol consumption:
# each indicator is 1 as soon as ALCOOL reaches its threshold
# Caution: the 1, 2, 3 level coding is open to question
niv1 <- with(donnees, ifelse(ALCOOL >= 1, 1, 0))
niv2 <- with(donnees, ifelse(ALCOOL >= 6, 1, 0))
niv3 <- with(donnees, ifelse(ALCOOL >= 11, 1, 0))
# Regression on the nested indicators
print(summary(
  glm(formula = donnees$RONFLE ~ niv1 + niv2 + niv3, family = binomial)
))
##
## Call:
## glm(formula = donnees$RONFLE ~ niv1 + niv2 + niv3, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2278 -1.0277 -0.6945 1.2168 1.7552
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2993 0.3761 -3.455 0.00055 ***
## niv1 0.9364 0.4974 1.883 0.05975 .
## niv2 0.4807 0.5849 0.822 0.41116
## niv3 -0.1178 1.4954 -0.079 0.93722
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 122.73 on 96 degrees of freedom
## AIC: 130.73
##
## Number of Fisher Scoring iterations: 4
# Q.8 a/b - AGE and ALCOOL as the only predictors
# Note: modele8 holds the summary of the fit, not the glm object itself
modele8 <- summary(
  glm(formula = RONFLE ~ AGE + ALCOOL, family = binomial, data = donnees)
)
print(modele8)
##
## Call:
## glm(formula = RONFLE ~ AGE + ALCOOL, family = binomial, data = donnees)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6382 -0.9074 -0.5816 1.0921 1.9151
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.65698 1.34017 -3.475 0.000511 ***
## AGE 0.06377 0.02244 2.842 0.004484 **
## ALCOOL 0.19973 0.07037 2.838 0.004535 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 114.80 on 97 degrees of freedom
## AIC: 120.8
##
## Number of Fisher Scoring iterations: 4
# Q.8c - joint test of the 2 coefficients = global significance test
# Chi-squared statistic: null deviance minus residual deviance
khi2 <- modele8[["null.deviance"]] - modele8[["deviance"]]
print(khi2)
## [1] 14.69128
# Degrees of freedom: null df minus residual df
ddl <- modele8[["df.null"]] - modele8[["df.residual"]]
print(ddl)
## [1] 2
# p-value: upper tail of the chi-squared distribution
pvalue <- pchisq(q = khi2, df = ddl, lower.tail = FALSE)
print(pvalue)
## [1] 0.0006454015
# 8.c - AGE x ALCOOL interaction
print(summary(
  glm(
    formula = RONFLE ~ AGE + ALCOOL + AGE * ALCOOL,
    family = binomial,
    data = donnees
  )
))
##
## Call:
## glm(formula = RONFLE ~ AGE + ALCOOL + AGE * ALCOOL, family = binomial,
## data = donnees)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5468 -0.9761 -0.5366 1.1550 1.9857
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.959402 1.851390 -3.219 0.00129 **
## AGE 0.088039 0.032090 2.744 0.00608 **
## ALCOOL 0.571723 0.348796 1.639 0.10119
## AGE:ALCOOL -0.007457 0.006697 -1.113 0.26550
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.49 on 99 degrees of freedom
## Residual deviance: 113.45 on 96 degrees of freedom
## AIC: 121.45
##
## Number of Fisher Scoring iterations: 4