# Q.0 - Importation des données

#charger le package xlsx pour lire les fichiers Excel
library(xlsx)

# Load the data from the first sheet of the Excel workbook.
# stringsAsFactors = TRUE is requested on purpose: the text column RONFLE
# is then read as a factor. The encoding is stated explicitly as UTF-8.
donnees <- read.xlsx(
  file = "etude_ronflement.xlsx",
  sheetIndex = 1,
  header = TRUE, # spelled out: T is an ordinary variable that can be reassigned
  stringsAsFactors = TRUE,
  encoding = "UTF-8"
)

# List the variables and their types
str(donnees)
## 'data.frame':    100 obs. of  7 variables:
##  $ AGE   : num  56 51 61 38 41 57 41 74 53 44 ...
##  $ POIDS : num  58 91 98 74 57 99 61 108 104 91 ...
##  $ TAILLE: num  164 195 188 161 163 189 167 194 194 180 ...
##  $ ALCOOL: num  7 2 0 8 6 4 6 5 5 10 ...
##  $ SEXE  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ TABAC : num  1 0 1 0 1 0 0 0 0 0 ...
##  $ RONFLE: Factor w/ 2 levels "non","oui": 2 2 2 2 2 2 2 2 2 2 ...
# Summary - descriptive statistics for every column of the data
print(summary(donnees))
##       AGE            POIDS            TAILLE          ALCOOL     
##  Min.   :23.00   Min.   : 42.00   Min.   :158.0   Min.   : 0.00  
##  1st Qu.:43.00   1st Qu.: 77.00   1st Qu.:166.0   1st Qu.: 0.00  
##  Median :52.00   Median : 95.00   Median :186.0   Median : 2.00  
##  Mean   :52.27   Mean   : 90.41   Mean   :181.1   Mean   : 2.95  
##  3rd Qu.:62.25   3rd Qu.:107.00   3rd Qu.:194.0   3rd Qu.: 4.25  
##  Max.   :74.00   Max.   :120.00   Max.   :208.0   Max.   :15.00  
##       SEXE          TABAC      RONFLE  
##  Min.   :0.00   Min.   :0.00   non:65  
##  1st Qu.:0.00   1st Qu.:0.00   oui:35  
##  Median :0.00   Median :0.00           
##  Mean   :0.25   Mean   :0.36           
##  3rd Qu.:0.25   3rd Qu.:1.00           
##  Max.   :1.00   Max.   :1.00
# Q.1 - logistic regression of RONFLE on all the other variables
modele <- glm(
  formula = RONFLE ~ .,
  family = binomial,
  data = donnees
)

# Detailed results: keep the summary object for later use, then display it
res.modele <- summary(object = modele)
print(res.modele)
## 
## Call:
## glm(formula = RONFLE ~ ., family = binomial, data = donnees)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5911  -0.8516  -0.5317   1.0415   2.3542  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -6.35016    5.99539  -1.059  0.28952   
## AGE          0.06213    0.02330   2.666  0.00767 **
## POIDS       -0.01543    0.03319  -0.465  0.64195   
## TAILLE       0.01510    0.04754   0.318  0.75079   
## ALCOOL       0.23654    0.08611   2.747  0.00601 **
## SEXE        -0.65218    0.67369  -0.968  0.33301   
## TABAC        1.20057    0.55798   2.152  0.03143 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 109.42  on 93  degrees of freedom
## AIC: 123.42
## 
## Number of Fisher Scoring iterations: 4
# List the attributes of the object returned by summary();
# they are used further down in the script
print(attributes(res.modele))
## $names
##  [1] "call"           "terms"          "family"         "deviance"      
##  [5] "aic"            "contrasts"      "df.residual"    "null.deviance" 
##  [9] "df.null"        "iter"           "deviance.resid" "coefficients"  
## [13] "aliased"        "dispersion"     "df"             "cov.unscaled"  
## [17] "cov.scaled"    
## 
## $class
## [1] "summary.glm"
# Q.2 - same regression with POIDS left out of the predictors
modele2 <- glm(
  formula = RONFLE ~ AGE + TAILLE + ALCOOL + SEXE + TABAC,
  family = binomial,
  data = donnees
)
res.modele2 <- summary(object = modele2)
print(res.modele2)
## 
## Call:
## glm(formula = RONFLE ~ AGE + TAILLE + ALCOOL + SEXE + TABAC, 
##     family = binomial, data = donnees)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6255  -0.8575  -0.5232   1.0644   2.3344  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -4.088805   3.441777  -1.188  0.23484   
## AGE          0.063029   0.023296   2.706  0.00682 **
## TAILLE      -0.005343   0.017993  -0.297  0.76651   
## ALCOOL       0.237349   0.086062   2.758  0.00582 **
## SEXE        -0.631755   0.672293  -0.940  0.34737   
## TABAC        1.182496   0.554754   2.132  0.03304 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 109.63  on 94  degrees of freedom
## AIC: 121.63
## 
## Number of Fisher Scoring iterations: 4
# Likelihood-ratio test: model without POIDS (modele2) against the full model

# Test statistic: difference between the two residual deviances
LR <- res.modele2$deviance - res.modele$deviance

# Degrees of freedom: difference between the residual degrees of freedom
# (1 here, because a single variable was removed)
DF <- res.modele2$df.residual - res.modele$df.residual

# p-value: upper tail of the chi-squared distribution.
# FALSE is spelled out because F is an ordinary, reassignable variable.
PValue <- pchisq(LR, DF, lower.tail = FALSE)

# Display
print(paste("LR =", LR))
## [1] "LR = 0.21577510900778"
print(paste("DDL =", DF))
## [1] "DDL = 1"
print(paste("p-value =", PValue))
## [1] "p-value = 0.642278102987281"
# Q.3 - test for removing POIDS and TAILLE at the same time

# Regression without POIDS and TAILLE
modele3 <- glm(
  formula = RONFLE ~ AGE + ALCOOL + SEXE + TABAC,
  family = binomial,
  data = donnees
)
res.modele3 <- summary(object = modele3)
print(res.modele3)
## 
## Call:
## glm(formula = RONFLE ~ AGE + ALCOOL + SEXE + TABAC, family = binomial, 
##     data = donnees)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5979  -0.8658  -0.5205   1.0697   2.3331  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.01747    1.47967  -3.391 0.000697 ***
## AGE          0.06258    0.02320   2.698 0.006978 ** 
## ALCOOL       0.23373    0.08503   2.749 0.005979 ** 
## SEXE        -0.64018    0.67270  -0.952 0.341274    
## TABAC        1.17352    0.55272   2.123 0.033740 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 109.72  on 95  degrees of freedom
## AIC: 119.72
## 
## Number of Fisher Scoring iterations: 4
# Likelihood-ratio test: model without POIDS and TAILLE (modele3)
# against the full model

# Test statistic: difference between the two residual deviances
LR <- res.modele3$deviance - res.modele$deviance

# Degrees of freedom: 2 here, because two variables were removed
DF <- res.modele3$df.residual - res.modele$df.residual

# p-value: upper tail of the chi-squared distribution.
# FALSE is spelled out because F is an ordinary, reassignable variable.
PValue <- pchisq(LR, DF, lower.tail = FALSE)

# Display
print(paste("LR =", LR))
## [1] "LR = 0.304053898982573"
print(paste("DDL =", DF))
## [1] "DDL = 2"
print(paste("p-value =", PValue))
## [1] "p-value = 0.858965131755864"
# 4. Test of SEXE taken on its own (sole explanatory variable)
print(summary(glm(
  formula = RONFLE ~ SEXE,
  family = binomial,
  data = donnees
)))
## 
## Call:
## glm(formula = RONFLE ~ SEXE, family = binomial, data = donnees)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0108  -1.0108  -0.6681   1.3537   1.7941  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.4055     0.2357  -1.720   0.0854 .
## SEXE         -0.9808     0.5528  -1.774   0.0760 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 125.97  on 98  degrees of freedom
## AIC: 129.97
## 
## Number of Fisher Scoring iterations: 4
#attention interprétation : SEXE = 1, femme ; SEXE = 0, homme
# Q.5 - effect of drinking on snoring

# Recode ALCOOL as a 0/1 indicator: 1 when ALCOOL is strictly positive
alcool.bin <- ifelse(donnees[["ALCOOL"]] > 0, yes = 1, no = 0)

# Regression of RONFLE on the indicator alone.
# The response is referenced as donnees$RONFLE because no data argument
# is supplied; alcool.bin is a free-standing vector.
print(summary(glm(
  formula = donnees$RONFLE ~ alcool.bin,
  family = binomial
)))
## 
## Call:
## glm(formula = donnees$RONFLE ~ alcool.bin, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0906  -1.0906  -0.6945   1.2668   1.7552  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2993     0.3761  -3.455  0.00055 ***
## alcool.bin    1.0916     0.4595   2.376  0.01751 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 123.43  on 98  degrees of freedom
## AIC: 127.43
## 
## Number of Fisher Scoring iterations: 4
# Q.6 - alcohol x tobacco interaction, with the main effects written out.
# alcool.bin is the 0/1 vector built earlier in the script; it is not a
# column of donnees.
print(summary(glm(
  formula = RONFLE ~ TABAC + alcool.bin + TABAC * alcool.bin,
  family = binomial,
  data = donnees
)))
## 
## Call:
## glm(formula = RONFLE ~ TABAC + alcool.bin + TABAC * alcool.bin, 
##     family = binomial, data = donnees)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2435  -1.0474  -0.3288   1.3132   2.4267  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)        -2.890      1.027  -2.813   0.0049 **
## TABAC               2.262      1.117   2.025   0.0428 * 
## alcool.bin          2.577      1.071   2.406   0.0161 * 
## TABAC:alcool.bin   -1.794      1.284  -1.398   0.1623   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 116.79  on 96  degrees of freedom
## AIC: 124.79
## 
## Number of Fisher Scoring iterations: 5
# Equivalent shorter form: in a formula, TABAC * alcool.bin expands to both
# main effects plus their interaction, so R adds the lower-order terms itself
# (cf. the notion of a "hierarchically well-formulated" model)
print(summary(glm(
  formula = RONFLE ~ TABAC * alcool.bin,
  family = binomial,
  data = donnees
)))
## 
## Call:
## glm(formula = RONFLE ~ TABAC * alcool.bin, family = binomial, 
##     data = donnees)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2435  -1.0474  -0.3288   1.3132   2.4267  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)        -2.890      1.027  -2.813   0.0049 **
## TABAC               2.262      1.117   2.025   0.0428 * 
## alcool.bin          2.577      1.071   2.406   0.0161 * 
## TABAC:alcool.bin   -1.794      1.284  -1.398   0.1623   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 116.79  on 96  degrees of freedom
## AIC: 124.79
## 
## Number of Fisher Scoring iterations: 5
# Q.7 - grouped levels of alcohol consumption

# Level 0 is made of those who do not drink.
# Nested coding to express a gradation in alcohol consumption: each
# indicator is 1 as soon as ALCOOL reaches its threshold, so an observation
# at a higher level also has the lower-level indicators set to 1.
# Caution (original note): a 1, 2, 3 coding is open to question.
niv1 <- ifelse(donnees[["ALCOOL"]] >= 1, yes = 1, no = 0)
niv2 <- ifelse(donnees[["ALCOOL"]] >= 6, yes = 1, no = 0)
niv3 <- ifelse(donnees[["ALCOOL"]] >= 11, yes = 1, no = 0)

# Regression on the nested indicators
print(summary(glm(
  formula = donnees$RONFLE ~ niv1 + niv2 + niv3,
  family = binomial
)))
## 
## Call:
## glm(formula = donnees$RONFLE ~ niv1 + niv2 + niv3, family = binomial)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2278  -1.0277  -0.6945   1.2168   1.7552  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2993     0.3761  -3.455  0.00055 ***
## niv1          0.9364     0.4974   1.883  0.05975 .  
## niv2          0.4807     0.5849   0.822  0.41116    
## niv3         -0.1178     1.4954  -0.079  0.93722    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 122.73  on 96  degrees of freedom
## AIC: 130.73
## 
## Number of Fisher Scoring iterations: 4
# 8.a.b - model with AGE and ALCOOL only.
# Note: modele8 stores the summary of the fit, not the glm object itself.
modele8 <- summary(glm(
  formula = RONFLE ~ AGE + ALCOOL,
  family = binomial,
  data = donnees
))
print(modele8)
## 
## Call:
## glm(formula = RONFLE ~ AGE + ALCOOL, family = binomial, data = donnees)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6382  -0.9074  -0.5816   1.0921   1.9151  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -4.65698    1.34017  -3.475 0.000511 ***
## AGE          0.06377    0.02244   2.842 0.004484 ** 
## ALCOOL       0.19973    0.07037   2.838 0.004535 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 114.80  on 97  degrees of freedom
## AIC: 120.8
## 
## Number of Fisher Scoring iterations: 4
# Q.8c - testing both coefficients at once = overall significance test

# Chi-squared statistic: null deviance minus residual deviance
khi2 <- with(modele8, null.deviance - deviance)
print(khi2)
## [1] 14.69128
# Degrees of freedom: null df minus residual df
ddl <- with(modele8, df.null - df.residual)
print(ddl)
## [1] 2
# p-value: upper tail of the chi-squared distribution
pvalue <- pchisq(q = khi2, df = ddl, lower.tail = FALSE)
print(pvalue)
## [1] 0.0006454015
# 8.c - AGE x ALCOOL interaction added to the AGE + ALCOOL model
print(summary(glm(
  formula = RONFLE ~ AGE + ALCOOL + AGE * ALCOOL,
  family = binomial,
  data = donnees
)))
## 
## Call:
## glm(formula = RONFLE ~ AGE + ALCOOL + AGE * ALCOOL, family = binomial, 
##     data = donnees)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5468  -0.9761  -0.5366   1.1550   1.9857  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -5.959402   1.851390  -3.219  0.00129 **
## AGE          0.088039   0.032090   2.744  0.00608 **
## ALCOOL       0.571723   0.348796   1.639  0.10119   
## AGE:ALCOOL  -0.007457   0.006697  -1.113  0.26550   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 129.49  on 99  degrees of freedom
## Residual deviance: 113.45  on 96  degrees of freedom
## AIC: 121.45
## 
## Number of Fisher Scoring iterations: 4