Version de R

In [1]:
# Show the details of the running R build (platform, version string, nickname).
R.version
               _                           
platform       x86_64-pc-linux-gnu         
arch           x86_64                      
os             linux-gnu                   
system         x86_64, linux-gnu           
status                                     
major          4                           
minor          4.3                         
year           2025                        
month          02                          
day            28                          
svn rev        87843                       
language       R                           
version.string R version 4.4.3 (2025-02-28)
nickname       Trophy Case                 

Importation et inspection des données

In [2]:
# Load the survey data from the Excel workbook.
# readxl is attached by library(), so read_excel() is called unqualified.
library(readxl)
D <- read_excel(path = "/content/habitudes_de_vie.xlsx")
# Compact overview of the imported table: dimensions, column names and types.
# NOTE(review): some columns contain "???" placeholder values (visible in the
# str() output); they are kept as ordinary categories here - confirm intended.
str(D)
tibble [360 × 6] (S3: tbl_df/tbl/data.frame)
 $ TYPELAIT     : chr [1:360] "2%MILK" "SKIM" "NOMILK" "NOMILK" ...
 $ SELALIMENT   : chr [1:360] "MODERATE" "MODERATE" "NONE" "NONE" ...
 $ SELCONSO     : chr [1:360] "MODERATE" "LOW" "???" "LOW" ...
 $ ACTIVITESPORT: chr [1:360] "DAILY" "DAILY" "DAILY" "DAILY" ...
 $ FUMER        : chr [1:360] "REGULAR" "NEVER" "FORMER" "OCCASIONAL" ...
 $ HAB_BOISSON  : chr [1:360] "OCCASIONAL" "NEVER" "NEVER" "REGULAR" ...
In [3]:
# Turn every column into a factor and store the result as a plain data.frame.
# Without this step "arules" emits a warning (it accepts logical or factor columns).
D <- D |>
  lapply(factor) |>
  as.data.frame()
str(D)
'data.frame':	360 obs. of  6 variables:
 $ TYPELAIT     : Factor w/ 5 levels "2%MILK","NOMILK",..: 1 4 2 2 1 1 5 2 1 1 ...
 $ SELALIMENT   : Factor w/ 5 levels "???","ALOT","MODERATE",..: 3 3 4 4 4 4 2 5 4 4 ...
 $ SELCONSO     : Factor w/ 6 levels "???","HIGH","LOW",..: 4 3 1 3 3 6 4 2 3 6 ...
 $ ACTIVITESPORT: Factor w/ 6 levels "???","DAILY",..: 2 2 2 2 3 2 3 3 2 2 ...
 $ FUMER        : Factor w/ 5 levels "????","FORMER",..: 5 3 2 4 5 3 5 5 3 5 ...
 $ HAB_BOISSON  : Factor w/ 4 levels "FORMER","NEVER",..: 3 2 2 4 3 3 3 4 2 4 ...
In [4]:
# Preview the first six rows of the factor-coded data.
head(D, n = 6L)
A data.frame: 6 × 6
  TYPELAIT SELALIMENT SELCONSO ACTIVITESPORT FUMER      HAB_BOISSON
  <fct>    <fct>      <fct>    <fct>         <fct>      <fct>
1 2%MILK   MODERATE   MODERATE DAILY         REGULAR    OCCASIONAL
2 SKIM     MODERATE   LOW      DAILY         NEVER      NEVER
3 NOMILK   NONE       ???      DAILY         FORMER     NEVER
4 NOMILK   NONE       LOW      DAILY         OCCASIONAL REGULAR
5 2%MILK   NONE       LOW      NEVER         REGULAR    OCCASIONAL
6 2%MILK   NONE       VERYLOW  DAILY         NEVER      OCCASIONAL

Extraction des itemsets fréquents

In [6]:
# Install the "arules" package on the fly, but only when it is not already
# available, so that a fresh runtime can execute the notebook top to bottom
# (the cell previously held only a commented-out install.packages() call).
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

In [7]:
# Attach "arules" and report which version of the package is in use.
library("arules")
packageVersion(pkg = "arules")
Loading required package: Matrix


Attaching package: ‘arules’


The following objects are masked from ‘package:base’:

    abbreviate, write


[1] ‘1.7.9’
In [8]:
# Mining settings for frequent itemsets (see the "target" option):
# keep singletons and pairs of items whose support is at least 0.3.
params <- list(
  support = 0.3,
  minlen  = 1,
  maxlen  = 2,
  target  = "frequent itemsets"
)
# Run Apriori on the factor-coded data with the settings above.
freq <- apriori(D, parameter = params)
Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen
         NA    0.1    1 none FALSE            TRUE       5     0.3      1
 maxlen            target  ext
      2 frequent itemsets TRUE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 108 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[31 item(s), 360 transaction(s)] done [0.00s].
sorting and recoding items ... [9 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2
Warning message in apriori(D, parameter = params):
“Mining stopped (maxlen reached). Only patterns up to a length of 2 returned!”
 done [0.01s].
sorting transactions ... done [0.00s].
writing ... [10 set(s)] done [0.00s].
creating S4 object  ... done [0.00s].
In [9]:
# Print the mined itemsets with their support and count.
arules::inspect(freq)
     items                                  support   count
[1]  {FUMER=REGULAR}                        0.3166667 114  
[2]  {SELALIMENT=NONE}                      0.3166667 114  
[3]  {SELCONSO=LOW}                         0.3416667 123  
[4]  {ACTIVITESPORT=DAILY}                  0.3500000 126  
[5]  {SELALIMENT=VERYLITTLE}                0.3666667 132  
[6]  {FUMER=FORMER}                         0.3888889 140  
[7]  {ACTIVITESPORT=NEVER}                  0.4777778 172  
[8]  {HAB_BOISSON=REGULAR}                  0.5916667 213  
[9]  {TYPELAIT=2%MILK}                      0.6416667 231  
[10] {TYPELAIT=2%MILK, HAB_BOISSON=REGULAR} 0.4027778 145  
In [10]:
# Same listing, ordered by decreasing support.
freq |>
  sort(by = "support") |>
  inspect()
     items                                  support   count
[1]  {TYPELAIT=2%MILK}                      0.6416667 231  
[2]  {HAB_BOISSON=REGULAR}                  0.5916667 213  
[3]  {ACTIVITESPORT=NEVER}                  0.4777778 172  
[4]  {TYPELAIT=2%MILK, HAB_BOISSON=REGULAR} 0.4027778 145  
[5]  {FUMER=FORMER}                         0.3888889 140  
[6]  {SELALIMENT=VERYLITTLE}                0.3666667 132  
[7]  {ACTIVITESPORT=DAILY}                  0.3500000 126  
[8]  {SELCONSO=LOW}                         0.3416667 123  
[9]  {FUMER=REGULAR}                        0.3166667 114  
[10] {SELALIMENT=NONE}                      0.3166667 114  
In [11]:
# Copy the itemsets into a regular data frame; from here on any
# data-frame-compatible post-processing can be applied to them.
res <- freq |> DATAFRAME()
res
A data.frame: 10 × 3
   items                                 support   count
   <fct>                                 <dbl>     <int>
1  {FUMER=REGULAR}                       0.3166667 114
2  {SELALIMENT=NONE}                     0.3166667 114
3  {SELCONSO=LOW}                        0.3416667 123
4  {ACTIVITESPORT=DAILY}                 0.3500000 126
5  {SELALIMENT=VERYLITTLE}               0.3666667 132
6  {FUMER=FORMER}                        0.3888889 140
7  {ACTIVITESPORT=NEVER}                 0.4777778 172
8  {HAB_BOISSON=REGULAR}                 0.5916667 213
9  {TYPELAIT=2%MILK}                     0.6416667 231
10 {TYPELAIT=2%MILK,HAB_BOISSON=REGULAR} 0.4027778 145
In [12]:
# Check 1 - support: relative frequency of each FUMER category,
# to compare with the support reported for the FUMER items.
D$FUMER |>
  table() |>
  prop.table()
       ????      FORMER       NEVER  OCCASIONAL     REGULAR 
0.002777778 0.388888889 0.261111111 0.030555556 0.316666667 
In [13]:
# Check 2 - support: joint relative frequencies of TYPELAIT x HAB_BOISSON,
# to compare with the support reported for the two-item itemset.
table(D$TYPELAIT, D$HAB_BOISSON) |>
  prop.table()
           
                 FORMER       NEVER  OCCASIONAL     REGULAR
  2%MILK    0.016666667 0.161111111 0.061111111 0.402777778
  NOMILK    0.005555556 0.011111111 0.005555556 0.041666667
  POWDER    0.000000000 0.002777778 0.000000000 0.002777778
  SKIM      0.000000000 0.019444444 0.005555556 0.027777778
  WHOLEMILK 0.008333333 0.063888889 0.047222222 0.116666667
In [14]:
# Filter the itemsets: keep the rows whose item label mentions HAB_BOISSON.
# A logical mask from grepl() selects the same rows as grep() indices would.
res[grepl("HAB_BOISSON", res$items), ]
A data.frame: 2 × 3
   items                                 support   count
   <fct>                                 <dbl>     <int>
8  {HAB_BOISSON=REGULAR}                 0.5916667 213
10 {TYPELAIT=2%MILK,HAB_BOISSON=REGULAR} 0.4027778 145

Extraction des règles

In [15]:
# Mining settings for association rules (note the "target" option):
# support >= 0.1, confidence >= 0.7, rules of 2 to 4 items.
params.regles <- list(
  support    = 0.1,
  confidence = 0.7,
  minlen     = 2,
  maxlen     = 4,
  target     = "rules"
)
# Run Apriori on the factor-coded data with the settings above.
regles <- apriori(D, parameter = params.regles)
Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen
        0.7    0.1    1 none FALSE            TRUE       5     0.1      2
 maxlen target  ext
      4  rules TRUE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 36 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[31 item(s), 360 transaction(s)] done [0.00s].
sorting and recoding items ... [17 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4
Warning message in apriori(D, parameter = params.regles):
“Mining stopped (maxlen reached). Only patterns up to a length of 4 returned!”
 done [0.01s].
writing ... [11 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s].
In [16]:
# Print the mined rules with their quality measures.
arules::inspect(regles)
     lhs                         rhs                     support confidence  coverage     lift count
[1]  {SELALIMENT=MODERATE,                                                                          
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1250000  0.7142857 0.1750000 1.113173    45
[2]  {TYPELAIT=2%MILK,                                                                              
      SELALIMENT=MODERATE}    => {HAB_BOISSON=REGULAR} 0.1250000  0.7258065 0.1722222 1.226715    45
[3]  {SELCONSO=MODERATE,                                                                            
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1305556  0.7014925 0.1861111 1.093235    47
[4]  {SELALIMENT=NONE,                                                                              
      SELCONSO=VERYLOW}       => {TYPELAIT=2%MILK}     0.1361111  0.7777778 0.1750000 1.212121    49
[5]  {SELCONSO=VERYLOW,                                                                             
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1027778  0.7400000 0.1388889 1.153247    37
[6]  {SELALIMENT=NONE,                                                                              
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1250000  0.7142857 0.1750000 1.113173    45
[7]  {ACTIVITESPORT=DAILY,                                                                          
      FUMER=FORMER}           => {HAB_BOISSON=REGULAR} 0.1194444  0.8269231 0.1444444 1.397616    43
[8]  {ACTIVITESPORT=DAILY,                                                                          
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1694444  0.7261905 0.2333333 1.131725    61
[9]  {TYPELAIT=2%MILK,                                                                              
      ACTIVITESPORT=DAILY}    => {HAB_BOISSON=REGULAR} 0.1694444  0.7349398 0.2305556 1.242152    61
[10] {SELALIMENT=VERYLITTLE,                                                                        
      FUMER=FORMER}           => {HAB_BOISSON=REGULAR} 0.1083333  0.7090909 0.1527778 1.198464    39
[11] {SELALIMENT=VERYLITTLE,                                                                        
      ACTIVITESPORT=NEVER}    => {TYPELAIT=2%MILK}     0.1166667  0.7000000 0.1666667 1.090909    42
In [17]:
# Same listing, ordered by decreasing lift.
regles |>
  sort(by = "lift") |>
  inspect()
     lhs                         rhs                     support confidence  coverage     lift count
[1]  {ACTIVITESPORT=DAILY,                                                                          
      FUMER=FORMER}           => {HAB_BOISSON=REGULAR} 0.1194444  0.8269231 0.1444444 1.397616    43
[2]  {TYPELAIT=2%MILK,                                                                              
      ACTIVITESPORT=DAILY}    => {HAB_BOISSON=REGULAR} 0.1694444  0.7349398 0.2305556 1.242152    61
[3]  {TYPELAIT=2%MILK,                                                                              
      SELALIMENT=MODERATE}    => {HAB_BOISSON=REGULAR} 0.1250000  0.7258065 0.1722222 1.226715    45
[4]  {SELALIMENT=NONE,                                                                              
      SELCONSO=VERYLOW}       => {TYPELAIT=2%MILK}     0.1361111  0.7777778 0.1750000 1.212121    49
[5]  {SELALIMENT=VERYLITTLE,                                                                        
      FUMER=FORMER}           => {HAB_BOISSON=REGULAR} 0.1083333  0.7090909 0.1527778 1.198464    39
[6]  {SELCONSO=VERYLOW,                                                                             
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1027778  0.7400000 0.1388889 1.153247    37
[7]  {ACTIVITESPORT=DAILY,                                                                          
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1694444  0.7261905 0.2333333 1.131725    61
[8]  {SELALIMENT=MODERATE,                                                                          
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1250000  0.7142857 0.1750000 1.113173    45
[9]  {SELALIMENT=NONE,                                                                              
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1250000  0.7142857 0.1750000 1.113173    45
[10] {SELCONSO=MODERATE,                                                                            
      HAB_BOISSON=REGULAR}    => {TYPELAIT=2%MILK}     0.1305556  0.7014925 0.1861111 1.093235    47
[11] {SELALIMENT=VERYLITTLE,                                                                        
      ACTIVITESPORT=NEVER}    => {TYPELAIT=2%MILK}     0.1166667  0.7000000 0.1666667 1.090909    42
In [18]:
# Copy the rules into a regular data frame for further processing.
dfRegles <- regles |> DATAFRAME()
dfRegles
A data.frame: 11 × 7
   LHS                                         RHS                   support   confidence coverage  lift     count
   <fct>                                       <fct>                 <dbl>     <dbl>      <dbl>     <dbl>    <int>
1  {SELALIMENT=MODERATE,HAB_BOISSON=REGULAR}   {TYPELAIT=2%MILK}     0.1250000 0.7142857  0.1750000 1.113173 45
2  {TYPELAIT=2%MILK,SELALIMENT=MODERATE}       {HAB_BOISSON=REGULAR} 0.1250000 0.7258065  0.1722222 1.226715 45
3  {SELCONSO=MODERATE,HAB_BOISSON=REGULAR}     {TYPELAIT=2%MILK}     0.1305556 0.7014925  0.1861111 1.093235 47
4  {SELALIMENT=NONE,SELCONSO=VERYLOW}          {TYPELAIT=2%MILK}     0.1361111 0.7777778  0.1750000 1.212121 49
5  {SELCONSO=VERYLOW,HAB_BOISSON=REGULAR}      {TYPELAIT=2%MILK}     0.1027778 0.7400000  0.1388889 1.153247 37
6  {SELALIMENT=NONE,HAB_BOISSON=REGULAR}       {TYPELAIT=2%MILK}     0.1250000 0.7142857  0.1750000 1.113173 45
7  {ACTIVITESPORT=DAILY,FUMER=FORMER}          {HAB_BOISSON=REGULAR} 0.1194444 0.8269231  0.1444444 1.397616 43
8  {ACTIVITESPORT=DAILY,HAB_BOISSON=REGULAR}   {TYPELAIT=2%MILK}     0.1694444 0.7261905  0.2333333 1.131725 61
9  {TYPELAIT=2%MILK,ACTIVITESPORT=DAILY}       {HAB_BOISSON=REGULAR} 0.1694444 0.7349398  0.2305556 1.242152 61
10 {SELALIMENT=VERYLITTLE,FUMER=FORMER}        {HAB_BOISSON=REGULAR} 0.1083333 0.7090909  0.1527778 1.198464 39
11 {SELALIMENT=VERYLITTLE,ACTIVITESPORT=NEVER} {TYPELAIT=2%MILK}     0.1166667 0.7000000  0.1666667 1.090909 42
In [19]:
# Filtering remains possible on the data frame,
# e.g. keep the rules whose consequent (RHS) mentions HAB_BOISSON.
dfRegles[grepl("HAB_BOISSON", dfRegles$RHS), ]
A data.frame: 4 × 7
   LHS                                   RHS                   support   confidence coverage  lift     count
   <fct>                                 <fct>                 <dbl>     <dbl>      <dbl>     <dbl>    <int>
2  {TYPELAIT=2%MILK,SELALIMENT=MODERATE} {HAB_BOISSON=REGULAR} 0.1250000 0.7258065  0.1722222 1.226715 45
7  {ACTIVITESPORT=DAILY,FUMER=FORMER}    {HAB_BOISSON=REGULAR} 0.1194444 0.8269231  0.1444444 1.397616 43
9  {TYPELAIT=2%MILK,ACTIVITESPORT=DAILY} {HAB_BOISSON=REGULAR} 0.1694444 0.7349398  0.2305556 1.242152 61
10 {SELALIMENT=VERYLITTLE,FUMER=FORMER}  {HAB_BOISSON=REGULAR} 0.1083333 0.7090909  0.1527778 1.198464 39