In [ ]:
# Dependencies used by this cell
import os
import pandas

# Switch the working directory: the relative file names used for loading
# resolve against it.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
os.chdir("C:/Users/ricco/Desktop/demo")

# Read the deployment dataset and preview its first rows
vote_dep = pandas.read_excel("vote_clustering_deployment.xlsx")
vote_dep.head()
Out[ ]:
budget physician salvador nicaraguan missile education
0 y n y n n n
1 y n n y y n
2 y n n y y n
3 y n neither neither n n
4 n y y n n y
In [ ]:
# Structural summary of the deployment data: number of rows, column names,
# non-null counts and dtypes (printed to stdout by DataFrame.info()).
vote_dep.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   budget      100 non-null    object
 1   physician   100 non-null    object
 2   salvador    100 non-null    object
 3   nicaraguan  100 non-null    object
 4   missile     100 non-null    object
 5   education   100 non-null    object
dtypes: object(6)
memory usage: 4.8+ KB
In [ ]:
# Load the previously saved model (workflow) from disk
import pickle

# SECURITY NOTE: pickle.load can execute arbitrary code while deserialising;
# only load "workflow.sav" if it comes from a trusted source.
# The context manager guarantees the file handle is closed even when
# unpickling fails (e.g. corrupted file, or a class that cannot be imported).
with open("workflow.sav", "rb") as f:
    modele = pickle.load(f)
In [ ]:
# Quick inspection of the loaded model: print the mapping of step names to
# the step objects it contains (exposed through its named_steps attribute).
print(modele.named_steps)
{'acm': MCA(n_components=2,
    var_labels=Index(['budget', 'physician', 'salvador', 'nicaraguan', 'missile',
       'education'],
      dtype='object')), 'km': KMeans(n_clusters=3, random_state=0)}
In [ ]:
# Assign each individual (row) to a cluster with the loaded workflow.
#
# The data is handed to the model as a bare array, so column names are lost
# and only the column *positions* matter. To avoid silently scoring
# misaligned variables when the spreadsheet columns are reordered or extra
# columns are present, select the columns explicitly in the order stored in
# the 'acm' step (its var_labels). A missing column raises KeyError instead
# of producing wrong cluster assignments.
expected_columns = list(modele.named_steps['acm'].var_labels)
pred_clus = modele.predict(vote_dep[expected_columns].values)

# check
print(pred_clus)
[0 1 1 1 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 0 0 1 1 0 1 1 0 0 1
 1 1 0 0 1 1 0 0 1 0 0 1 0 1 0 1 0 1 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 1 0 1 0
 0 0 1 1 1 0 1 1 1 1 1 0 1 1 0 2 1 1 0 0 0 0 0 0 1 1]
In [ ]:
# Count how many individuals were assigned to each cluster label
pandas.Series(pred_clus).value_counts()
Out[ ]:
1    50
0    49
2     1
dtype: int64
In [ ]:
# Export: wrap the predicted labels in a one-column table named 'cluster'
# and write it to an Excel file, without the row index.
dExport = pandas.DataFrame({'cluster': pred_clus})
dExport.to_excel(excel_writer='output.xlsx', index=False)