# Switch to the demo folder so the relative file names used below resolve there.
import os
os.chdir("C:/Users/ricco/Desktop/demo")
# Read the observations that are to be assigned to clusters.
import pandas
vote_dep = pandas.read_excel(io="vote_clustering_deployment.xlsx")
# Preview the first five rows.
vote_dep.head(n=5)
| budget | physician | salvador | nicaraguan | missile | education |
---|---|---|---|---|---|---|
0 | y | n | y | n | n | n |
1 | y | n | n | y | y | n |
2 | y | n | n | y | y | n |
3 | y | n | neither | neither | n | n |
4 | n | y | y | n | n | y |
# Summary of the frame: index range, columns, non-null counts and dtypes.
vote_dep.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   budget      100 non-null    object
 1   physician   100 non-null    object
 2   salvador    100 non-null    object
 3   nicaraguan  100 non-null    object
 4   missile     100 non-null    object
 5   education   100 non-null    object
dtypes: object(6)
memory usage: 4.8+ KB
# Load the previously saved model from disk.
import pickle
# NOTE(review): unpickling can execute arbitrary code; only load
# "workflow.sav" when it comes from a trusted source.
# The with-block closes the file even if pickle.load raises, which the
# former explicit open()/close() pair did not guarantee.
with open("workflow.sav", "rb") as f:
    modele = pickle.load(f)
# Quick inspection of the model: print its named steps.
print(modele.named_steps)
{'acm': MCA(n_components=2, var_labels=Index(['budget', 'physician', 'salvador', 'nicaraguan', 'missile', 'education'], dtype='object')), 'km': KMeans(n_clusters=3, random_state=0)}
# Assign each row of the deployment data to a cluster.
pred_clus = modele.predict(vote_dep.to_numpy())
# Show the predicted labels.
print(pred_clus)
[0 1 1 1 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 1 1 1 1 0 1 0 0 0 1 1 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0 0 1 0 1 0 1 0 1 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 0 1 1 1 0 1 1 1 1 1 0 1 1 0 2 1 1 0 0 0 0 0 0 1 1]
# Count how many rows were assigned to each cluster.
pandas.Series(pred_clus).value_counts()
1    50
0    49
2     1
dtype: int64
# Write the cluster labels to an Excel file as a single column, without the index.
dExport = pandas.DataFrame({'cluster': pred_clus})
dExport.to_excel('output.xlsx', index=False)