#switch the working directory to the folder that holds the data file,
#so the workbook below can be opened with a relative path
import os
os.chdir("C:/Users/ricco/Desktop/demo")
#load the dataset from the Excel workbook into a DataFrame
import pandas
D = pandas.read_excel("artificial_2D.xlsx")
#preview the first rows
D.head()
| | X1 | X2 |
---|---|---|
0 | 4.524627 | -4.522390 |
1 | 3.711003 | 0.608184 |
2 | 6.558806 | -4.890597 |
3 | 2.528326 | 1.751365 |
4 | 0.186561 | 2.410052 |
#scatter plot of the raw observations, all drawn in one neutral colour
import seaborn as sns
sns.scatterplot(data=D, x="X1", y="X2", color="silver")
<Axes: xlabel='X1', ylabel='X2'>
#K-Means estimator from scikit-learn
from sklearn.cluster import KMeans
#build the 3-cluster model (fixed seed, a single initialisation) and
#fit it on the data in one chained call; fit() hands back the estimator
kmc = KMeans(n_clusters=3, random_state=0, n_init=1).fit(D)
#cluster index assigned to each observation
kmc.labels_
c:\Users\ricco\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2. warnings.warn(
array([0, 2, 0, 2, 1, 1, 0, 1, 2, 2, 2, 2, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 2, 0, 2, 0, 2, 1, 2, 1, 1, 2, 1, 1, 0, 0, 1, 2, 0, 0, 2, 0, 0, 0, 1, 0, 1, 2, 1, 0, 0, 2, 2, 1, 1, 2, 2, 1, 2, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1, 0, 0, 0, 1, 0, 1, 2, 0, 0, 1, 0, 2, 1, 0, 2, 0, 2, 0, 0, 2, 2, 0, 2, 1, 2, 2, 1, 1, 0, 2, 0, 2, 0, 2, 1, 0, 1, 2, 0, 0, 1, 0, 1, 2, 0, 2, 1, 1, 0, 2, 0, 2, 0, 1, 2, 2, 0, 2, 0, 0, 0, 2, 1, 1, 2, 0, 2, 1, 0, 2, 1, 2, 0, 2, 0, 1, 2, 2, 0, 2, 0, 0, 1, 1, 2, 1, 0, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, 2, 0, 2, 2, 1, 0, 1, 0, 2, 1, 1, 2, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 2, 2, 0, 2, 2, 1, 2, 2, 2, 0, 1, 0, 2, 1, 2, 1, 2, 0, 1, 1, 0, 2, 0, 1, 2, 1, 0, 1, 1, 2, 1, 0, 2, 2, 1, 2, 0, 2, 2, 1, 2, 1, 2, 0, 1, 2, 1, 1, 1, 0, 1, 2, 0, 2, 1, 2, 0, 2, 0, 1, 0, 2, 1, 0, 2, 1, 1, 0, 0, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 0, 2, 2, 0, 2, 0, 2, 2, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2])
#number of observations in each cluster
import numpy
#with return_counts=True, numpy.unique yields the distinct labels
#together with how many times each one occurs
print(numpy.unique(kmc.labels_,return_counts=True))
(array([0, 1, 2]), array([100, 98, 102], dtype=int64))
#new DataFrame: the original columns plus the K-Means cluster of each row
#(D itself is left untouched)
X = D.assign(groupes=kmc.labels_)
#scatter plot in the (X1, X2) plane, one colour per cluster
sns.scatterplot(
    x='X1',
    y='X2',
    hue='groupes',
    palette=['red','green','blue'],
    data=X,
)
<Axes: xlabel='X1', ylabel='X2'>
#retrieve the cluster centroids (conditional means) and wrap them in a
#DataFrame using the same column names as the data, one row per cluster
XCenters = pandas.DataFrame(kmc.cluster_centers_,columns=['X1','X2'])
XCenters
| | X1 | X2 |
---|---|---|
0 | 4.923975 | -4.934515 |
1 | 0.184934 | 2.052779 |
2 | 3.982930 | 1.125263 |
#scatter plot of the clustered points with the centroids overlaid
sns.scatterplot(data = X,x='X1',y='X2',hue='groupes',palette=['red','green','blue'])
#second call draws the centroids as large black "X" markers
sns.scatterplot(data = XCenters,x='X1',y='X2',color='black',marker="X",s=100)
<Axes: xlabel='X1', ylabel='X2'>
#scikit-fda library (needs to be installed only once)
#!pip install scikit-fda
#check which version is installed
import skfda
skfda.__version__
'0.8.1'
#run the fuzzy k-means (fuzzy c-means) algorithm
from skfda.ml.clustering import FuzzyCMeans #estimator class
from skfda.representation.grid import FDataGrid #data format expected by the estimator
#compared with KMeans above there is one extra parameter: "fuzzifier"
fcm = FuzzyCMeans(n_clusters = 3, random_state = 0, n_init = 1, fuzzifier = 2)
#the DataFrame is wrapped in an FDataGrid before being handed to fit()
fcm.fit(FDataGrid(D))
#labels: cluster assigned to each observation
fcm.labels_
array([2, 1, 2, 1, 0, 0, 2, 0, 1, 1, 1, 1, 2, 2, 1, 0, 2, 2, 0, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 2, 0, 0, 1, 2, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, 2, 2, 0, 1, 2, 2, 1, 2, 2, 2, 0, 2, 0, 1, 0, 2, 2, 1, 1, 0, 0, 1, 1, 0, 1, 2, 0, 0, 0, 2, 0, 2, 1, 0, 0, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 1, 0, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 0, 1, 1, 0, 0, 2, 1, 2, 1, 2, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 2, 1, 0, 0, 2, 1, 2, 1, 2, 0, 1, 1, 2, 1, 2, 2, 2, 1, 0, 0, 1, 2, 1, 0, 2, 1, 0, 1, 2, 1, 2, 0, 1, 1, 2, 1, 2, 2, 0, 0, 1, 0, 2, 1, 0, 0, 2, 0, 2, 2, 0, 0, 0, 1, 2, 1, 1, 0, 2, 0, 2, 1, 0, 0, 1, 2, 1, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 1, 2, 1, 1, 0, 1, 1, 1, 2, 0, 2, 1, 0, 1, 0, 1, 2, 0, 0, 2, 1, 2, 0, 1, 0, 2, 0, 0, 1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 0, 1, 0, 1, 2, 0, 1, 0, 0, 0, 2, 0, 1, 2, 1, 0, 1, 2, 1, 2, 0, 2, 1, 0, 2, 1, 0, 0, 2, 2, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 2, 0, 2, 2, 0, 2, 1, 1, 1], dtype=int64)
#cluster centres found by fuzzy c-means (returned as an FDataGrid)
fcm.cluster_centers_
FDataGrid( array([[[ 0.18731988], [ 2.01415007]], [[ 4.0607 ], [ 1.19629239]], [[ 4.95843814], [-4.97711646]]]), grid_points=(array([0., 1.]),), domain_range=((0.0, 1.0),), dataset_name=None, argument_names=(None,), coordinate_names=(None,), extrapolation=None, interpolation=SplineInterpolation(interpolation_order=1, monotone=False))
#additional information - membership degree of every observation in each
#cluster, shown as a table with one column per cluster
pandas.DataFrame(fcm.membership_degree_,columns=['C0','C1','C2'])
| | C0 | C1 | C2 |
---|---|---|---|
0 | 0.006305 | 0.011788 | 0.981907 |
1 | 0.031057 | 0.955298 | 0.013645 |
2 | 0.026736 | 0.054526 | 0.918739 |
3 | 0.312985 | 0.653088 | 0.033926 |
4 | 0.988601 | 0.009396 | 0.002003 |
... | ... | ... | ... |
295 | 0.837043 | 0.121239 | 0.041718 |
296 | 0.016815 | 0.027103 | 0.956083 |
297 | 0.085477 | 0.862568 | 0.051955 |
298 | 0.033699 | 0.956475 | 0.009826 |
299 | 0.087187 | 0.797421 | 0.115392 |
300 rows × 3 columns
#stretch membership degrees so that they span [0, 1]
def rescale(values):
    """Linearly map membership degrees from [0.33, 1] onto [0, 1].

    The result is used as the opacity of scatter points: a degree of 0.33
    becomes 0 (fully transparent) and a degree of 1 becomes 1 (opaque).

    Parameters
    ----------
    values : float or array-like supporting arithmetic
        Membership degrees. Assumed to lie in [0.33, 1], which is what the
        largest of three degrees summing to 1 satisfies; smaller inputs
        yield negative results and are not clipped.

    Returns
    -------
    Same type/shape as ``values``, rescaled with ``(values - 0.33) / 0.67``.
    """
    return (values - 0.33) / 0.67
#plots using the membership degrees: each fuzzy cluster is drawn in its own
#colour, and a point's opacity follows its rescaled membership degree in
#the cluster it was assigned to
for _k, _colour in enumerate(("green", "blue", "red")):
    _members = fcm.labels_ == _k
    _axes = sns.scatterplot(
        D.loc[_members],
        x="X1",
        y="X2",
        color=_colour,
        alpha=rescale(fcm.membership_degree_[_members, _k]),
    )
#keep the last Axes as the trailing expression, as in the original cell
_axes
<Axes: xlabel='X1', ylabel='X2'>
#change the fuzzifier: 1.02 is close to a crisp partition; 3 gives more
#smoothing; 5 makes (almost) everything fuzzy
fcm_bis = FuzzyCMeans(n_clusters=3, random_state=0, n_init=1, fuzzifier=3)
fcm_bis.fit(FDataGrid(D))
#plots using the membership degrees: one colour per cluster, opacity driven
#by the rescaled membership degree in the assigned cluster
for _k, _colour in enumerate(("green", "blue", "red")):
    _members = fcm_bis.labels_ == _k
    _axes = sns.scatterplot(
        D.loc[_members],
        x="X1",
        y="X2",
        color=_colour,
        alpha=rescale(fcm_bis.membership_degree_[_members, _k]),
    )
#keep the last Axes as the trailing expression, as in the original cell
_axes
<Axes: xlabel='X1', ylabel='X2'>