# Change the working directory to the folder holding the data file.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
import os
os.chdir("C:/Users/ricco/Desktop/demo")
# Import the adjacency matrix as a data frame.
# Column 0 and row 0 of the sheet hold the labels (header=0, index_col=0).
import pandas
dfAdj = pandas.read_excel("matrice_adjacence_iut.xlsx",header=0,index_col=0)
# Preview of the first rows (bare expression: only shown in an interactive session).
dfAdj.head()
BEN | BES | BOU | BRU | CAM | CHU | DUC | LAN | LEX | MAR | ... | CAL | DIF | FIR | FRE | FUM | HAD | HEL | MZA | VER | VID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
IUT | |||||||||||||||||||||
BEN | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
BES | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
BOU | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
BRU | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | 1 | 0 | ... | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 |
CAM | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
5 rows × 25 columns
# Extract the values of the data frame as a NumPy matrix
MAdj = dfAdj.to_numpy()
# Top-left 5x5 corner, as a quick visual check
MAdj[0:5, 0:5]
array([[0, 0, 1, 0, 0], [0, 0, 0, 1, 1], [1, 0, 0, 0, 0], [0, 1, 0, 0, 1], [0, 1, 0, 1, 0]], dtype=int64)
# Dimensions of the adjacency matrix (rows, columns)
MAdj.shape
(25, 25)
# Number of connections: total of the matrix divided by two
# (presumably because each undirected link is stored twice in a
# symmetric adjacency matrix — TODO confirm the matrix is symmetric)
import numpy
print(MAdj.sum() / 2)
85.0
# Check which version of igraph is installed
import igraph
igraph.__version__
'0.10.4'
# Build the graph structure from the adjacency matrix, as an undirected graph
g = igraph.Graph.Adjacency(MAdj,mode=igraph.ADJ_UNDIRECTED)
# Textual summary of the graph (vertex/edge counts and adjacency lists)
print(g)
IGRAPH U--- 25 85 -- + edges: 0 -- 2 8 12 22 14 -- 2 3 9 12 15 18 19 20 22 24 1 -- 3 4 5 6 11 15 -- 12 14 19 20 24 2 -- 0 9 10 14 19 22 16 -- 3 4 10 13 17 18 21 23 3 -- 1 4 5 7 8 11 14 16 17 18 21 23 17 -- 3 4 6 7 12 13 16 18 21 23 4 -- 1 3 5 11 16 17 18 23 18 -- 3 4 7 13 14 16 17 21 23 5 -- 1 3 4 6 11 20 19 -- 2 9 12 14 15 20 22 24 6 -- 1 5 11 17 20 -- 5 11 12 14 15 19 22 24 7 -- 3 8 17 18 23 21 -- 3 11 13 16 17 18 23 8 -- 0 3 7 22 22 -- 0 2 8 9 10 14 19 20 24 9 -- 2 10 14 19 22 23 -- 3 4 7 13 16 17 18 21 10 -- 2 9 16 22 24 -- 12 14 15 19 20 22 11 -- 1 3 4 5 6 20 21 12 -- 0 14 15 17 19 20 24 13 -- 16 17 18 21 23
# Assign labels to the vertices for the plot, taken from the column names
# of the adjacency data frame
g.vs['label'] = dfAdj.columns
print(g.vs['label'])
['BEN', 'BES', 'BOU', 'BRU', 'CAM', 'CHU', 'DUC', 'LAN', 'LEX', 'MAR', 'ROG', 'ROS', 'TOS', 'BAR', 'BEL', 'CAL', 'DIF', 'FIR', 'FRE', 'FUM', 'HAD', 'HEL', 'MZA', 'VER', 'VID']
# Display the social graph
# Warning: requires the pycairo package to be installed
igraph.plot(g,vertex_label_size=10,vertex_size=30,vertex_color='antiquewhite',bbox=(0,0,400,400))
# Turn the adjacency matrix into a cosine distance matrix:
# each row of MAdj is treated as the profile of one individual
from sklearn.metrics.pairwise import cosine_distances
D = cosine_distances(MAdj)
# First values (top-left 5x5 corner)
print(D[:5,:5])
[[0. 1. 0.79587585 0.85566243 1. ] [1. 0. 1. 0.61270167 0.52565835] [0.79587585 1. 0. 0.88214887 1. ] [0.85566243 0.61270167 0.88214887 0. 0.28556549] [1. 0.52565835 1. 0.28556549 0. ]]
# Check that D really is a valid distance matrix.
# The submodule is imported explicitly: a bare `import scipy` is not
# guaranteed to load `scipy.spatial.distance` on every SciPy version, so the
# attribute access below could otherwise depend on another library having
# imported it first.
import scipy
import scipy.spatial.distance
print(scipy.spatial.distance.is_valid_dm(D))
True
# Convert the square distance matrix to its condensed (vector) form,
# which is the input passed to the hierarchical clustering below
from scipy.spatial.distance import squareform
VD = squareform(D)
# First values of the condensed vector
print(VD[:5])
[1. 0.79587585 0.85566243 1. 1. ]
# Hierarchical agglomerative clustering (HAC) with Ward's method
# NOTE(review): SciPy documents Ward linkage as correctly defined only for
# Euclidean distances; VD holds cosine distances here — confirm this is intended.
from scipy.cluster.hierarchy import ward
cah = ward(VD)
# Dendrograms of the clustering, drawn twice with the same settings:
#   - "CAH": colour threshold 0
#   - "CAH - 2 classes": colour threshold 2.0, to materialise the 2 classes
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram
for titre, seuil in (("CAH", 0), ("CAH - 2 classes", 2.0)):
    plt.title(titre)
    dendrogram(cah, labels=dfAdj.columns, orientation='left', color_threshold=seuil)
    plt.show()
# Cut the tree into 2 classes: the cut is made at height 2.0
# (criterion='distance'), the same value used as colour threshold
# in the second dendrogram
from scipy.cluster.hierarchy import fcluster
groupes = fcluster(cah,t=2.0,criterion='distance')
# Group number assigned to each individual
print(groupes)
[1 2 1 2 2 2 2 2 1 1 1 2 1 2 1 1 2 2 2 1 1 2 1 2 1]
# Count the members of each group (distinct group ids and their frequencies)
print(numpy.unique(groupes,return_counts=True))
(array([1, 2], dtype=int32), array([12, 13], dtype=int64))
# One colour per group; group ids are used with a -1 offset as list positions
couleurs = ['#df9012','#60bca0']
# Recolour the graph: each vertex receives the colour of the group it belongs to
g.vs['color'] = [couleurs[num - 1] for num in groupes]
# Display
igraph.plot(g,vertex_label_size=10,vertex_size=30,bbox=(0,0,400,400))
Matrice des distances avec les labels.
# Wrap the distance matrix in a data frame,
# reusing the row and column identifiers of the adjacency data frame
df_D = pandas.DataFrame(D,index=dfAdj.index,columns=dfAdj.columns)
# Bare expression: only shown in an interactive session
df_D
BEN | BES | BOU | BRU | CAM | CHU | DUC | LAN | LEX | MAR | ... | CAL | DIF | FIR | FRE | FUM | HAD | HEL | MZA | VER | VID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
IUT | |||||||||||||||||||||
BEN | 0.000000 | 1.000000 | 0.795876 | 0.855662 | 1.000000 | 1.000000 | 1.000000 | 0.776393 | 0.750000 | 0.552786 | ... | 0.776393 | 1.000000 | 0.841886 | 1.000000 | 0.469670 | 0.646447 | 1.000000 | 0.666667 | 1.000000 | 0.591752 |
BES | 1.000000 | 0.000000 | 1.000000 | 0.612702 | 0.525658 | 0.269703 | 0.552786 | 0.800000 | 0.776393 | 1.000000 | ... | 1.000000 | 0.683772 | 0.575736 | 0.701858 | 1.000000 | 0.683772 | 0.661938 | 1.000000 | 0.683772 | 1.000000 |
BOU | 0.795876 | 1.000000 | 0.000000 | 0.882149 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.591752 | 0.269703 | ... | 0.634852 | 0.855662 | 1.000000 | 0.863917 | 0.566987 | 0.566987 | 1.000000 | 0.319586 | 1.000000 | 0.500000 |
BRU | 0.855662 | 0.612702 | 0.882149 | 0.000000 | 0.285565 | 0.646447 | 0.422650 | 0.483602 | 0.855662 | 0.870901 | ... | 0.870901 | 0.489690 | 0.452277 | 0.326425 | 0.897938 | 0.693814 | 0.454455 | 0.807550 | 0.387628 | 0.882149 |
CAM | 1.000000 | 0.525658 | 1.000000 | 0.285565 | 0.000000 | 0.566987 | 0.292893 | 0.367544 | 0.823223 | 1.000000 | ... | 1.000000 | 0.500000 | 0.552786 | 0.528595 | 1.000000 | 0.750000 | 0.198216 | 1.000000 | 0.500000 | 1.000000 |
CHU | 1.000000 | 0.269703 | 1.000000 | 0.646447 | 0.566987 | 0.000000 | 0.591752 | 0.817426 | 0.795876 | 1.000000 | ... | 0.817426 | 0.711325 | 0.612702 | 0.727834 | 0.855662 | 0.855662 | 0.691393 | 0.863917 | 0.711325 | 0.833333 |
DUC | 1.000000 | 0.552786 | 1.000000 | 0.422650 | 0.292893 | 0.591752 | 0.000000 | 0.776393 | 1.000000 | 1.000000 | ... | 1.000000 | 0.823223 | 1.000000 | 0.833333 | 1.000000 | 0.646447 | 0.622036 | 1.000000 | 0.823223 | 1.000000 |
LAN | 0.776393 | 0.800000 | 1.000000 | 0.483602 | 0.367544 | 0.817426 | 0.776393 | 0.000000 | 0.776393 | 1.000000 | ... | 1.000000 | 0.367544 | 0.575736 | 0.552786 | 1.000000 | 1.000000 | 0.323877 | 0.850929 | 0.525658 | 1.000000 |
LEX | 0.750000 | 0.776393 | 0.591752 | 0.855662 | 0.823223 | 0.795876 | 1.000000 | 0.776393 | 0.000000 | 0.776393 | ... | 1.000000 | 0.823223 | 0.683772 | 0.666667 | 0.823223 | 0.823223 | 0.811018 | 0.833333 | 0.646447 | 0.795876 |
MAR | 0.552786 | 1.000000 | 0.269703 | 0.870901 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.776393 | 0.000000 | ... | 0.600000 | 0.841886 | 1.000000 | 0.850929 | 0.525658 | 0.525658 | 1.000000 | 0.403715 | 1.000000 | 0.452277 |
ROG | 0.500000 | 1.000000 | 0.591752 | 0.855662 | 0.823223 | 1.000000 | 1.000000 | 1.000000 | 0.750000 | 0.552786 | ... | 1.000000 | 1.000000 | 0.841886 | 0.833333 | 0.469670 | 0.823223 | 0.811018 | 0.666667 | 0.823223 | 0.795876 |
ROS | 1.000000 | 0.323877 | 1.000000 | 0.563564 | 0.599108 | 0.228483 | 0.622036 | 0.830969 | 0.811018 | 1.000000 | ... | 0.830969 | 0.599108 | 0.521909 | 0.622036 | 0.866369 | 0.866369 | 0.857143 | 0.874012 | 0.599108 | 0.845697 |
TOS | 1.000000 | 1.000000 | 0.537090 | 0.781782 | 0.866369 | 0.845697 | 0.811018 | 0.830969 | 0.811018 | 0.661938 | ... | 0.323877 | 0.866369 | 1.000000 | 0.748024 | 0.465478 | 0.465478 | 0.857143 | 0.370059 | 0.866369 | 0.382787 |
BAR | 1.000000 | 1.000000 | 1.000000 | 0.354503 | 0.367544 | 1.000000 | 0.776393 | 0.400000 | 1.000000 | 1.000000 | ... | 1.000000 | 0.367544 | 0.434315 | 0.403715 | 1.000000 | 1.000000 | 0.323877 | 1.000000 | 0.367544 | 1.000000 |
BEL | 0.525658 | 0.858579 | 0.612702 | 0.908713 | 0.776393 | 0.741801 | 1.000000 | 0.717157 | 0.683772 | 0.575736 | ... | 0.434315 | 0.776393 | 0.700000 | 0.894591 | 0.217376 | 0.440983 | 0.760954 | 0.472954 | 0.776393 | 0.354503 |
CAL | 0.776393 | 1.000000 | 0.634852 | 0.870901 | 1.000000 | 0.817426 | 1.000000 | 1.000000 | 1.000000 | 0.600000 | ... | 0.000000 | 1.000000 | 0.858579 | 0.850929 | 0.367544 | 0.367544 | 1.000000 | 0.403715 | 1.000000 | 0.269703 |
DIF | 1.000000 | 0.683772 | 0.855662 | 0.489690 | 0.500000 | 0.711325 | 0.823223 | 0.367544 | 0.823223 | 0.841886 | ... | 1.000000 | 0.000000 | 0.329180 | 0.292893 | 1.000000 | 1.000000 | 0.331847 | 0.882149 | 0.250000 | 1.000000 |
FIR | 0.841886 | 0.575736 | 1.000000 | 0.452277 | 0.552786 | 0.612702 | 1.000000 | 0.575736 | 0.683772 | 1.000000 | ... | 0.858579 | 0.329180 | 0.000000 | 0.262135 | 0.888197 | 0.888197 | 0.402386 | 1.000000 | 0.217376 | 0.870901 |
FRE | 1.000000 | 0.701858 | 0.863917 | 0.326425 | 0.528595 | 0.727834 | 0.833333 | 0.552786 | 0.666667 | 0.850929 | ... | 0.850929 | 0.292893 | 0.262135 | 0.000000 | 0.882149 | 0.882149 | 0.370059 | 0.888889 | 0.175042 | 0.863917 |
FUM | 0.469670 | 1.000000 | 0.566987 | 0.897938 | 1.000000 | 0.855662 | 1.000000 | 1.000000 | 0.823223 | 0.525658 | ... | 0.367544 | 1.000000 | 0.888197 | 0.882149 | 0.000000 | 0.375000 | 1.000000 | 0.410744 | 1.000000 | 0.278312 |
HAD | 0.646447 | 0.683772 | 0.566987 | 0.693814 | 0.750000 | 0.855662 | 0.646447 | 1.000000 | 0.823223 | 0.525658 | ... | 0.367544 | 1.000000 | 0.888197 | 0.882149 | 0.375000 | 0.000000 | 0.866369 | 0.646447 | 1.000000 | 0.278312 |
HEL | 1.000000 | 0.661938 | 1.000000 | 0.454455 | 0.198216 | 0.691393 | 0.622036 | 0.323877 | 0.811018 | 1.000000 | ... | 1.000000 | 0.331847 | 0.402386 | 0.370059 | 1.000000 | 0.866369 | 0.000000 | 1.000000 | 0.331847 | 1.000000 |
MZA | 0.666667 | 1.000000 | 0.319586 | 0.807550 | 1.000000 | 0.863917 | 1.000000 | 0.850929 | 0.833333 | 0.403715 | ... | 0.403715 | 0.882149 | 1.000000 | 0.888889 | 0.410744 | 0.646447 | 1.000000 | 0.000000 | 1.000000 | 0.591752 |
VER | 1.000000 | 0.683772 | 1.000000 | 0.387628 | 0.500000 | 0.711325 | 0.823223 | 0.525658 | 0.646447 | 1.000000 | ... | 1.000000 | 0.250000 | 0.217376 | 0.175042 | 1.000000 | 1.000000 | 0.331847 | 1.000000 | 0.000000 | 1.000000 |
VID | 0.591752 | 1.000000 | 0.500000 | 0.882149 | 1.000000 | 0.833333 | 1.000000 | 1.000000 | 0.795876 | 0.452277 | ... | 0.269703 | 1.000000 | 0.870901 | 0.863917 | 0.278312 | 0.278312 | 1.000000 | 0.591752 | 1.000000 | 0.000000 |
25 rows × 25 columns
# Positions (0-based) of the individuals belonging to the 1st group
id_1 = numpy.where(groupes==1)[0]
id_1
array([ 0, 2, 8, 9, 10, 12, 14, 15, 19, 20, 22, 24], dtype=int64)
# Build the corresponding sub data frame: distances between members
# of the 1st group only (same positions selected on rows and columns)
df_1 = df_D.iloc[id_1,id_1]
df_1
BEN | BOU | LEX | MAR | ROG | TOS | BEL | CAL | FUM | HAD | MZA | VID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
IUT | ||||||||||||
BEN | 0.000000 | 0.795876 | 0.750000 | 0.552786 | 0.500000 | 1.000000 | 0.525658 | 0.776393 | 0.469670 | 0.646447 | 0.666667 | 0.591752 |
BOU | 0.795876 | 0.000000 | 0.591752 | 0.269703 | 0.591752 | 0.537090 | 0.612702 | 0.634852 | 0.566987 | 0.566987 | 0.319586 | 0.500000 |
LEX | 0.750000 | 0.591752 | 0.000000 | 0.776393 | 0.750000 | 0.811018 | 0.683772 | 1.000000 | 0.823223 | 0.823223 | 0.833333 | 0.795876 |
MAR | 0.552786 | 0.269703 | 0.776393 | 0.000000 | 0.552786 | 0.661938 | 0.575736 | 0.600000 | 0.525658 | 0.525658 | 0.403715 | 0.452277 |
ROG | 0.500000 | 0.591752 | 0.750000 | 0.552786 | 0.000000 | 1.000000 | 0.525658 | 1.000000 | 0.469670 | 0.823223 | 0.666667 | 0.795876 |
TOS | 1.000000 | 0.537090 | 0.811018 | 0.661938 | 1.000000 | 0.000000 | 0.521909 | 0.323877 | 0.465478 | 0.465478 | 0.370059 | 0.382787 |
BEL | 0.525658 | 0.612702 | 0.683772 | 0.575736 | 0.525658 | 0.521909 | 0.000000 | 0.434315 | 0.217376 | 0.440983 | 0.472954 | 0.354503 |
CAL | 0.776393 | 0.634852 | 1.000000 | 0.600000 | 1.000000 | 0.323877 | 0.434315 | 0.000000 | 0.367544 | 0.367544 | 0.403715 | 0.269703 |
FUM | 0.469670 | 0.566987 | 0.823223 | 0.525658 | 0.469670 | 0.465478 | 0.217376 | 0.367544 | 0.000000 | 0.375000 | 0.410744 | 0.278312 |
HAD | 0.646447 | 0.566987 | 0.823223 | 0.525658 | 0.823223 | 0.465478 | 0.440983 | 0.367544 | 0.375000 | 0.000000 | 0.646447 | 0.278312 |
MZA | 0.666667 | 0.319586 | 0.833333 | 0.403715 | 0.666667 | 0.370059 | 0.472954 | 0.403715 | 0.410744 | 0.646447 | 0.000000 | 0.591752 |
VID | 0.591752 | 0.500000 | 0.795876 | 0.452277 | 0.795876 | 0.382787 | 0.354503 | 0.269703 | 0.278312 | 0.278312 | 0.591752 | 0.000000 |
# Row totals: for each member, the sum of its distances to the group
# (the original note says column sums could have been used instead)
sum_1 = df_1.sum(axis=1)
sum_1
IUT BEN 7.275249 BOU 5.987287 LEX 8.638591 MAR 5.896653 ROG 7.675632 TOS 6.539632 BEL 5.365565 CAL 6.177943 FUM 4.969663 HAD 5.959303 MZA 5.785639 VID 5.291150 dtype: float64
# Position of the smallest total
print(sum_1.argmin())
# Identifier of the corresponding element (label of the smallest total)
print(sum_1.idxmin())
8 FUM
# Medoids of the groups: within each group, the member whose total
# distance to the members of that same group is the smallest
medoides = []
for grp in numpy.unique(groupes):
    # positions of the individuals assigned to this group
    positions = numpy.flatnonzero(groupes == grp)
    # distances restricted to the group, summed row by row
    totaux = df_D.iloc[positions, positions].sum(axis=1)
    # control output
    print('\nGroupe {}'.format(grp))
    print(totaux)
    # label of the member with the smallest total
    medoides.append(totaux.idxmin())
# show the medoids
print('\nMedoïdes : ', medoides)
Groupe 1 IUT BEN 7.275249 BOU 5.987287 LEX 8.638591 MAR 5.896653 ROG 7.675632 TOS 6.539632 BEL 5.365565 CAL 6.177943 FUM 4.969663 HAD 5.959303 MZA 5.785639 VID 5.291150 dtype: float64 Groupe 2 IUT BES 7.391803 BRU 5.479507 CAM 5.284900 CHU 7.575377 DUC 8.136718 LAN 6.821537 ROS 7.198309 BAR 6.626405 DIF 5.746127 FIR 5.936537 FRE 5.796712 HEL 5.569073 VER 5.572524 dtype: float64 Medoïdes : ['FUM', 'CAM']
# Highlight the two medoids with their own colours
# (first medoid -> '#fd3700', second medoid -> '#00b400')
for rang, teinte in enumerate(('#fd3700', '#00b400')):
    g.vs[dfAdj.index.get_loc(medoides[rang])]['color'] = teinte
# Redraw the graph
igraph.plot(g,vertex_label_size=10,vertex_size=30,bbox=(0,0,400,400))