In [ ]:
# Move to the folder that holds the model file.
# The original demo folder stays the default so existing setups keep working;
# set the DEMO_DIR environment variable to run the notebook on another machine
# without editing this cell.
import os
os.chdir(os.environ.get("DEMO_DIR", "C:/Users/ricco/Desktop/demo"))

# Load the language-identification model (file name is resolved relative to
# the working directory selected above)
import fasttext
modele = fasttext.load_model("lid.176.bin")
In [ ]:
# Characteristics of the loaded model: list the attributes and methods it exposes
dir(modele)
Out[ ]:
['__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_labels',
 '_words',
 'f',
 'get_analogies',
 'get_dimension',
 'get_input_matrix',
 'get_input_vector',
 'get_label_id',
 'get_labels',
 'get_line',
 'get_meter',
 'get_nearest_neighbors',
 'get_output_matrix',
 'get_sentence_vector',
 'get_subword_id',
 'get_subwords',
 'get_word_id',
 'get_word_vector',
 'get_words',
 'is_quantized',
 'labels',
 'predict',
 'quantize',
 'save_model',
 'set_args',
 'set_matrices',
 'test',
 'test_label',
 'words']
In [ ]:
# List of the languages the model recognizes.
# Each label is "__label__" followed by a language code. These are language
# codes (ISO 639 style), not ISO 3166 country codes:
# see https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes
# NOTE(review): some three-letter labels may be Wikipedia edition codes rather
# than ISO 639 codes -- confirm against the fastText documentation.
modele.get_labels()
Out[ ]:
['__label__en',
 '__label__ru',
 '__label__de',
 '__label__fr',
 '__label__it',
 '__label__ja',
 '__label__es',
 '__label__ceb',
 '__label__tr',
 '__label__pt',
 '__label__uk',
 '__label__eo',
 '__label__pl',
 '__label__sv',
 '__label__nl',
 '__label__he',
 '__label__zh',
 '__label__hu',
 '__label__ar',
 '__label__ca',
 '__label__fi',
 '__label__cs',
 '__label__fa',
 '__label__sr',
 '__label__el',
 '__label__vi',
 '__label__bg',
 '__label__ko',
 '__label__no',
 '__label__mk',
 '__label__ro',
 '__label__id',
 '__label__th',
 '__label__hy',
 '__label__da',
 '__label__ta',
 '__label__hi',
 '__label__hr',
 '__label__sh',
 '__label__be',
 '__label__ka',
 '__label__te',
 '__label__kk',
 '__label__war',
 '__label__lt',
 '__label__gl',
 '__label__sk',
 '__label__bn',
 '__label__eu',
 '__label__sl',
 '__label__kn',
 '__label__ml',
 '__label__mr',
 '__label__et',
 '__label__az',
 '__label__ms',
 '__label__sq',
 '__label__la',
 '__label__bs',
 '__label__nn',
 '__label__ur',
 '__label__lv',
 '__label__my',
 '__label__tt',
 '__label__af',
 '__label__oc',
 '__label__nds',
 '__label__ky',
 '__label__ast',
 '__label__tl',
 '__label__is',
 '__label__ia',
 '__label__si',
 '__label__gu',
 '__label__km',
 '__label__br',
 '__label__ba',
 '__label__uz',
 '__label__bo',
 '__label__pa',
 '__label__vo',
 '__label__als',
 '__label__ne',
 '__label__cy',
 '__label__jbo',
 '__label__fy',
 '__label__mn',
 '__label__lb',
 '__label__ce',
 '__label__ug',
 '__label__tg',
 '__label__sco',
 '__label__sa',
 '__label__cv',
 '__label__jv',
 '__label__min',
 '__label__io',
 '__label__or',
 '__label__as',
 '__label__new',
 '__label__ga',
 '__label__mg',
 '__label__an',
 '__label__ckb',
 '__label__sw',
 '__label__bar',
 '__label__lmo',
 '__label__yi',
 '__label__arz',
 '__label__mhr',
 '__label__azb',
 '__label__sah',
 '__label__pnb',
 '__label__su',
 '__label__bpy',
 '__label__pms',
 '__label__ilo',
 '__label__wuu',
 '__label__ku',
 '__label__ps',
 '__label__ie',
 '__label__xmf',
 '__label__yue',
 '__label__gom',
 '__label__li',
 '__label__mwl',
 '__label__kw',
 '__label__sd',
 '__label__hsb',
 '__label__scn',
 '__label__gd',
 '__label__pam',
 '__label__bh',
 '__label__mai',
 '__label__vec',
 '__label__mt',
 '__label__dv',
 '__label__wa',
 '__label__mzn',
 '__label__am',
 '__label__qu',
 '__label__eml',
 '__label__cbk',
 '__label__tk',
 '__label__rm',
 '__label__os',
 '__label__vls',
 '__label__yo',
 '__label__lo',
 '__label__lez',
 '__label__so',
 '__label__myv',
 '__label__diq',
 '__label__mrj',
 '__label__dsb',
 '__label__frr',
 '__label__ht',
 '__label__gn',
 '__label__bxr',
 '__label__kv',
 '__label__sc',
 '__label__nah',
 '__label__krc',
 '__label__bcl',
 '__label__nap',
 '__label__gv',
 '__label__av',
 '__label__rue',
 '__label__xal',
 '__label__pfl',
 '__label__dty',
 '__label__hif',
 '__label__co',
 '__label__lrc',
 '__label__vep',
 '__label__tyv']
In [ ]:
# French text; called with default arguments, the recorded output shows a
# single label together with its score
modele.predict("mignone allons voir si la rose qui ce matin avait eclose")
Out[ ]:
(('__label__fr',), array([0.94153857]))
In [ ]:
# Borderline case: a French sentence mixed with English words
modele.predict("hello mon cher t'as pas le burn-out avec tes dreadlocks ?")
Out[ ]:
(('__label__fr',), array([0.99622267]))
In [ ]:
# Now an English sentence
modele.predict("hello my dear how are you this morning?")
Out[ ]:
(('__label__en',), array([0.95423901]))
In [ ]:
# Same English sentence, asking for the closest candidate languages:
# k = 3 yields three labels with their scores in the recorded output
modele.predict("hello my dear how are you this morning?", k = 3)
Out[ ]:
(('__label__en', '__label__ru', '__label__nl'),
 array([0.95423901, 0.0027302 , 0.00234646]))
In [ ]:
# Now a Malagasy sentence (best label only)
modele.predict("manao ahoana, aiza ny sakafo ?")
Out[ ]:
(('__label__mg',), array([0.40042931]))
In [ ]:
# Same Malagasy sentence, this time with the three closest candidates (k = 3)
modele.predict("manao ahoana, aiza ny sakafo ?", k = 3)
Out[ ]:
(('__label__mg', '__label__eu', '__label__vo'),
 array([0.40042931, 0.20691408, 0.10909589]))
In [ ]:
# Another Malagasy sentence, with a surprising result: in the recorded output
# for k = 5, '__label__mg' only comes fifth
modele.predict("manao ahoana, aiza no misakafo ?", k = 5)
Out[ ]:
(('__label__eo', '__label__es', '__label__eu', '__label__nl', '__label__mg'),
 array([0.21522667, 0.20384081, 0.18237829, 0.08820041, 0.0506593 ]))