Versions et chargement des modèles pré-entraînés
In [20]:
# Show the version of the running Python interpreter
# (the bare last expression is what the notebook displays).
import sys
sys.version
Out[20]:
'3.10.15 | packaged by conda-forge | (main, Oct 16 2024, 01:15:49) [MSC v.1941 64 bit (AMD64)]'
In [21]:
# Show the installed version of the transformers library.
import transformers
transformers.__version__
Out[21]:
'4.46.3'
In [22]:
# Load the pre-trained model used for English -> French translation.
# Other language pairs are possible.
# The downloaded files are then kept in a cache on the hard drive (cf. /user/.cache).
from transformers import pipeline

# The model is named explicitly; when it is not specified, the default is
# https://huggingface.co/google-t5/t5-base
traducteur = pipeline(task="translation_en_to_fr", model="google-t5/t5-base")
In [23]:
# Load the pre-trained model used for the image-to-text task.
# The downloaded files are then kept in a cache on the hard drive (cf. /user/.cache).
from transformers import pipeline

# The model is named explicitly; when it is not specified, the default is
# https://huggingface.co/ydshieh/vit-gpt2-coco-en
image_process = pipeline(task="image-to-text", model="ydshieh/vit-gpt2-coco-en")
Config of the encoder: <class 'transformers.models.vit.modeling_vit.ViTModel'> is overwritten by shared encoder config: ViTConfig { "architectures": [ "ViTModel" ], "attention_probs_dropout_prob": 0.0, "encoder_stride": 16, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "image_size": 224, "initializer_range": 0.02, "intermediate_size": 3072, "layer_norm_eps": 1e-12, "model_type": "vit", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "patch_size": 16, "qkv_bias": true, "transformers_version": "4.46.3" } Config of the decoder: <class 'transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel'> is overwritten by shared decoder config: GPT2Config { "activation_function": "gelu_new", "add_cross_attention": true, "architectures": [ "GPT2LMHeadModel" ], "attn_pdrop": 0.1, "bos_token_id": 50256, "decoder_start_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "is_decoder": true, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "pad_token_id": 50256, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": { "text-generation": { "do_sample": true, "max_length": 50 } }, "transformers_version": "4.46.3", "use_cache": true, "vocab_size": 50257 } Config of the decoder: <class 'transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel'> is overwritten by shared decoder config: GPT2Config { "activation_function": "gelu_new", "add_cross_attention": true, "architectures": [ "GPT2LMHeadModel" ], "attn_pdrop": 0.1, "bos_token_id": 50256, "decoder_start_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "is_decoder": true, "layer_norm_epsilon": 
1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "pad_token_id": 50256, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": { "text-generation": { "do_sample": true, "max_length": 50 } }, "transformers_version": "4.46.3", "use_cache": true, "vocab_size": 50257 }
Quelques exemples
In [24]:
# Change the default working directory to the folder holding the demo images;
# the relative file names used in the cells below are resolved against it.
#
# The folder can be overridden with the DEMO_DIR environment variable so the
# notebook is not tied to a single machine. When DEMO_DIR is not set, the
# original location is used, so the behaviour is unchanged by default.
import os
os.chdir(os.environ.get("DEMO_DIR", "C:/Users/ricco/Desktop/demo"))
Les zèbres
In [25]:
# Open the first example image (path relative to the working directory)
# and show it in the notebook output.
from PIL import Image
im_1 = Image.open("image_ouest_france.jpg")
display(im_1)
In [26]:
# Get a description of the image from the image-to-text pipeline.
# The pipeline is given the file name; max_new_tokens=100 is forwarded to the call.
res_1 = image_process('image_ouest_france.jpg',max_new_tokens=100)
print(res_1)
[{'generated_text': 'zebras are standing in a field '}]
In [27]:
# ... and translate it into French: the 'generated_text' of the first result
# is passed to the translation pipeline.
traducteur(res_1[0]['generated_text'])
Out[27]:
[{'translation_text': 'zèbres se trouvent dans un champ'}]
La salle de classe
In [28]:
# Open the second example image (path relative to the working directory)
# and show it in the notebook output.
im_2 = Image.open("image_rentree_sise.jpg")
display(im_2)
In [29]:
# Get a description of the image from the image-to-text pipeline.
# The pipeline is given the file name; max_new_tokens=100 is forwarded to the call.
res_2 = image_process('image_rentree_sise.jpg',max_new_tokens=100)
print(res_2)
[{'generated_text': 'a room with a large group of people sitting at tables '}]
In [30]:
# ... and translate it into French: the 'generated_text' of the first result
# is passed to the translation pipeline.
traducteur(res_2[0]['generated_text'])
Out[30]:
[{'translation_text': 'une salle où un grand groupe de personnes sont assises à des tables'}]
Les voitures
In [31]:
# Open the third example image (path relative to the working directory)
# and show it in the notebook output.
im_3 = Image.open("image_salon_auto.jpg")
display(im_3)
In [32]:
# Get a description of the image from the image-to-text pipeline.
# The pipeline is given the file name; max_new_tokens=100 is forwarded to the call.
res_3 = image_process('image_salon_auto.jpg',max_new_tokens=100)
print(res_3)
[{'generated_text': 'a vintage car is shown in a museum '}]
In [33]:
# ... and translate it into French: the 'generated_text' of the first result
# is passed to the translation pipeline.
traducteur(res_3[0]['generated_text'])
Out[33]:
[{'translation_text': "une voiture d'époque est montrée dans un musée"}]