Versions et chargement des modèles pré-entraînés¶

In [20]:
# Version of the Python interpreter running this notebook
import sys
sys.version
Out[20]:
'3.10.15 | packaged by conda-forge | (main, Oct 16 2024, 01:15:49) [MSC v.1941 64 bit (AMD64)]'
In [21]:
# Version of the installed transformers library
import transformers
transformers.__version__
Out[21]:
'4.46.3'
In [22]:
# Load the pre-trained model used for English -> French translation.
# Other languages are possible.
# The downloaded files are then cached on the hard drive (cf. /user/.cache).
from transformers import pipeline

# The checkpoint is named explicitly; per the original note it is also the
# one picked when no model is specified:
# https://huggingface.co/google-t5/t5-base
traducteur = pipeline(task="translation_en_to_fr", model="google-t5/t5-base")
In [23]:
# Load the pre-trained model used for the image-to-text task.
# The downloaded files are then cached on the hard drive (cf. /user/.cache).
from transformers import pipeline

# The checkpoint is named explicitly; per the original note it is also the
# one picked when no model is specified:
# https://huggingface.co/ydshieh/vit-gpt2-coco-en
image_process = pipeline(task="image-to-text", model="ydshieh/vit-gpt2-coco-en")
Config of the encoder: <class 'transformers.models.vit.modeling_vit.ViTModel'> is overwritten by shared encoder config: ViTConfig {
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "transformers_version": "4.46.3"
}

Config of the decoder: <class 'transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel'> is overwritten by shared decoder config: GPT2Config {
  "activation_function": "gelu_new",
  "add_cross_attention": true,
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "decoder_start_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "is_decoder": true,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "pad_token_id": 50256,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.46.3",
  "use_cache": true,
  "vocab_size": 50257
}

Config of the decoder: <class 'transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel'> is overwritten by shared decoder config: GPT2Config {
  "activation_function": "gelu_new",
  "add_cross_attention": true,
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "decoder_start_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "is_decoder": true,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "pad_token_id": 50256,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.46.3",
  "use_cache": true,
  "vocab_size": 50257
}

Quelques exemples¶

In [24]:
# Change the default working directory so that the image files used in the
# following cells can be referenced by their bare file names.
# The folder can be overridden by setting the DEMO_DIR environment variable
# before starting the kernel; when it is not set, the original hard-coded
# location is used, so the behaviour on the author's machine is unchanged.
import os
os.chdir(os.environ.get("DEMO_DIR", "C:/Users/ricco/Desktop/demo"))

Les zèbres¶

In [25]:
# Open the first example image (file name relative to the current working
# directory) and show it inline in the notebook
from PIL import Image
im_1 = Image.open("image_ouest_france.jpg")
display(im_1)
No description has been provided for this image
In [26]:
# Get a description of the image: the image-to-text pipeline is called with
# the image file name and max_new_tokens set to 100
res_1 = image_process('image_ouest_france.jpg',max_new_tokens=100)
# Print the raw pipeline result; the text is read from
# res_1[0]['generated_text'] in the next cell
print(res_1)
[{'generated_text': 'zebras are standing in a field '}]
In [27]:
# ...and translate the generated description into French
traducteur(res_1[0]['generated_text'])
Out[27]:
[{'translation_text': 'zèbres se trouvent dans un champ'}]

La salle de classe¶

In [28]:
# Open the second example image (file name relative to the current working
# directory) and show it inline in the notebook
im_2 = Image.open("image_rentree_sise.jpg")
display(im_2)
No description has been provided for this image
In [29]:
# Get a description of the image: the image-to-text pipeline is called with
# the image file name and max_new_tokens set to 100
res_2 = image_process('image_rentree_sise.jpg',max_new_tokens=100)
# Print the raw pipeline result; the text is read from
# res_2[0]['generated_text'] in the next cell
print(res_2)
[{'generated_text': 'a room with a large group of people sitting at tables '}]
In [30]:
# ...and translate the generated description into French
traducteur(res_2[0]['generated_text'])
Out[30]:
[{'translation_text': 'une salle où un grand groupe de personnes sont assises à des tables'}]

Les voitures¶

In [31]:
# Open the third example image (file name relative to the current working
# directory) and show it inline in the notebook
im_3 = Image.open("image_salon_auto.jpg")
display(im_3)
No description has been provided for this image
In [32]:
# Get a description of the image: the image-to-text pipeline is called with
# the image file name and max_new_tokens set to 100
res_3 = image_process('image_salon_auto.jpg',max_new_tokens=100)
# Print the raw pipeline result; the text is read from
# res_3[0]['generated_text'] in the next cell
print(res_3)
[{'generated_text': 'a vintage car is shown in a museum '}]
In [33]:
# ...and translate the generated description into French
traducteur(res_3[0]['generated_text'])
Out[33]:
[{'translation_text': "une voiture d'époque est montrée dans un musée"}]