実際のところ
前提条件
- 公式サンプルで引用されているWikipediaの画像をsample.jpgとしてローカルに保存
スクリプト
"""Ask an OpenAI vision model a question about a local image file.

Usage:
    python3 visionTest.py [IMAGE_PATH]

IMAGE_PATH defaults to ./sample.jpg. The API key is read from the
OPENAI_API_KEY entry of a .env file in the current directory.
"""
import base64
import mimetypes
import sys

from dotenv import dotenv_values
from openai import OpenAI

DEFAULT_IMAGE_PATH = "./sample.jpg"
DEFAULT_PROMPT = "How many person in this image?"
DEFAULT_MIME_TYPE = "image/jpeg"
MODEL = "gpt-4-vision-preview"
MAX_TOKENS = 300


def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    Args:
        image_path: Path of the image file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def detectImage(prompt, client, image_b64=None, mime_type=DEFAULT_MIME_TYPE):
    """Send *prompt* together with a base64-encoded image to the chat API.

    Args:
        prompt: Question about the image, sent as the text part of the
            user message.
        client: Object exposing ``chat.completions.create`` (an ``OpenAI``
            client in this script).
        image_b64: Base64-encoded image data. When omitted, the module-level
            ``base64_image`` variable is used, which is how earlier versions
            of this function obtained the image.
        mime_type: Media type written into the ``data:`` URL.

    Returns:
        Whatever ``client.chat.completions.create`` returns.
    """
    if image_b64 is None:
        # Backward compatibility: callers that still set the global keep
        # working; a NameError is raised if it was never defined.
        image_b64 = base64_image  # noqa: F821

    return client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_b64}",
                        },
                    },
                ],
            }
        ],
        max_tokens=MAX_TOKENS,
    )


def main():
    """Describe the image named on the command line and print the answer."""
    # Path to the image: first CLI argument, else the bundled sample.
    image_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_IMAGE_PATH

    # Label the data URL with the real media type when it can be guessed
    # from the file extension, instead of always claiming JPEG.
    mime_type = mimetypes.guess_type(image_path)[0] or DEFAULT_MIME_TYPE
    image_b64 = encode_image(image_path)

    api_key = dotenv_values(".env").get("OPENAI_API_KEY")
    if not api_key:
        sys.exit("OPENAI_API_KEY is not set in .env")

    client = OpenAI(api_key=api_key)
    result = detectImage(DEFAULT_PROMPT, client, image_b64, mime_type)
    print(result.choices[0].message.content)


if __name__ == "__main__":
    main()
試しに実行すると
$ python3 visionTest.py
There are no people visible in this image. It features a wooden path leading through a grassy area under a blue sky with some clouds.