The OpenAIImageToText class provides image-to-text conversion capabilities using OpenAI’s vision models. It extends LangChain’s ChatOpenAI class with specialized methods for processing images.
from scrapegraphai.models import OpenAIImageToText


class OpenAIImageToText(ChatOpenAI):
    """
    A wrapper for the OpenAI vision models that converts images to text
    descriptions.

    Args:
        llm_config (dict): Configuration parameters for the language model.
    """
from scrapegraphai.models import OpenAIImageToText

# Initialize the model
itt_model = OpenAIImageToText({
    "model": "gpt-4-vision-preview",
    "api_key": "your-openai-api-key",
    "temperature": 0.5,
})

# Analyze an image
image_url = "https://example.com/product-image.jpg"
description = itt_model.run(image_url)
print(description)
# Output: "This image shows a red bicycle with a basket..."
The OpenAIImageToText model is primarily used within the OmniScraperGraph for automated image analysis:
from scrapegraphai.graphs import OmniScraperGraph

graph_config = {
    "llm": {
        "model": "gpt-4o",
        "api_key": "your-openai-api-key",
    },
    "max_images": 10,  # Process up to 10 images
}

omni_scraper = OmniScraperGraph(
    prompt="List all products and describe their images",
    source="https://example.com/shop",
    config=graph_config,
)

result = omni_scraper.run()
The OpenAITextToSpeech class converts text to speech audio using OpenAI’s TTS API. Unlike the LLM models, this is a standalone class that directly interfaces with the OpenAI API.
from scrapegraphai.models import OpenAITextToSpeech


class OpenAITextToSpeech:
    """
    Implements text-to-speech using the OpenAI API.

    Attributes:
        client (OpenAI): The OpenAI client instance
        model (str): The TTS model to use
        voice (str): The voice model for generating speech

    Args:
        tts_config (dict): Configuration parameters for the TTS model
    """
from scrapegraphai.models import OpenAITextToSpeech

# Initialize the TTS model
tts = OpenAITextToSpeech({
    "api_key": "your-openai-api-key",
    "model": "tts-1-hd",
    "voice": "nova",
})

# Generate speech
text = "Welcome to ScrapeGraphAI. This library makes web scraping intelligent."
audio_bytes = tts.run(text)

# Save to file
with open("output.mp3", "wb") as f:
    f.write(audio_bytes)
from scrapegraphai.models import OpenAITextToSpeech
import re


def _split_into_chunks(text: str, chunk_size: int) -> list:
    """Split *text* on sentence boundaries into chunks under *chunk_size* chars.

    A sentence that is itself longer than chunk_size is hard-split so no
    chunk ever exceeds the limit (the OpenAI TTS endpoint rejects inputs
    over its maximum character length). Empty chunks are never emitted.
    """
    # Split on sentence boundaries (keep the terminating punctuation).
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        # Hard-split an oversized sentence; flush any pending chunk first
        # so ordering is preserved.
        while len(sentence) >= chunk_size:
            if current_chunk.strip():
                chunks.append(current_chunk.strip())
                current_chunk = ""
            chunks.append(sentence[:chunk_size].strip())
            sentence = sentence[chunk_size:]
        if len(current_chunk) + len(sentence) < chunk_size:
            current_chunk += sentence + " "
        else:
            # Flush the full chunk and start a new one with this sentence.
            if current_chunk.strip():
                chunks.append(current_chunk.strip())
            current_chunk = sentence + " "
    if current_chunk.strip():
        chunks.append(current_chunk.strip())
    return chunks


def text_to_speech_long(text: str, output_file: str, chunk_size: int = 4000):
    """Convert long text to speech by splitting into chunks.

    Args:
        text: The full text to convert.
        output_file: Path of the MP3 file to write.
        chunk_size: Maximum characters per TTS request (default 4000,
            just under the OpenAI TTS input limit).
    """
    tts = OpenAITextToSpeech({
        "api_key": "your-api-key",
        "model": "tts-1",
    })

    chunks = _split_into_chunks(text, chunk_size)

    # Generate audio for each chunk and concatenate the raw MP3 bytes.
    all_audio = b""
    for i, chunk in enumerate(chunks):
        print(f"Processing chunk {i+1}/{len(chunks)}...")
        audio = tts.run(chunk)
        all_audio += audio

    # Save combined audio
    with open(output_file, "wb") as f:
        f.write(all_audio)
    print(f"Saved {output_file}")


# Usage
long_text = """Your very long text here..."""
text_to_speech_long(long_text, "long_output.mp3")