Calculates embeddings (vector representations) for the given contents. Supports both text-only and multimodal embeddings, depending on the model. Embeddings are useful for:
# Embed several pieces of text in a single request, then report the
# vector size and token statistics for each result.
prompts = [
    'What is your name?',
    'What is your favorite color?',
    'Where do you live?',
]
response = client.models.embed_content(
    model='text-embedding-004',
    contents=prompts,
)
for index, embedding in enumerate(response.embeddings):
    print(f"Text {index+1}: {len(embedding.values)} dimensions")
    print(f" Tokens: {embedding.statistics.token_count}")
    print(f" Truncated: {embedding.statistics.truncated}")
from google.genai import types

MODEL = 'text-embedding-004'

# Document embeddings for search: use the RETRIEVAL_DOCUMENT task type
# and trim the output vectors from the native 768 dimensions to 256.
doc_response = client.models.embed_content(
    model=MODEL,
    contents=[
        'Python is a high-level programming language.',
        'JavaScript is used for web development.',
    ],
    config=types.EmbedContentConfig(
        task_type='RETRIEVAL_DOCUMENT',
        output_dimensionality=256,
    ),
)

# Query embedding for search: the matching RETRIEVAL_QUERY task type,
# with the same reduced dimensionality so the vectors are comparable.
query_response = client.models.embed_content(
    model=MODEL,
    contents='programming languages',
    config=types.EmbedContentConfig(
        task_type='RETRIEVAL_QUERY',
        output_dimensionality=256,
    ),
)

print(f"Document embeddings: {len(doc_response.embeddings)}")
print(f"Query dimension: {len(query_response.embeddings[0].values)}")
import numpy as np
from google.genai import types


def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Returns 0.0 when either vector has zero magnitude, instead of
    dividing by zero (the original produced NaN / a runtime warning
    for zero vectors).
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0.0:
        return 0.0
    return np.dot(a, b) / denom


# Embed documents
documents = [
    'The quick brown fox jumps over the lazy dog.',
    'A fast auburn fox leaps above an idle canine.',
    'Python is a programming language.',
]
doc_response = client.models.embed_content(
    model='text-embedding-004',
    contents=documents,
    config=types.EmbedContentConfig(task_type='SEMANTIC_SIMILARITY'),
)

# Embed query
query = 'agile fox jumping'
query_response = client.models.embed_content(
    model='text-embedding-004',
    contents=query,
    config=types.EmbedContentConfig(task_type='SEMANTIC_SIMILARITY'),
)

# Calculate similarities between the query and each document.
query_embedding = np.array(query_response.embeddings[0].values)
for i, doc in enumerate(documents):
    doc_embedding = np.array(doc_response.embeddings[i].values)
    similarity = cosine_similarity(query_embedding, doc_embedding)
    print(f"Document {i+1}: {similarity:.4f}")
    print(f" {doc[:50]}...")

# Output:
# Document 1: 0.8234
#  The quick brown fox jumps over the lazy dog.
# Document 2: 0.8156
#  A fast auburn fox leaps above an idle canine.
# Document 3: 0.3421
#  Python is a programming language.
# RETRIEVAL_DOCUMENT embeddings accept an optional title that gives the
# model extra context about the document being embedded.
doc_config = types.EmbedContentConfig(
    task_type='RETRIEVAL_DOCUMENT',
    title='Introduction to Python Programming',
)
response = client.models.embed_content(
    model='text-embedding-004',
    contents='Python is widely used in data science and machine learning applications.',
    config=doc_config,
)
embedding = response.embeddings[0].values
print(f"Document embedding with title: {len(embedding)} dimensions")
import time


def embed_large_dataset(client, texts, batch_size=100):
    """Embed a large list of texts in batches.

    Args:
        client: genai client used to issue the embedding requests.
        texts: list of strings to embed.
        batch_size: maximum number of texts sent per API call.

    Returns:
        A list with one embedding (list of floats) per input text,
        in the same order as ``texts``.
    """
    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        # Rate limiting: pause *between* requests only. The original
        # slept after every batch, including the last, adding a wasted
        # second to every call.
        if start:
            time.sleep(1)
        batch = texts[start:start + batch_size]
        response = client.models.embed_content(
            model='text-embedding-004',
            contents=batch,
            config={'output_dimensionality': 256},
        )
        all_embeddings.extend(emb.values for emb in response.embeddings)
    return all_embeddings


# Use it
texts = [f"Document {i}" for i in range(500)]
embeddings = embed_large_dataset(client, texts)
print(f"Generated {len(embeddings)} embeddings")
import asyncio

from google import genai

client = genai.Client(api_key='your-api-key')


async def embed_async():
    """Embed a batch of texts via the async client and print vector sizes."""
    texts = [
        'First text to embed',
        'Second text to embed',
        'Third text to embed',
    ]
    response = await client.aio.models.embed_content(
        model='text-embedding-004',
        contents=texts,
        config={'output_dimensionality': 256},
    )
    for position, emb in enumerate(response.embeddings):
        print(f"Embedding {position+1}: {len(emb.values)} dimensions")


asyncio.run(embed_async())