"""Embed a single sentence with LiteLLM and inspect the result."""
from litellm import embedding

# One network round-trip; `data` holds one entry per input text.
result = embedding(
    model="text-embedding-ada-002",
    input="The quick brown fox jumps over the lazy dog",
)

# The embedding vector for our single input.
embedding_vector = result.data[0].embedding

print(f"Embedding dimensions: {len(embedding_vector)}")
print(f"First 5 values: {embedding_vector[:5]}")
print(f"Tokens used: {result.usage.total_tokens}")
"""Embed several texts in a single LiteLLM request."""
from litellm import embedding

# Passing a list sends all texts in one API call; the response
# contains one embedding per input, in the same order.
texts = [
    "Machine learning is fascinating",
    "Deep learning is a subset of ML",
    "Natural language processing enables AI",
    "Computer vision recognizes images",
]

response = embedding(
    model="text-embedding-ada-002",
    input=texts,
)

for i, emb in enumerate(response.data):
    print(f"Text {i}: {len(emb.embedding)} dimensions")

print(f"\nTotal tokens: {response.usage.total_tokens}")
"""Rank documents against a query by cosine similarity of embeddings."""
from litellm import embedding
import numpy as np


def cosine_similarity(a, b):
    """Cosine of the angle between vectors *a* and *b*."""
    dot = np.dot(a, b)
    scale = np.linalg.norm(a) * np.linalg.norm(b)
    return dot / scale


# Corpus to search over.
documents = [
    "Python is a programming language",
    "Machine learning uses algorithms",
    "Deep learning is a subset of machine learning",
    "Natural language processing analyzes text",
]

# Embed the whole corpus in one request.
doc_response = embedding(
    model="text-embedding-ada-002",
    input=documents,
)
doc_embeddings = [d.embedding for d in doc_response.data]

# Embed the query with the same model so the vectors are comparable.
query = "Tell me about ML and AI"
query_response = embedding(
    model="text-embedding-ada-002",
    input=query,
)
query_embedding = query_response.data[0].embedding

# Score every document against the query.
similarities = [
    cosine_similarity(query_embedding, doc_emb)
    for doc_emb in doc_embeddings
]

# Highest-scoring documents first.
ranked_docs = sorted(
    zip(documents, similarities),
    key=lambda x: x[1],
    reverse=True,
)

print("Most relevant documents:")
for doc, score in ranked_docs:
    print(f"Score: {score:.4f} - {doc}")
"""Embed many texts concurrently with LiteLLM's async API."""
import asyncio
from litellm import aembedding


async def embed_batch(texts: list):
    """Return one embedding per text, fetched concurrently.

    Each text is sent as its own request; asyncio overlaps the
    network waits so the batch completes in roughly one round-trip.
    """
    async def _embed_one(text):
        resp = await aembedding(model="text-embedding-ada-002", input=[text])
        return resp.data[0].embedding

    # gather preserves input order, so embeddings line up with texts.
    return await asyncio.gather(*(_embed_one(t) for t in texts))


async def main():
    texts = [f"Document {i}" for i in range(10)]
    embeddings = await embed_batch(texts)
    print(f"Generated {len(embeddings)} embeddings")


asyncio.run(main())
"""Detect near-duplicate texts via pairwise embedding similarity."""
from litellm import embedding
import numpy as np


def find_duplicates(texts: list, threshold: float = 0.95):
    """Return (i, j, similarity) for every text pair at or above *threshold*.

    All texts are embedded in one request; every unordered pair is then
    compared with cosine similarity (norms are computed once per vector).
    """
    resp = embedding(
        model="text-embedding-ada-002",
        input=texts,
    )
    vectors = [item.embedding for item in resp.data]
    # Hoist the per-vector norms out of the O(n^2) pair loop.
    norms = [np.linalg.norm(v) for v in vectors]

    found = []
    for a in range(len(vectors)):
        for b in range(a + 1, len(vectors)):
            score = np.dot(vectors[a], vectors[b]) / (norms[a] * norms[b])
            if score >= threshold:
                found.append((a, b, score))
    return found


texts = [
    "The cat sat on the mat",
    "A feline was sitting on the rug",  # Similar to first
    "Python is a programming language",
    "The dog ran in the park",
]

duplicates = find_duplicates(texts)
for i, j, score in duplicates:
    print(f"Similar (score: {score:.3f}):")
    print(f"  {texts[i]}")
    print(f"  {texts[j]}")