Agentic RAG combines Retrieval-Augmented Generation (RAG) with agent capabilities: rather than retrieving on every request, the agent decides when to search its knowledge base and what to look for. This example uses the Agno framework to build a knowledge base from web URLs, store document embeddings in a LanceDB vector database, and query them through an AI agent with semantic search.
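Before the UI, it helps to see the core Agno wiring. The sketch below is a minimal setup, assuming Agno's `UrlKnowledge`, `LanceDb`, and `OpenAIEmbedder` wrappers (module paths can shift between Agno releases, so treat the imports as a guide rather than a definitive reference):

```python
from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.knowledge.url import UrlKnowledge
from agno.vectordb.lancedb import LanceDb, SearchType
from agno.embedder.openai import OpenAIEmbedder

# Vector store: a local LanceDB table that holds chunk embeddings
vector_db = LanceDb(
    uri="tmp/lancedb",
    table_name="agentic_rag",
    search_type=SearchType.vector,
    embedder=OpenAIEmbedder(id="text-embedding-3-small"),
)

# Knowledge base built from web URLs
knowledge_base = UrlKnowledge(
    urls=["https://docs.agno.com/introduction"],  # example URL
    vector_db=vector_db,
)

# Agent that searches the knowledge base on demand
agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    knowledge=knowledge_base,
    search_knowledge=True,
)
```

With the pieces wired up, a single call indexes everything: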
```python
# The knowledge base automatically:
# 1. Fetches URL content
# 2. Chunks text (typically 512-1024 tokens)
# 3. Generates embeddings via OpenAI API
# 4. Stores in LanceDB with metadata
knowledge_base.load()  # All steps happen here
```
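If the default chunk size doesn't suit your documents, Agno exposes pluggable chunking strategies. The snippet below is a sketch under stated assumptions: the `chunking_strategy` parameter and the `agno.document.chunking.fixed` module path match recent Agno releases but may differ in yours:

```python
# Assumption: module path and parameter names per recent Agno releases
from agno.document.chunking.fixed import FixedSizeChunking

knowledge_base = UrlKnowledge(
    urls=["https://docs.agno.com/introduction"],  # example URL
    vector_db=vector_db,  # the LanceDb instance from the setup above
    chunking_strategy=FixedSizeChunking(chunk_size=1024, overlap=64),
)
knowledge_base.load(recreate=True)  # re-chunk and re-embed from scratch
```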
```python
import streamlit as st
from typing import Iterator

st.set_page_config(page_title="Agentic RAG", layout="wide")
st.title("Agentic RAG with Agno & GPT-4o")

# Sidebar: URL management
with st.sidebar:
    st.markdown("### 🧠 Knowledge Base URLs")

    if "urls" not in st.session_state:
        st.session_state.urls = [""]

    # URL input fields
    for i, url in enumerate(st.session_state.urls):
        st.session_state.urls[i] = st.text_input(
            f"URL {i+1}",
            value=url,
            key=f"url_{i}",
            label_visibility="collapsed",
        )

    # Add URL button
    if st.button("➕"):
        if st.session_state.urls and st.session_state.urls[-1].strip() != "":
            st.session_state.urls.append("")

    # Load knowledge base
    if st.button("Load Knowledge Base"):
        urls = [u for u in st.session_state.urls if u.strip()]
        urls = list(dict.fromkeys(urls))  # Remove duplicates
        if urls:
            with st.spinner("Loading knowledge base..."):
                try:
                    knowledge_base = load_knowledge_base(urls)
                    st.session_state.docs_loaded = True
                    st.session_state.loaded_urls = urls.copy()
                    st.success(f"Loaded {len(urls)} URL(s)!")
                except Exception as e:
                    st.error(f"Error: {str(e)}")
        else:
            st.warning("Please add at least one URL.")

    # Reset button
    if st.button("🔄 Reset KB"):
        st.session_state.docs_loaded = False
        if 'loaded_urls' in st.session_state:
            del st.session_state['loaded_urls']
        st.success("Knowledge base reset!")
        st.rerun()

# Chat interface
query = st.chat_input("Ask a question")
if query:
    if not st.session_state.get('docs_loaded', False):
        st.warning("Please load the knowledge base first.")
    else:
        loaded_urls = st.session_state.loaded_urls
        response = agentic_rag_response(loaded_urls, query)

        st.markdown("#### Answer")
        answer = ""
        answer_placeholder = st.empty()

        # Stream response
        for content in response:
            if hasattr(content, 'event') and content.event == "RunResponseContent":
                answer += content.content
                answer_placeholder.markdown(answer)
```
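The app above leans on two helpers, `load_knowledge_base` and `agentic_rag_response`, that are not defined in this section. Here is a minimal sketch of both, reusing the Agno setup from earlier; `st.cache_resource` keeps the index from being rebuilt on every Streamlit rerun. The table name and `recreate=True` behavior are assumptions for illustration:

```python
from typing import Iterator

import streamlit as st
from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.knowledge.url import UrlKnowledge
from agno.vectordb.lancedb import LanceDb

@st.cache_resource
def load_knowledge_base(urls: list[str]):
    """Build and index the knowledge base once per set of URLs."""
    kb = UrlKnowledge(
        urls=urls,
        vector_db=LanceDb(uri="tmp/lancedb", table_name="agentic_rag"),
    )
    kb.load(recreate=True)  # fetch, chunk, embed, store
    return kb

def agentic_rag_response(urls: list[str], query: str) -> Iterator:
    """Run the agent over the loaded knowledge base, streaming events."""
    kb = load_knowledge_base(urls)  # cached, so no re-indexing
    agent = Agent(
        model=OpenAIChat(id="gpt-4o"),
        knowledge=kb,
        search_knowledge=True,  # agent searches the KB before answering
    )
    # stream=True yields the RunResponseContent events the UI consumes
    return agent.run(query, stream=True)
```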
1. **Query Embedding**: The user's question is embedded with OpenAI's embedding model.
2. **Vector Search**: LanceDB finds the document chunks most similar to the query vector.
3. **Context Retrieval**: The top-k most relevant chunks are retrieved.
4. **Augmented Prompt**: The retrieved context is added to the prompt.
5. **LLM Generation**: GPT-4o generates an answer grounded in that context.
```python
# This happens automatically when agent.run() is called
# with search_knowledge=True:

# 1. Query embedding
query_vector = embedder.embed(query)

# 2. Vector search in LanceDB
results = vector_db.search(
    query_vector,
    limit=5,         # Top 5 most similar chunks
    metric="cosine"  # Cosine similarity
)

# 3. Context is automatically added to the prompt
# 4. LLM generates the response with that context
```
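To see the retrieval step outside the agent, here is a standalone sketch using the `lancedb` and `openai` client libraries directly. The table name and stored column layout are assumptions that depend on how the vector store was populated:

```python
import lancedb
from openai import OpenAI

# Standalone sketch of the retrieval step, independent of the agent.
# Assumptions: table name ("agentic_rag") and column layout depend on
# how the vector store was written.
client = OpenAI()
db = lancedb.connect("tmp/lancedb")
table = db.open_table("agentic_rag")

question = "What does the knowledge base contain?"

# 1. Embed the query with the same model used at indexing time
query_vector = client.embeddings.create(
    model="text-embedding-3-small",
    input=question,
).data[0].embedding

# 2. Cosine-similarity search for the top 5 chunks
results = table.search(query_vector).metric("cosine").limit(5).to_list()

# 3. These rows are what gets stitched into the augmented prompt
for row in results:
    print(row["_distance"], list(row.keys()))
```

Note that the query must be embedded with the same model used at indexing time; mixing embedding models makes the distances meaningless.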