The fastest way to experience the RAG Recruitment Assistant is through Google Colab. You’ll go from zero to analyzing candidate profiles in under 5 minutes.
No local installation required! Google Colab provides a free Python environment with GPU support and all major ML libraries pre-installed.
The notebook includes a realistic data generator that creates student CVs in PDF format:
# Synthetic-data cell: fabricate a small batch of student CVs as PDF files
# so the rest of the tutorial has realistic documents to index.
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
import os
import random
import shutil

# Configuration
CANTIDAD_A_GENERAR = 5
CARPETA_DESTINO = "cvs_estudiantes_final"

# Wipe any output from a previous run so each execution starts clean.
if os.path.exists(CARPETA_DESTINO):
    shutil.rmtree(CARPETA_DESTINO)
os.makedirs(CARPETA_DESTINO, exist_ok=True)

print(f"Generating {CANTIDAD_A_GENERAR} student CVs...")

# Sample data pools used to randomize each generated CV.
nombres = ["Anghelo", "Camila", "Sebastian", "Valeria", "Mateo"]
apellidos = ["Mendoza", "Vargas", "Toscano", "Rios", "Silva"]
tech_stack = [
    "Python",
    "Java",
    "Spring Boot",
    "React",
    "SQL (PostgreSQL)",
    "Git/GitHub",
    "PowerBI",
]
logros_tech = [
    "Development of a Virtual Library System with user roles and stock management.",
    "Created a RESTful API for financial management using Python and FastAPI.",
    "First place in university Hackathon developing a recycling app.",
    "Automation of Excel reports using Python scripts and Pandas.",
    "Implementation of normalized relational database for a fictional e-commerce.",
]

# Generate PDFs (full implementation in source code)
# ...

print(f"✓ {CANTIDAD_A_GENERAR} student CVs created in '{CARPETA_DESTINO}' folder")
Expected Output:
Generating 5 student CVs...✓ 5 student CVs created in 'cvs_estudiantes_final' folder
# Single-candidate analysis cell: pick one generated CV at random, index it,
# and ask the LLM a mentoring question grounded ONLY in that CV.
import os  # FIX: os was used below (os.listdir) but never imported in this cell
import random

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
# FIX: RunnablePassthrough is used in the chain below but was never imported.
from langchain_core.runnables import RunnablePassthrough

# Select a random CV
carpeta_fuente = "cvs_estudiantes_final"
archivos_disponibles = os.listdir(carpeta_fuente)
archivo_elegido = random.choice(archivos_disponibles)
# FIX: os.path.join instead of hand-built "/" path — portable across OSes.
ruta_archivo = os.path.join(carpeta_fuente, archivo_elegido)
print(f"📂 Selected student profile: '{archivo_elegido}'")

# Load and vectorize.
# NOTE(review): `embeddings` and `llm` are assumed to be defined in an
# earlier notebook cell — confirm before running this cell standalone.
loader = PyPDFLoader(ruta_archivo)
docs = loader.load()
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever()

# Create RAG chain: retrieved CV text fills {context}, the user question
# passes through untouched into {question}.
template = """You are a Career Mentor and expert in tech employability.
Analyze this student's profile based ONLY on the following context (their CV):
{context}

Question: {question}"""
prompt = ChatPromptTemplate.from_template(template)
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Ask about the candidate
question = "What notable projects or academic experience does this student have, and what is their main tech stack?"
response = chain.invoke(question)
print(f"\n🔍 QUESTION: {question}")
print("-" * 50)
print(f"🤖 ANALYSIS:\n{response}")
🔍 QUESTION: What notable projects or academic experience does this student have, and what is their main tech stack?--------------------------------------------------🤖 ANALYSIS:Based on the CV provided, this is the analysis for Fernanda Paredes:### Notable Projects and Academic ExperienceFernanda Paredes is a 9th semester Software Engineering student (UTP) seeking her first professional opportunity as a Data Analyst Trainee.**Key Projects:**1. **Academic Project as Data Analyst Trainee (Jun 2025 - Feb 2026)**2. **Hackathon Winner**: First place in university Hackathon for developing a recycling application### Main Tech StackFernanda's tech stack is mixed, reflecting her interest in both software development and data analysis:| Area | Technologies ||------|-------------|| Data Analysis / BI | Python, PowerBI || Software Development | Java, Spring Boot |**Conclusion:** Fernanda has a solid foundation in development tools and demonstrated initiative in data (Python, PowerBI), which aligns with her goal of becoming a Data Analyst Trainee. Winning a Hackathon indicates high potential and execution capability.
Now let’s analyze ALL candidates at once and extract structured data:
# Batch extraction cell: run every generated CV through a structured-output
# chain and collect the results into a pandas DataFrame.
import glob

import pandas as pd
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field


class StudentProfile(BaseModel):
    """Structured record the LLM must extract from one student CV."""

    nombre: str = Field(description="Full name of the student")
    universidad: str = Field(description="Name of university or institute")
    ciclo_actual: str = Field(description="Current semester (e.g., 7th Semester)")
    # FIX: list[str] (was bare `list`) so the JSON schema sent to the LLM
    # declares an array of strings instead of an untyped array.
    stack_principal: list[str] = Field(description="Top 5 technologies they know")
    tipo_perfil: str = Field(description="Classify as: Backend, Frontend, Data, Fullstack, or Management")
    potencial_contratacion: str = Field(description="Brief justification for hiring them as an intern")


parser = JsonOutputParser(pydantic_object=StudentProfile)

template_extract = """You are an Expert in Youth Employability and IT Recruitment.
Analyze this student's CV and extract structured data.

USE THIS JSON FORMAT:
{format_instructions}

CV TEXT:
{context}"""
# FIX: format instructions are loop-invariant — bind them into the prompt
# once with .partial() instead of recomputing and passing them per CV.
prompt_extract = ChatPromptTemplate.from_template(template_extract).partial(
    format_instructions=parser.get_format_instructions()
)
chain_extract = prompt_extract | llm | parser

# Process all CVs
resultados = []
archivos = glob.glob("cvs_estudiantes_final/*.pdf")
for pdf in archivos:
    loader = PyPDFLoader(pdf)
    pages = loader.load()
    texto_completo = "\n".join(p.page_content for p in pages)
    data = chain_extract.invoke({"context": texto_completo})
    resultados.append(data)
    print(f"✓ Processed: {data['nombre']} ({data['ciclo_actual']}) -> {data['tipo_perfil']}")

# Create DataFrame (display() is the Colab/Jupyter rich renderer)
df = pd.DataFrame(resultados)
display(df[['nombre', 'universidad', 'tipo_perfil', 'potencial_contratacion']])
Finally, let’s perform a semantic search across all candidates:
# Cross-candidate search cell: chunk every CV, index the chunks with source
# metadata, and run an MMR-diversified semantic search over all candidates.
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
# FIX: RunnablePassthrough is used in the chain below but was never
# imported anywhere in this file.
from langchain_core.runnables import RunnablePassthrough

# Load all CVs (reuses `archivos` from the extraction step above)
docs_totales = []
for pdf in archivos:
    loader = PyPDFLoader(pdf)
    documentos = loader.load()
    # FIX: os.path.basename instead of split("/") — glob returns
    # backslash-separated paths on Windows, which split("/") leaves intact.
    nombre_archivo = os.path.basename(pdf)
    for doc in documentos:
        doc.metadata["source"] = nombre_archivo
    docs_totales.extend(documentos)

# Split into chunks small enough to retrieve precisely, with overlap so
# sentences straddling a boundary are not lost.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=100,
)
splits = text_splitter.split_documents(docs_totales)

# Create vector store with MMR retriever
vectorstore = FAISS.from_documents(splits, embeddings)
retriever = vectorstore.as_retriever(
    search_type="mmr",  # Maximum Marginal Relevance for diversity
    search_kwargs={"k": 5, "fetch_k": 20},
)

# RAG chain
template_rag = """You are the 'Talent Scout 3000'. Your mission is to identify high-potential students.

CONTEXT FROM CVs:
{context}

QUESTION:
{question}

Generate a list of matching students with:
- Student Name (Source File)
- Why they match: [Brief explanation]"""
prompt_rag = ChatPromptTemplate.from_template(template_rag)


def format_docs(docs):
    """Render retrieved chunks as text, each tagged with its source file."""
    return "\n\n".join(
        f"[Source: {d.metadata['source']}]\n{d.page_content}" for d in docs
    )


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_rag
    | llm
    | StrOutputParser()
)

# Search query
query = "Which students know Python and have developed complex systems (like a Virtual Library or similar)?"
print(f"🔍 Search: {query}")
print("-" * 50)
response = rag_chain.invoke(query)
print(response)
Sample Output:
🔍 Search: Which students know Python and have developed complex systems (like a Virtual Library or similar)?--------------------------------------------------The following students meet the criteria:| Student Name (Source) | Why They Match ||----------------------|----------------|| **Fernanda Mendoza** (CV_Estudiante_2_Fernanda_Mendoza.pdf) | Knows Python (mentioned in profile). **Developed a complex system:** "Virtual Library System with user roles and stock management." || **Nicolas Paredes** (CV_Estudiante_3_Nicolas_Paredes.pdf) | Knows Python (profile and title). **Developed:** "RESTful API for financial management using Python and FastAPI." || **Ximena Rios** (CV_Estudiante_1_Ximena_Rios.pdf) | Knows Python. **Developed:** "RESTful API for financial management using Python and FastAPI." |