Skip to main content

Overview

Kamutini is an advanced AI voice assistant built in Python that combines Google Gemini AI with smart home control capabilities. Designed specifically for elderly users (like Rosario, 66), it features natural language processing, text-to-speech, web search, and direct Roku TV control.
Project Name: asistantkamutini.py
Created By: Daniel Tecnologi
Location: ~/workspace/source/proyectos/ai creator/asistantkamutini.py

Core Features

  • Google Gemini AI Integration: Uses gemini-1.5-flash model for natural conversations
  • Roku TV Control: Direct API integration for app launching, searching, and control
  • Voice Synthesis: Spanish text-to-speech using gTTS and pygame
  • Web Search: Google Custom Search API for real-time information
  • Conversational Memory: Maintains context across multiple interactions
  • Personalized Personality: Religious, warm, and respectful tone for elderly users

System Architecture

API Configuration

# Google Gemini API configuration
API_KEY_GEMINI = ""  # Your Gemini API key; sent with each generateContent request
GEMINI_MODEL = "gemini-1.5-flash"  # Model name interpolated into the request URL

# Generation parameters (forwarded in the request's "generationConfig")
TEMP = 0.6            # "temperature": randomness of sampling
TOP_K = 40            # "topK": top-K sampling
TOP_P = 0.9           # "topP": nucleus sampling
MAX_TOKENS = 250      # "maxOutputTokens": maximum response length

# Google Custom Search credentials
API_KEY_GOOGLE_SEARCH = ""  # Sent as the `key` query parameter
SEARCH_ENGINE_ID = ""  # Sent as the `cx` query parameter

# Network configuration
TARGET_IP = "192.168.1.8"  # Roku TV IP address; probed first and used as the fallback device
MAX_HISTORIAL = 10          # Conversation memory limit (number of turns kept)

Roku TV Integration

The assistant can control Roku devices via HTTP API:
# Maps a lowercase app name (as it appears inside an `app(...)` tag) to the
# identifier that is appended to the Roku `launch/<id>` endpoint.
ROKU_APPS = {
    "netflix": "12",
    "youtube": "837",  # also the fallback ID used when an app name is not in this table
    "prime": "13",
    "disney": "291097",
    "spotify": "199",
    "hbo": "61322",
    "plex": "3847",
    # Not a numeric ID — presumably the built-in TV tuner input; verify on the device
    "television": "tvinput.dtv"
}

def enviar_comando_roku(ip, endpoint, metodo="POST"):
    """Send one command to a Roku device over its HTTP control port (8060).

    Args:
        ip: Address of the Roku device.
        endpoint: Path appended to ``http://<ip>:8060/`` (e.g. ``keypress/Home``).
        metodo: ``"POST"`` (default) issues a POST; any other value issues a GET.

    Returns:
        True when the HTTP request completed (whatever the status code),
        False when the request failed at the network level.
    """
    url = f"http://{ip}:8060/{endpoint}"
    enviar = requests.post if metodo == "POST" else requests.get
    try:
        enviar(url, timeout=5)
    except requests.RequestException:
        # Only network/HTTP-layer failures mean "command not delivered";
        # KeyboardInterrupt and programming errors must not be swallowed.
        return False
    return True

Network Discovery

Automatic Roku device detection on the local network:
def verificar_roku_por_info(ip):
    """Probe *ip* for a Roku device by querying its ``query/device-info`` endpoint.

    Args:
        ip: Address to probe.

    Returns:
        ``{"ip": ip, "tipo": "Roku (<model name>)"}`` when the host answers
        with HTTP 200 and a ``<device-info>`` document, otherwise ``None``.
        The model name falls back to ``"Roku"`` when no ``<model-name>``
        element is present.
    """
    url = f"http://{ip}:8060/query/device-info"
    try:
        response = requests.get(url, timeout=1.5)
    except requests.RequestException:
        # Unreachable host / timeout: simply not a Roku we can talk to.
        return None

    if response.status_code != 200 or "<device-info>" not in response.text:
        return None

    model_match = re.search(r'<model-name>(.*?)</model-name>', response.text)
    model_name = model_match.group(1) if model_match else "Roku"
    return {"ip": ip, "tipo": f"Roku ({model_name})"}

KamutiniEngine Class

Initialization

class KamutiniEngine:
    """Assistant engine holding the active TV address, audio mixer and chat history."""

    def __init__(self):
        """Scan for Roku devices, pick the active one and initialise runtime state."""
        print("🚀 Kamutini Engine v13.0 (Cloud Gemini API)...")

        # Prefer the first discovered device; otherwise use the configured address.
        self.dispositivos_tv = escanear_red_tvs()
        if self.dispositivos_tv:
            self.active_ip = self.dispositivos_tv[0]['ip']
        else:
            self.active_ip = TARGET_IP

        self.app_abierta = "el Menú de Inicio"
        self.historial = []

        pygame.mixer.init()

Gemini API Processing

def procesar_gemini(self, consulta):
    """Send the stored conversation plus *consulta* to Gemini and return its reply.

    Refreshes ``self.app_abierta`` from the active Roku device before building
    the request.

    Args:
        consulta: The user's new message.

    Returns:
        The text of the first part of the first candidate, stripped of
        surrounding whitespace.

    Raises:
        requests.RequestException: on network failure or timeout.
        KeyError, IndexError: when the response JSON has no candidate text
            (for example an API error payload).
    """
    # NOTE(review): `ahora` is not used in the visible code; presumably it feeds
    # the construction of `system_instruction` — confirm before removing.
    ahora = datetime.now()
    self.app_abierta = obtener_app_actual(self.active_ip)

    url = f"https://generativelanguage.googleapis.com/v1beta/models/{GEMINI_MODEL}:generateContent"
    # Pass the key in a header rather than the query string so it does not end
    # up in exception messages or logs that include the request URL.
    headers = {"x-goog-api-key": API_KEY_GEMINI}

    # Build conversation history: each stored turn becomes a user/model pair.
    contents = []
    for h in self.historial:
        contents.append({"role": "user", "parts": [{"text": h['u']}]})
        contents.append({"role": "model", "parts": [{"text": h['k']}]})

    contents.append({"role": "user", "parts": [{"text": consulta}]})

    payload = {
        "contents": contents,
        "systemInstruction": system_instruction,
        "generationConfig": {
            "temperature": TEMP,
            "topK": TOP_K,
            "topP": TOP_P,
            "maxOutputTokens": MAX_TOKENS
        }
    }

    response = requests.post(url, headers=headers, json=payload, timeout=15)
    return response.json()['candidates'][0]['content']['parts'][0]['text'].strip()

Tag Processing System

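The model embeds special tags of the form /*tag*/ in its reply; the assistant parses them to trigger actions and strips them from the text before it is spoken: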
def responder(self, consulta):
    """Get the model's reply to *consulta*, run any action tags, and clean the text.

    Tags have the form ``/*...*/``. Recognised tags: ``search(app, query)``,
    ``resultados(query)``, ``app(name)``, ``home`` and ``power``. A tag that
    is recognised but malformed (e.g. a missing closing parenthesis) is
    ignored instead of raising.

    Args:
        consulta: The user's message.

    Returns:
        A tuple ``(respuesta_final, salir)`` where ``respuesta_final`` is the
        reply with all tags removed and ``salir`` is True when the reply
        contained ``/*salir*/``.
    """
    respuesta_cruda = self.procesar_gemini(consulta)

    for tag in re.findall(r'/\*(.*?)\*/', respuesta_cruda):
        if "search(" in tag:
            # Accept any amount of whitespace after the comma separator.
            search_match = re.search(r'search\((.*?),\s*(.*?)\)', tag)
            if search_match:
                app = search_match.group(1).strip().lower()
                query = search_match.group(2).strip()
                ejecutar_busqueda_roku(self.active_ip, app, query)

        elif "resultados(" in tag:
            resultados_match = re.search(r'resultados\((.*?)\)', tag)
            if resultados_match:
                # Web-search findings are appended to the spoken reply.
                respuesta_cruda += google_search_custom(resultados_match.group(1))

        elif "app(" in tag:
            app_match = re.search(r'app\((.*?)\)', tag)
            if app_match:
                app_name = app_match.group(1).strip().lower()
                # Unknown app names fall back to ID '837'.
                enviar_comando_roku(self.active_ip,
                                    f"launch/{ROKU_APPS.get(app_name, '837')}")

        elif "home" in tag:
            enviar_comando_roku(self.active_ip, "keypress/Home")

        elif "power" in tag:
            enviar_comando_roku(self.active_ip, "keypress/PowerOff")

    respuesta_final = re.sub(r'/\*.*?\*/', '', respuesta_cruda).strip()
    # NOTE(review): the history is appended without a bound here; confirm that
    # trimming to the configured history limit happens for this append.
    self.historial.append({"u": consulta, "k": respuesta_final})

    return respuesta_final, "/*salir*/" in respuesta_cruda.lower()

Voice Synthesis

def hablar_local(self, texto):
    """Speak *texto* aloud in Spanish using gTTS for synthesis and pygame for playback.

    The text is first cleaned with ``limpiar_texto_para_audio``; nothing is
    spoken when the cleaned text is empty. Audio is written to a temporary
    MP3 in the working directory, played to completion (blocking), and the
    file is removed afterwards even when synthesis or playback fails.
    Errors are reported on stdout rather than raised.

    Args:
        texto: Raw reply text, possibly containing tags, markdown or URLs.
    """
    texto_limpio = self.limpiar_texto_para_audio(texto)
    if not texto_limpio:
        return

    filename = "temp_voice.mp3"
    try:
        tts = gTTS(text=texto_limpio, lang='es', slow=False)
        tts.save(filename)
        pygame.mixer.music.load(filename)
        try:
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy():
                time.sleep(0.1)
        finally:
            # Release the file even if playback fails, so it can be deleted.
            pygame.mixer.music.unload()
    except Exception as e:
        # Best-effort: a speech failure must not stop the assistant.
        print(f"⚠️ Error audio: {e}")
    finally:
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass  # Synthesis failed before the file was written.
        except OSError as e:
            print(f"⚠️ Error audio: {e}")

def limpiar_texto_para_audio(self, texto):
    """Return *texto* prepared for text-to-speech.

    Removes ``/*...*/`` action tags and URLs, drops ``*`` and ``#``
    characters, turns underscores into spaces and collapses all runs of
    whitespace into single spaces.

    Args:
        texto: Raw reply text.

    Returns:
        The cleaned text; an empty string when nothing speakable remains.
    """
    texto = re.sub(r'/\*.*?\*/', '', texto)  # Remove tags
    # Remove URLs before touching underscores: replacing "_" with a space
    # first would split a URL and leave its tail behind to be read aloud.
    texto = re.sub(r'http\S+', '', texto)
    texto = texto.replace('*', '').replace('#', '').replace('_', ' ')
    return " ".join(texto.split())

Installation

Dependencies

pip install requests gtts pygame
API Keys Required: You must configure:
  • Google Gemini API key
  • Google Custom Search API key and Search Engine ID

Configuration Steps

1

Install Dependencies

Install all required Python packages:
pip install requests gtts pygame
2

Configure API Keys

Edit the configuration section in asistantkamutini.py:
API_KEY_GEMINI = "your-gemini-api-key"
API_KEY_GOOGLE_SEARCH = "your-google-api-key"
SEARCH_ENGINE_ID = "your-search-engine-id"
3

Set Roku TV IP

Find your Roku TV’s IP address and configure:
TARGET_IP = "192.168.1.8"  # Your Roku TV IP
4

Run the Assistant

python asistantkamutini.py

Usage Examples

Basic Interaction

 Kamutini v13.0 listo. Usando gemini-1.5-flash vía API.

👤 Rosario: Hola, buenos días
🤖: Buenos días, Rosario. Que Dios la bendiga en este hermoso día.

👤 Rosario: Busca videos de cocina en YouTube
🤖: Con mucho gusto, Rosario. Abriendo YouTube para buscar videos de cocina.
[System launches YouTube and searches for "cocina"]

👤 Rosario: ¿Cuál es el clima hoy?
🤖: Déjame investigar eso para usted, Rosario.
[System searches Google and responds with weather information]

👤 Rosario: Apaga la televisión
🤖: Por supuesto, Rosario. Apagando la televisión.
[System sends power-off command to Roku TV]

Command Tags

User: "Abre Netflix"
Tag: /*app(netflix)*/
Action: Launches Netflix app on Roku

User: "Ve al inicio"
Tag: /*home*/
Action: Returns to Roku home screen

Advanced Features

Conversation Memory

The assistant maintains a rolling history of the last 10 interactions:
# Record the completed turn: "u" is the user's message, "k" the cleaned reply.
self.historial.append({"u": consulta, "k": respuesta_final})
# Once the history exceeds MAX_HISTORIAL entries, discard the oldest turn.
if len(self.historial) > MAX_HISTORIAL: 
    self.historial.pop(0)

Network Scanning

Automatic detection of Roku devices using multithreading:
def escanear_red_tvs():
    """Find Roku devices, trying the configured address before scanning the subnet.

    ``TARGET_IP`` is probed first; when it answers, it is returned as the only
    result. Otherwise the hosts ``.1``–``.254`` that share the first three
    octets of this machine's outbound address are probed in parallel.

    Returns:
        A list of ``{"ip": ..., "tipo": ...}`` dicts as produced by
        ``verificar_roku_por_info``; empty when nothing is found or the local
        address cannot be determined.
    """
    # First check target IP
    tv_principal = verificar_roku_por_info(TARGET_IP)
    if tv_principal:
        return [tv_principal]

    # Determine the local address: connecting a UDP socket sends no packets,
    # it only makes the OS choose the outbound interface.
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            ip_local = s.getsockname()[0]
    except OSError:
        # No usable network route: nothing to scan.
        return []
    prefijo = ip_local.rsplit(".", 1)[0]

    encontrados = []

    def _sondear(ip):
        # Probe one host and record it when it identifies itself as a Roku.
        dispositivo = verificar_roku_por_info(ip)
        if dispositivo:
            encontrados.append(dispositivo)

    # Scan entire subnet
    hilos = []
    for i in range(1, 255):
        t = threading.Thread(target=_sondear, args=(f"{prefijo}.{i}",))
        t.daemon = True
        t.start()
        hilos.append(t)

    # Wait for the probes with one shared deadline instead of a fixed sleep, so
    # answers arriving near the per-request timeout are not lost; stragglers
    # are daemon threads and are left behind.
    limite = time.monotonic() + 3.5
    for t in hilos:
        t.join(max(0.0, limite - time.monotonic()))

    # Return a snapshot so late threads cannot mutate the caller's list.
    return list(encontrados)

Google Custom Search Integration

def google_search_custom(query):
    """Query Google Custom Search and return the top snippets as one spoken sentence.

    Args:
        query: Search terms.

    Returns:
        ``" Rosario, encontré esto: "`` followed by up to three result
        snippets joined by spaces, or a fallback message when the request
        fails, the status is not 200, or no result carries a snippet.
    """
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        'key': API_KEY_GOOGLE_SEARCH,
        'cx': SEARCH_ENGINE_ID,
        'q': query,
        'num': 3
    }
    try:
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            items = response.json().get('items', [])
            # Skip results without a snippet; joining None would raise.
            snippets = [item['snippet'] for item in items if item.get('snippet')]
            if snippets:
                return " Rosario, encontré esto: " + " ".join(snippets)
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON body: fall through to the fallback message.
        pass
    return " No logré encontrar información en este momento."

Technical Specifications

GEMINI_MODEL
string
default:"gemini-1.5-flash"
Google Gemini model identifier used in the generateContent request (a model family that predates the 2.0 series)
TEMP
float
default:"0.6"
Temperature parameter for response randomness (0.0-1.0)
TOP_K
integer
default:"40"
Top-K sampling parameter for token selection
TOP_P
float
default:"0.9"
Nucleus sampling parameter (0.0-1.0)
MAX_TOKENS
integer
default:"250"
Maximum tokens per AI response
MAX_HISTORIAL
integer
default:"10"
Number of conversation turns to remember

File Reference

Source: /home/daytona/workspace/source/proyectos/ai creator/asistantkamutini.py:1
Version: v13.0
Lines of Code: 280

Wake Word Detection

Voice activation system for hands-free control

Intent Classification

ML model for understanding user intents

Build docs developers (and LLMs) love