Overview
RCLI provides functions to query:
- Pipeline state: Current processing stage (idle, listening, processing, speaking)
- Engine info: Model names, device info, configuration
- Audio levels: Real-time microphone RMS for waveform displays
- Performance metrics: LLM, TTS, and STT performance from last operation
- Context usage: Token counts and context window utilization
Pipeline State
rcli_get_state
Get the current pipeline state.
int rcli_get_state(RCLIHandle handle);
Pipeline state:
0: IDLE - Not processing
1: LISTENING - Capturing microphone input
2: PROCESSING - Running LLM inference
3: SPEAKING - Playing TTS audio
4: INTERRUPTED - Processing was cancelled
Example: State Polling
const char* state_names[] = {"IDLE", "LISTENING", "PROCESSING", "SPEAKING", "INTERRUPTED"};
int state = rcli_get_state(handle);
printf("Current state: %s\n", state_names[state]);
// Wait for idle
while (rcli_get_state(handle) != 0) {
usleep(100000); // Poll every 100ms
}
printf("Pipeline is idle\n");
Use rcli_set_state_callback() for event-driven state updates instead of polling.
rcli_get_info
Get engine info as JSON (model names, device info, etc.).
const char* rcli_get_info(RCLIHandle handle);
Engine handle (must be initialized)
JSON string with engine information. Do not free - the string is owned by the engine. Example:
{
"llm_model": "Qwen3 0.6B",
"stt_model": "Whisper base.en",
"tts_model": "Piper Lessac",
"gpu_layers": 99,
"context_size": 4096,
"device": "Apple M1 Max",
"actions_count": 43,
"actions_enabled": 38
}
Example
const char* info = rcli_get_info(handle);
printf("Engine Info:\n%s\n", info);
Model Queries
rcli_get_llm_model
Get the name of the active LLM model.
const char* rcli_get_llm_model(RCLIHandle handle);
Model name (e.g., "Qwen3 0.6B", "Liquid LFM2 1.2B"). Do not free.
rcli_get_tts_model
Get the name of the active TTS voice.
const char* rcli_get_tts_model(RCLIHandle handle);
Voice name (e.g., "Piper Lessac", "Kokoro American Female"). Do not free.
rcli_get_stt_model
Get the name of the active offline STT model.
const char* rcli_get_stt_model(RCLIHandle handle);
Model name (e.g., "Whisper base.en", "Parakeet TDT 1.1B"). Do not free.
Example: Print Active Models
printf("Active Models:\n");
printf(" LLM: %s\n", rcli_get_llm_model(handle));
printf(" STT: %s\n", rcli_get_stt_model(handle));
printf(" TTS: %s\n", rcli_get_tts_model(handle));
// Output:
// Active Models:
// LLM: Qwen3 0.6B
// STT: Whisper base.en
// TTS: Piper Lessac
rcli_is_using_parakeet
Check if using Parakeet TDT (high-accuracy STT) vs. Whisper.
int rcli_is_using_parakeet(RCLIHandle handle);
1: Using Parakeet TDT (high-accuracy)
0: Using Whisper (default)
Audio Level
rcli_get_audio_level
Get current microphone audio level (RMS) for waveform display.
float rcli_get_audio_level(RCLIHandle handle);
Audio level (0.0 - 1.0 RMS). 0.0 = silence, 1.0 = maximum.
rcli_start_listening(handle);
// Draw a 20-cell waveform meter, refreshing 20x/sec (~5 seconds total).
for (int frame = 0; frame < 100; frame++) {
    float rms = rcli_get_audio_level(handle);
    int filled = (int)(rms * 20);
    printf("\r[");
    for (int cell = 0; cell < 20; cell++) {
        fputs(cell < filled ? "█" : "·", stdout);
    }
    printf("] %.2f", rms);
    fflush(stdout); // force the carriage-return redraw to appear immediately
    usleep(50000); // 50ms refresh
}
// Output:
// [███████·············] 0.35
rcli_get_last_llm_perf
Get LLM performance from the last rcli_process_command() or rcli_rag_query() call.
void rcli_get_last_llm_perf(
RCLIHandle handle,
int* out_tokens,
double* out_tok_per_sec,
double* out_ttft_ms,
double* out_total_ms
);
Output: Number of tokens generated. Pass NULL to skip.
Output: Token generation speed (tokens/second). Pass NULL to skip.
Output: Time to first token in milliseconds. Pass NULL to skip.
Output: Total generation time in milliseconds. Pass NULL to skip.
Example: Display LLM Stats
const char* response = rcli_process_command(handle, "Tell me a joke");
int tokens;
double tok_per_sec, ttft_ms, total_ms;
rcli_get_last_llm_perf(handle, &tokens, &tok_per_sec, &ttft_ms, &total_ms);
printf("LLM Performance:\n");
printf(" Tokens: %d\n", tokens);
printf(" Speed: %.1f tok/s\n", tok_per_sec);
printf(" TTFT: %.1f ms\n", ttft_ms);
printf(" Total: %.1f ms\n", total_ms);
// Output:
// LLM Performance:
// Tokens: 42
// Speed: 38.5 tok/s
// TTFT: 89.2 ms
// Total: 1091.3 ms
rcli_get_context_info
Get context window usage from the last LLM call.
void rcli_get_context_info(
RCLIHandle handle,
int* out_prompt_tokens,
int* out_ctx_size
);
Output: Total tokens in the last prompt (system + history + user). Pass NULL to skip.
Output: Model’s configured context window size. Pass NULL to skip.
Example: Context Usage
rcli_process_command(handle, "Explain quantum computing");
int prompt_tokens, ctx_size;
rcli_get_context_info(handle, &prompt_tokens, &ctx_size);
float usage = (float)prompt_tokens / ctx_size * 100;
printf("Context Usage: %d / %d tokens (%.1f%%)\n",
prompt_tokens, ctx_size, usage);
// Output:
// Context Usage: 512 / 4096 tokens (12.5%)
If context usage exceeds ~80%, consider clearing conversation history with rcli_clear_history().
rcli_get_last_tts_perf
Get TTS performance from the last rcli_speak() call.
void rcli_get_last_tts_perf(
RCLIHandle handle,
int* out_samples,
double* out_synthesis_ms,
double* out_rtf
);
Output: Number of audio samples generated. Pass NULL to skip.
Output: Synthesis time in milliseconds. Pass NULL to skip.
Output: Real-time factor (RTF < 1.0 means faster than real-time). Pass NULL to skip.
Example: TTS Stats
rcli_speak(handle, "Hello, world!");
int samples;
double synthesis_ms, rtf;
rcli_get_last_tts_perf(handle, &samples, &synthesis_ms, &rtf);
printf("TTS Performance:\n");
printf(" Samples: %d\n", samples);
printf(" Synthesis: %.1f ms\n", synthesis_ms);
printf(" RTF: %.2f\n", rtf);
// Output:
// TTS Performance:
// Samples: 22050
// Synthesis: 123.4 ms
// RTF: 0.45
rcli_get_last_stt_perf
Get STT performance from the last rcli_stop_capture_and_transcribe() call.
void rcli_get_last_stt_perf(
RCLIHandle handle,
double* out_audio_ms,
double* out_transcribe_ms
);
Output: Audio duration in milliseconds. Pass NULL to skip.
Output: Transcription time in milliseconds. Pass NULL to skip.
Example: STT Stats
rcli_start_capture(handle);
// ... user speaks ...
const char* transcript = rcli_stop_capture_and_transcribe(handle);
double audio_ms, transcribe_ms;
rcli_get_last_stt_perf(handle, &audio_ms, &transcribe_ms);
float rtf = transcribe_ms / audio_ms;
printf("STT Performance:\n");
printf(" Audio: %.1f ms\n", audio_ms);
printf(" Transcribe: %.1f ms\n", transcribe_ms);
printf(" RTF: %.2f\n", rtf);
// Output:
// STT Performance:
// Audio: 3500.0 ms
// Transcribe: 234.5 ms
// RTF: 0.07
#include "api/rcli_api.h"
#include <stdio.h>
/*
 * Print a boxed summary of the engine's most recent performance metrics:
 * LLM generation stats, context-window usage, and TTS synthesis stats.
 *
 * handle: initialized engine handle. Metrics reflect the last
 * rcli_process_command()/rcli_rag_query() and rcli_speak() calls.
 */
void print_performance_stats(RCLIHandle handle) {
    // LLM stats from the last LLM call. Zero-initialize so the table prints
    // sanely even if a getter leaves an output untouched.
    int llm_tokens = 0;
    double llm_tok_per_sec = 0.0, llm_ttft_ms = 0.0, llm_total_ms = 0.0;
    rcli_get_last_llm_perf(handle, &llm_tokens, &llm_tok_per_sec, &llm_ttft_ms, &llm_total_ms);
    // Context usage from the last prompt.
    int prompt_tokens = 0, ctx_size = 0;
    rcli_get_context_info(handle, &prompt_tokens, &ctx_size);
    // TTS stats from the last rcli_speak() call.
    int tts_samples = 0;
    double tts_synthesis_ms = 0.0, tts_rtf = 0.0;
    rcli_get_last_tts_perf(handle, &tts_samples, &tts_synthesis_ms, &tts_rtf);
    // Guard against ctx_size == 0 (engine not initialized) — the original
    // divided unconditionally, producing inf/NaN in that case.
    float ctx_pct = ctx_size > 0 ? (float)prompt_tokens / ctx_size * 100 : 0.0f;
    printf("\n┌──────────────────────────────┐\n");
    printf("│ Performance Metrics │\n");
    printf("├──────────────────────────────┤\n");
    printf("│ LLM │\n");
    printf("│ Tokens: %-14d │\n", llm_tokens);
    printf("│ Speed: %-10.1f tok/s │\n", llm_tok_per_sec);
    printf("│ TTFT: %-14.1f ms │\n", llm_ttft_ms);
    printf("│ Total: %-14.1f ms │\n", llm_total_ms);
    printf("│ Context: %d/%d (%.1f%%) │\n",
        prompt_tokens, ctx_size, ctx_pct);
    printf("├──────────────────────────────┤\n");
    printf("│ TTS │\n");
    printf("│ Samples: %-14d │\n", tts_samples);
    printf("│ Synthesis: %-14.1f ms │\n", tts_synthesis_ms);
    printf("│ RTF: %-14.2f │\n", tts_rtf);
    printf("└──────────────────────────────┘\n");
}
int main(void) {
    // Create the engine; bail out early rather than passing a NULL handle around.
    RCLIHandle handle = rcli_create(NULL);
    if (!handle) {
        fprintf(stderr, "Failed to create RCLI engine\n");
        return 1;
    }
    rcli_init(handle, "/path/to/models", 99);
    // Process a text command through the LLM.
    const char* response = rcli_process_command(handle, "Tell me about black holes");
    // Guard against a NULL response before printing/speaking it —
    // printf("%s", NULL) is undefined behavior.
    if (response) {
        printf("Response: %s\n", response);
        // Speak the response through the active TTS voice.
        rcli_speak(handle, response);
    }
    // Display performance stats gathered by the calls above.
    print_performance_stats(handle);
    rcli_destroy(handle);
    return 0;
}
See Also