Skip to main content
Arcana integrates with LLMs for question answering via Arcana.ask/2 and the Agent pipeline. Use model strings, functions, or custom modules.

Quick Start

# Using req_llm with model strings
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o-mini"
)

# Using a function
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: fn _prompt -> {:ok, "Generated answer"} end
)

# Using a custom module
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: MyApp.CustomLLM
)
req_llm provides a unified interface to 45+ LLM providers.

Setup

1

Add Dependency

mix.exs
defp deps do
  [
    {:arcana, "~> 1.0"},
    {:req_llm, "~> 1.2"}
  ]
end
2

Configure API Keys

config/runtime.exs
# OpenAI
config :req_llm, :openai,
  api_key: System.get_env("OPENAI_API_KEY")

# Anthropic
config :req_llm, :anthropic,
  api_key: System.get_env("ANTHROPIC_API_KEY")

# Google
config :req_llm, :google,
  api_key: System.get_env("GOOGLE_API_KEY")
3

Set Environment Variables

.env
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=AIza...

Model Strings

Pass model strings directly to ask/2 or Agent functions:
# GPT-4o mini (recommended)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o-mini"
)

# GPT-4o
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o"
)

# GPT-4 Turbo
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4-turbo"
)
Cost: $0.15-15.00 per 1M input tokens

Model String Options

Pass options as a tuple:
# Custom API key
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {"openai:gpt-4o-mini", api_key: "sk-..."}
)

# Temperature and max tokens
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {
    "anthropic:claude-sonnet-4-20250514",
    temperature: 0.7,
    max_tokens: 1024
  }
)

# Provider-specific options
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {
    "zai:glm-4.7",
    provider_options: %{thinking: true}  # Z.ai thinking mode
  }
)

Global Configuration

Set a default LLM in your config:
# config/config.exs
config :arcana, llm: "openai:gpt-4o-mini"

# With options
config :arcana, llm: {
  "anthropic:claude-sonnet-4-20250514",
  temperature: 0.7
}

# Function
config :arcana, llm: fn prompt ->
  {:ok, MyApp.LLM.complete(prompt)}
end

# Module
config :arcana, llm: MyApp.CustomLLM
Then use without specifying :llm:
{:ok, answer} = Arcana.ask("question", repo: MyApp.Repo)

Function-Based LLM

Provide a function for custom LLM logic:
Signature: fn prompt -> {:ok, response} | {:error, reason}
llm = fn prompt ->
  # Simple completion
  case HTTPoison.post(
    "https://api.openai.com/v1/completions",
    Jason.encode!(%{prompt: prompt, model: "gpt-4o-mini"}),
    headers()
  ) do
    {:ok, %{body: body}} ->
      %{"choices" => [%{"text" => text}]} = Jason.decode!(body)
      {:ok, text}

    {:error, reason} ->
      {:error, reason}
  end
end

{:ok, answer} = Arcana.ask("question", repo: MyApp.Repo, llm: llm)

Custom LLM Module

Implement custom LLM logic in a module:
defmodule MyApp.CustomLLM do
  @moduledoc """
  Custom `Arcana.LLM` implementation backed by req_llm.

  Builds a system prompt from the retrieved context chunks and delegates
  text generation to `ReqLLM.generate_text/3`.
  """

  @behaviour Arcana.LLM

  @impl true
  def complete(prompt, context, opts) do
    # Fall back to the cheap default model / moderate temperature when unset.
    model = opts[:model] || "openai:gpt-4o-mini"
    temperature = opts[:temperature] || 0.7

    request =
      ReqLLM.Context.new([
        ReqLLM.Context.system(system_prompt_for(context)),
        ReqLLM.Context.user(prompt)
      ])

    case ReqLLM.generate_text(model, request, temperature: temperature) do
      {:ok, response} -> {:ok, ReqLLM.Response.text(response)}
      {:error, _reason} = error -> error
    end
  end

  # No retrieved chunks: plain assistant persona.
  defp system_prompt_for([]) do
    "You are a helpful assistant."
  end

  # Joins chunk texts (maps with :text, or bare binaries) into one system prompt.
  defp system_prompt_for(chunks) do
    joined =
      Enum.map_join(chunks, "\n\n---\n\n", fn
        %{text: text} -> text
        text when is_binary(text) -> text
      end)

    """
    You are a helpful assistant with access to the following reference material.
    Answer questions directly and naturally, using this information to inform your responses.
    Don't mention or reference the material explicitly in your answers.

    Reference material:
    #{joined}
    """
  end
end
Configuration:
# config/config.exs
config :arcana, llm: MyApp.CustomLLM

# Or use directly
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: MyApp.CustomLLM
)

Agentic RAG

Use LLMs with the Agent pipeline for complex workflows:
alias Arcana.Agent

llm = "openai:gpt-4o-mini"

ctx =
  Agent.new("Compare Elixir and Erlang features", repo: MyApp.Repo, llm: llm)
  |> Agent.gate()                                 # Skip retrieval if not needed
  |> Agent.rewrite()                              # Clean up conversational input
  |> Agent.select(collections: ["elixir", "erlang"])  # Choose collections
  |> Agent.expand()                               # Add synonyms
  |> Agent.decompose()                            # Split complex questions
  |> Agent.search()                               # Execute search
  |> Agent.reason()                               # Multi-hop reasoning
  |> Agent.rerank(threshold: 7)                   # Filter low-quality chunks
  |> Agent.answer()                               # Generate answer

ctx.answer
# => "Generated answer based on retrieved context..."

Pipeline Steps with LLM

Each Agent step uses the LLM:
Step — LLM Purpose
gate/2 — Decide if retrieval is needed
rewrite/2 — Clean up conversational queries
select/2 — Choose relevant collections
expand/2 — Add synonyms and related terms
decompose/2 — Split into sub-questions
reason/2 — Evaluate if more search needed
rerank/2 — Score chunk relevance (0-10)
answer/2 — Generate final answer

Custom Prompts

Override default prompts for any step:
ctx
|> Agent.expand(
  prompt: fn question ->
    "Generate 5 synonyms for key terms in: #{question}"
  end
)
|> Agent.decompose(
  prompt: fn question ->
    "Break this into 2-3 focused sub-questions: #{question}"
  end
)
|> Agent.answer(
  prompt: fn question, chunks ->
    context = Enum.map_join(chunks, "\n\n", & &1.text)
    """
    Context: #{context}
    
    Question: #{question}
    
    Provide a detailed answer with examples.
    """
  end
)

Streaming Responses

Stream LLM responses for better UX in LiveView:
defmodule MyAppWeb.ChatLive do
  use MyAppWeb, :live_view

  # Retrieve context synchronously, then hand off to an async streaming task.
  def handle_event("ask", %{"question" => question}, socket) do
    {:ok, context} = Arcana.search(question, repo: MyApp.Repo, limit: 5)

    send(self(), {:stream_answer, question, context})

    {:noreply, assign(socket, streaming: true, answer: "")}
  end

  def handle_info({:stream_answer, question, context}, socket) do
    parent = self()

    # Run the LLM call off the LiveView process so the UI stays responsive.
    Task.start(fn -> stream_answer_to(parent, question, context) end)

    {:noreply, socket}
  end

  # Append each streamed token to the accumulated answer.
  def handle_info({:chunk, content}, socket) do
    {:noreply, update(socket, :answer, fn answer -> answer <> content end)}
  end

  def handle_info(:stream_done, socket) do
    {:noreply, assign(socket, streaming: false)}
  end

  # Runs inside the Task: streams LLM tokens back to the LiveView process.
  defp stream_answer_to(parent, question, context) do
    context_text = Enum.map_join(context, "\n\n", & &1.text)

    llm_context =
      ReqLLM.Context.new([
        ReqLLM.Context.system("""
          Answer based on this context:
          #{context_text}
        """),
        ReqLLM.Context.user(question)
      ])

    {:ok, response} = ReqLLM.stream_text("openai:gpt-4o-mini", llm_context)

    response
    |> ReqLLM.StreamResponse.tokens()
    |> Stream.each(&send(parent, {:chunk, &1}))
    |> Stream.run()

    send(parent, :stream_done)
  end
end

Custom RAG Module

Wrap Arcana for app-specific RAG:
defmodule MyApp.RAG do
  @moduledoc """
  Application-specific wrapper around Arcana's `ask/2` and Agent pipeline.
  """

  @default_model "openai:gpt-4o-mini"
  @default_limit 5

  @doc """
  One-shot RAG question answering via `Arcana.ask/2`.

  Options: `:repo`, `:model`, `:limit`, `:collection`.
  """
  def ask(question, opts \\ []) do
    base_opts = [
      repo: Keyword.get(opts, :repo, MyApp.Repo),
      llm: Keyword.get(opts, :model, @default_model),
      limit: Keyword.get(opts, :limit, @default_limit),
      mode: :hybrid
    ]

    # Only scope the search to a collection when one was given.
    ask_opts =
      if collection = Keyword.get(opts, :collection) do
        Keyword.put(base_opts, :collection, collection)
      else
        base_opts
      end

    Arcana.ask(question, ask_opts)
  end

  @doc """
  Multi-step RAG via the Agent pipeline (expand, search, rerank, answer).

  Options: `:repo`, `:model`, `:collections`.
  """
  def ask_with_agent(question, opts \\ []) do
    repo = Keyword.get(opts, :repo, MyApp.Repo)
    model = Keyword.get(opts, :model, @default_model)

    ctx =
      question
      |> Arcana.Agent.new(repo: repo, llm: model)
      |> maybe_select(Keyword.get(opts, :collections))
      |> Arcana.Agent.expand()
      |> Arcana.Agent.search()
      |> Arcana.Agent.rerank(threshold: 7)
      |> Arcana.Agent.answer()

    {:ok, ctx.answer}
  end

  # Narrow to specific collections only when the caller provided any.
  defp maybe_select(ctx, nil), do: ctx

  defp maybe_select(ctx, collections) do
    Arcana.Agent.select(ctx, collections: collections)
  end
end
Usage:
# Simple ask
{:ok, answer} = MyApp.RAG.ask("What is Elixir?")

# With options
{:ok, answer} = MyApp.RAG.ask(
  "What is Elixir?",
  collection: "docs",
  model: "anthropic:claude-sonnet-4-20250514"
)

# Agent pipeline
{:ok, answer} = MyApp.RAG.ask_with_agent(
  "Compare Elixir and Erlang",
  collections: ["elixir", "erlang"]
)

Cost Tracking

Monitor LLM costs via telemetry:
defmodule MyApp.LLMLogger do
  @moduledoc """
  Telemetry handlers that log Arcana LLM call timing and req_llm token costs.
  """

  require Logger

  @doc """
  Attaches both telemetry handlers. Call once at application start.
  """
  def setup do
    # Track Arcana LLM calls.
    # NOTE: use remote captures (&__MODULE__.fun/4) — :telemetry warns that
    # local/anonymous captures force the handler through the slower
    # erlang:apply path and prevent safe module reloading.
    :telemetry.attach(
      "arcana-llm-logger",
      [:arcana, :llm, :complete, :stop],
      &__MODULE__.handle_llm/4,
      nil
    )

    # Track req_llm token usage.
    :telemetry.attach(
      "req-llm-cost-logger",
      [:req_llm, :token_usage],
      &__MODULE__.handle_cost/4,
      nil
    )
  end

  @doc false
  # Logs model, truncated prompt, wall-clock duration, and success flag
  # for each completed Arcana LLM call.
  def handle_llm([:arcana, :llm, :complete, :stop], measurements, metadata, _config) do
    # :duration arrives in native time units; convert for readable logs.
    duration_ms = System.convert_time_unit(measurements.duration, :native, :millisecond)

    Logger.info("""
    LLM Call:
      Model: #{metadata.model}
      Prompt: #{String.slice(metadata.prompt, 0..100)}...
      Duration: #{duration_ms}ms
      Success: #{metadata.success}
    """)
  end

  @doc false
  # Logs per-call token counts and estimated cost reported by req_llm.
  def handle_cost([:req_llm, :token_usage], measurements, metadata, _config) do
    Logger.info("""
    Token Usage:
      Model: #{metadata.model}
      Input: #{measurements.input_tokens} tokens
      Output: #{measurements.output_tokens} tokens
      Cost: $#{Float.round(measurements.total_cost, 4)}
    """)
  end
end

# In application.ex
MyApp.LLMLogger.setup()

Best Practices

  1. Use gpt-4o-mini for development - Fast and cheap ($0.15/1M tokens)
  2. Upgrade to Claude Sonnet 4 for production - Better quality, longer context
  3. Set max_tokens - Prevent runaway costs
  4. Use temperature=0.7 - Good balance of creativity and consistency
  5. Stream responses - Better UX for chat interfaces
  6. Monitor costs - Attach telemetry handlers
  7. Cache common queries - LLM calls are expensive
  8. Use hybrid search - Better context = better answers

Model Selection Guide

Use Case — Recommended Model — Reason
Development — gpt-4o-mini — Fast, cheap, good quality
Production — claude-sonnet-4-20250514 — Best quality, 200K context
High Volume — gemini-2.0-flash-exp — Free tier, fast
Complex Reasoning — gpt-4o or claude-opus-4 — Best reasoning capabilities
Low Latency — groq:llama-3.1-* — Ultra-fast inference
Budget — gemini-flash or gpt-4o-mini — Low cost

Troubleshooting

Add dependency:
{:req_llm, "~> 1.2"}
Run:
mix deps.get
Set environment variables:
export OPENAI_API_KEY=sk-...
export ANTHROPIC_API_KEY=sk-ant-...
Check config:
System.get_env("OPENAI_API_KEY")  # Should not be nil
Implement retry logic:
# Calls the LLM, retrying up to `retries` times on rate limiting with a
# one-second pause between attempts. Any other error is returned as-is.
defp call_with_retry(llm, prompt, context, retries \\ 3) do
  case Arcana.LLM.complete(llm, prompt, context, []) do
    {:error, :rate_limit} when retries > 0 ->
      # Back off briefly, then try again with one fewer retry remaining.
      Process.sleep(1000)
      call_with_retry(llm, prompt, context, retries - 1)

    {:ok, _response} = ok ->
      ok

    {:error, _reason} = error ->
      error
  end
end
Increase timeout:
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {"openai:gpt-4o-mini", timeout: 60_000}
)

Next Steps

Agentic RAG Guide

Build sophisticated RAG pipelines

Embeddings

Configure embedding providers

Build docs developers (and LLMs) love