Function Calling with Qwen-Agent
Function calling allows LLMs to interact with external tools and APIs. Qwen-Agent provides robust support for function calling following OpenAI’s pattern, with additional features like parallel execution.

Basic Function Calling

The fundamental pattern for function calling with Qwen models.

Complete Example

function_calling.py
import json
import os
from qwen_agent.llm import get_chat_model

# Define your function
def get_current_weather(location, unit='fahrenheit'):
    """Get the current weather in a given location.

    Returns a JSON string of mock data for a few known cities; any other
    location is reported with a temperature of 'unknown'.
    """
    # Canned responses keyed by a substring of the lowercased location.
    # Insertion order preserves the original matching priority.
    known_cities = {
        'tokyo': {'location': 'Tokyo', 'temperature': '10', 'unit': 'celsius'},
        'san francisco': {'location': 'San Francisco', 'temperature': '72', 'unit': 'fahrenheit'},
        'paris': {'location': 'Paris', 'temperature': '22', 'unit': 'celsius'},
    }
    needle = location.lower()
    for key, payload in known_cities.items():
        if key in needle:
            return json.dumps(payload)
    return json.dumps({'location': location, 'temperature': 'unknown'})

def test(fncall_prompt_type: str = 'qwen'):
    """Run one function-calling round trip against a Qwen model.

    Args:
        fncall_prompt_type: Prompt template used to encode function-calling
            instructions for the model (e.g. 'qwen').
    """
    # Initialize the LLM client (requires DASHSCOPE_API_KEY in the environment).
    llm = get_chat_model({
        'model': 'qwen-plus-latest',
        'model_server': 'dashscope',
        'api_key': os.getenv('DASHSCOPE_API_KEY'),
        'generate_cfg': {
            'fncall_prompt_type': fncall_prompt_type
        },
    })

    # Step 1: Define the conversation and the functions the model may call.
    messages = [{
        'role': 'user',
        'content': "What's the weather like in San Francisco?"
    }]

    functions = [{
        'name': 'get_current_weather',
        'description': 'Get the current weather in a given location',
        'parameters': {
            'type': 'object',
            'properties': {
                'location': {
                    'type': 'string',
                    'description': 'The city and state, e.g. San Francisco, CA',
                },
                'unit': {
                    'type': 'string',
                    'enum': ['celsius', 'fahrenheit']
                },
            },
            'required': ['location'],
        },
    }]

    # Step 2: Stream the model response. Each yielded value is the full list of
    # messages generated so far, so `responses` holds the final list after the loop.
    print('# Assistant Response 1:')
    responses = []
    for responses in llm.chat(
        messages=messages,
        functions=functions,
        stream=True,
    ):
        print(responses)

    messages.extend(responses)

    # Step 3: Check whether the model requested a function call.
    last_response = messages[-1]
    if last_response.get('function_call', None):
        # Step 4: Execute the requested function.
        available_functions = {
            'get_current_weather': get_current_weather,
        }

        function_name = last_response['function_call']['name']
        function_to_call = available_functions.get(function_name)
        if function_to_call is None:
            # Guard against the model hallucinating an unknown function name.
            raise ValueError(f'Model requested unknown function: {function_name}')
        function_args = json.loads(
            last_response['function_call']['arguments']
        )

        # Forward only the arguments the model actually supplied, so omitted
        # optional parameters (e.g. `unit`) fall back to the function's own
        # defaults instead of being overridden with None.
        call_kwargs = {'location': function_args.get('location')}
        if function_args.get('unit') is not None:
            call_kwargs['unit'] = function_args['unit']
        function_response = function_to_call(**call_kwargs)

        print('# Function Response:')
        print(function_response)

        # Step 5: Feed the function result back so the model can answer in prose.
        messages.append({
            'role': 'function',
            'name': function_name,
            'content': function_response,
        })

        print('# Assistant Response 2:')
        for responses in llm.chat(
            messages=messages,
            functions=functions,
            stream=True,
        ):
            print(responses)

if __name__ == '__main__':
    test(fncall_prompt_type='qwen')

How It Works

1

Define Functions

Create Python functions with clear docstrings and parameter definitions in JSON Schema format
2

Send to Model

Pass messages and function definitions to the LLM
3

Model Decides

The LLM determines whether to call a function and which one
4

Execute Function

Your code executes the requested function with provided arguments
5

Return Result

Send function result back to model for final response generation

Function Definition Schema

Follow this pattern to define functions:

Basic Structure

# Template for a single entry in the `functions=[...]` list passed to
# `llm.chat`. The `parameters` value follows the JSON Schema object format
# (OpenAI-compatible function definition).
function_definition = {
    'name': 'function_name',
    'description': 'Clear description of what this function does',
    'parameters': {
        'type': 'object',
        'properties': {
            'param1': {
                'type': 'string',  # or number, boolean, object, array
                'description': 'What this parameter represents',
            },
            'param2': {
                'type': 'number',
                'description': 'Another parameter',
            },
        },
        'required': ['param1'],  # List of required parameters
    },
}

Parameter Types

# Example property definition for a string parameter inside `properties`.
{
    'type': 'string',
    'description': 'A text value',
    'enum': ['option1', 'option2']  # Optional: restrict values
}

Parallel Function Calling

Execute multiple functions simultaneously for better efficiency.

Example

function_calling_in_parallel.py
import json
import os
from qwen_agent.llm import get_chat_model

def get_current_weather(location, unit='fahrenheit'):
    """Get the current weather in a given location.

    Stubbed for the parallel-calling demo: every location reports 72 degrees.
    """
    # A real implementation would query a weather service here.
    payload = {'location': location, 'temperature': '72'}
    return json.dumps(payload)

def test():
    """Demonstrate parallel function calling: a single model turn may request
    several function calls (one per location in the user's question)."""
    llm = get_chat_model({
        'model': 'qwen-plus-latest',
        'model_server': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
        'api_key': os.getenv('DASHSCOPE_API_KEY'),
        'generate_cfg': {
            'fncall_prompt_type': 'qwen'
        },
    })

    # One question that naturally decomposes into three independent lookups.
    messages = [{
        'role': 'user',
        'content': "What's the weather like in San Francisco? And Tokyo? Paris?",
    }]

    functions = [{
        'name': 'get_current_weather',
        'description': 'Get the current weather in a given location',
        'parameters': {
            'type': 'object',
            'properties': {
                'location': {
                    'type': 'string',
                    'description': 'The city and state',
                },
                'unit': {
                    'type': 'string',
                    'enum': ['celsius', 'fahrenheit']
                },
            },
            'required': ['location'],
        },
    }]

    # Stream the response; each yield is the full accumulated message list,
    # so `responses` ends up holding the final list after the loop.
    print('# Assistant Response 1:')
    responses = []
    for responses in llm.chat(
        messages=messages,
        functions=functions,
        stream=True,
        extra_generate_cfg=dict(
            parallel_function_calls=True,  # Enable parallel calls
        ),
    ):
        print(responses)

    messages.extend(responses)

    # Execute every function call the model emitted in this turn.
    fncall_msgs = [rsp for rsp in responses if rsp.get('function_call')]
    if fncall_msgs:
        available_functions = {
            'get_current_weather': get_current_weather,
        }

        for msg in fncall_msgs:
            function_name = msg['function_call']['name']
            function_to_call = available_functions.get(function_name)
            if function_to_call is None:
                # Skip a hallucinated function name rather than crashing.
                print(f'# Skipping unknown function: {function_name}')
                continue
            function_args = json.loads(msg['function_call']['arguments'])

            # Forward only arguments the model supplied so omitted optional
            # parameters (e.g. `unit`) keep the function's own defaults.
            call_kwargs = {'location': function_args.get('location')}
            if function_args.get('unit') is not None:
                call_kwargs['unit'] = function_args['unit']
            function_response = function_to_call(**call_kwargs)

            print(f'# Function Response ({function_name}):')
            print(function_response)

            messages.append({
                'role': 'function',
                'name': function_name,
                'content': function_response,
            })

        # Ask the model to compose a final answer from all function results.
        print('# Assistant Response 2:')
        for responses in llm.chat(
            messages=messages,
            functions=functions,
            extra_generate_cfg={'parallel_function_calls': True},
            stream=True,
        ):
            print(responses)

if __name__ == '__main__':
    test()

Benefits of Parallel Calling

Better Performance

Execute multiple independent functions simultaneously

Natural Queries

Handle queries asking about multiple items

Reduced Latency

One LLM call instead of multiple sequential calls

More Efficient

Fewer API requests overall

Advanced Configuration

Function Choice Control

Control when and which functions are called:
# NOTE: `llm`, `messages`, and `functions` are assumed to be defined as in
# the earlier examples on this page.

# Let model decide (default)
llm.chat(
    messages=messages,
    functions=functions,
    extra_generate_cfg=dict(
        function_choice='auto'
    )
)

# Force specific function call
llm.chat(
    messages=messages,
    functions=functions,
    extra_generate_cfg=dict(
        function_choice='get_current_weather'
    )
)

# Disable function calling for this request
llm.chat(
    messages=messages,
    functions=functions,
    extra_generate_cfg=dict(
        function_choice='none'
    )
)

Prompt Types

Qwen-Agent supports different function calling prompt formats:
Optimized for Qwen models:
# Select the function-calling prompt template tuned for Qwen-family models.
llm = get_chat_model({
    'model': 'qwen-plus-latest',
    'generate_cfg': {
        'fncall_prompt_type': 'qwen'
    }
})
Best for: Qwen2.5, Qwen3, QwQ

Token Management

# Per-request token limits; `llm`, `messages`, `functions` as defined earlier.
llm.chat(
    messages=messages,
    functions=functions,
    extra_generate_cfg=dict(
        max_input_tokens=6500,  # Truncate history if needed
        max_tokens=2000,  # Maximum response length
    )
)

Vision Model Function Calling

Use functions with vision models:
from qwen_agent.llm import get_chat_model

# Vision-language model: accepts images alongside text in message content.
llm = get_chat_model({
    'model': 'qwen-vl-max',
    'model_server': 'dashscope',
})

# Multimodal content is a list of typed items (image URL + text prompt).
messages = [{
    'role': 'user',
    'content': [
        {'image': 'https://example.com/image.jpg'},
        {'text': 'What objects are in this image?'}
    ]
}]

functions = [{
    'name': 'identify_objects',
    'description': 'Identify objects in an image',
    'parameters': {
        'type': 'object',
        'properties': {
            'objects': {
                'type': 'array',
                'items': {'type': 'string'},
                'description': 'List of identified objects'
            }
        },
        'required': ['objects']
    }
}]

# Iterate the generator to stream the model's response messages.
for response in llm.chat(messages=messages, functions=functions):
    print(response)

Error Handling

Best Practices

import json
import json5

def safe_function_call(function_to_call, function_args):
    """Safely execute a model-requested function call.

    Args:
        function_to_call: The Python callable to invoke.
        function_args: JSON string of keyword arguments emitted by the model.

    Returns:
        The callable's result on success; otherwise a JSON string describing
        the failure. Never raises.
    """
    # Parse in its own try-block so parse failures and execution failures are
    # reported distinctly. The original caught json.JSONDecodeError around
    # json5.loads, but json5 parse errors are not json.JSONDecodeError, so
    # that branch could be bypassed.
    try:
        try:
            # Prefer json5 when available: lenient parsing tolerates trailing
            # commas, single quotes, etc. in model output.
            import json5
            args = json5.loads(function_args)
        except ImportError:
            args = json.loads(function_args)
    except Exception as e:
        return json.dumps({
            'error': 'Invalid JSON arguments',
            'details': str(e)
        })

    try:
        return function_to_call(**args)
    except Exception as e:
        return json.dumps({
            'error': 'Function execution failed',
            'details': str(e)
        })

# Usage
# `last_response` and `available_functions` come from the basic example above;
# note the raw JSON string is passed through — safe_function_call parses it.
if last_response.get('function_call'):
    function_name = last_response['function_call']['name']
    function_args = last_response['function_call']['arguments']
    
    function_result = safe_function_call(
        available_functions[function_name],
        function_args
    )

Retry Configuration

# Automatically retry transient API failures up to 3 times.
llm = get_chat_model({
    'model': 'qwen-plus-latest',
    'generate_cfg': {
        'max_retries': 3,  # Retry failed API calls
    }
})

Using with Different Model Providers

DashScope

# Native DashScope API endpoint.
llm = get_chat_model({
    'model': 'qwen-plus-latest',
    'model_server': 'dashscope',
    'api_key': os.getenv('DASHSCOPE_API_KEY'),
})

DashScope OpenAI-Compatible Mode

# DashScope exposed via its OpenAI-compatible endpoint.
llm = get_chat_model({
    'model': 'qwen2.5-72b-instruct',
    'model_server': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
    'api_key': os.getenv('DASHSCOPE_API_KEY'),
})

Together.AI

# Together.AI hosted Qwen model (OpenAI-compatible API).
llm = get_chat_model({
    'model': 'Qwen/qwen2.5-7b-instruct',
    'model_server': 'https://api.together.xyz',
    'api_key': os.getenv('TOGETHER_API_KEY'),
})

Self-Hosted (vLLM)

# Self-hosted vLLM server; no real key is needed, but the field is required.
llm = get_chat_model({
    'model': 'Qwen/Qwen2.5-7B-Instruct',
    'model_server': 'http://localhost:8000/v1',
    'api_key': 'EMPTY',
})

Common Patterns

Weather Service

def get_weather(location: str, unit: str = 'celsius') -> str:
    """Fetch current conditions for *location* and return them as a JSON string."""
    # `weather_api` is assumed to be provided by the surrounding application.
    current = weather_api.get_current(location)
    payload = {
        'location': location,
        'temperature': current['temp'],
        'unit': unit,
        'conditions': current['conditions'],
    }
    return json.dumps(payload)

Database Query

def query_database(query: str) -> str:
    """Run *query* against the application database and return rows as JSON."""
    # `database` is assumed to be provided by the surrounding application.
    rows = database.execute(query)
    return json.dumps({'results': rows, 'count': len(rows)})

File Operations

def read_file(filepath: str) -> str:
    """Read a text file and report the outcome as a JSON string.

    On success the payload carries the file contents and success=True; any
    failure is captured and reported with success=False instead of raising.
    """
    try:
        with open(filepath, 'r') as f:
            body = f.read()
    except Exception as e:
        return json.dumps({
            'error': str(e),
            'success': False
        })
    return json.dumps({
        'content': body,
        'success': True
    })

Troubleshooting

Model not calling functions:
  • Ensure function description is clear and relevant
  • Check parameter descriptions are detailed
  • Verify function name doesn't conflict with other functions
  • Try adding examples in the description

Invalid or malformed arguments:
  • Use the json5 library for lenient parsing
  • Add validation in the function implementation
  • Provide clear parameter descriptions
  • Use enum for restricted values

Parallel calls not working:
  • Verify parallel_function_calls=True is set
  • Check that the model supports parallel calls
  • Ensure the functions are independent of each other

Function results ignored:
  • Verify the function returns a valid JSON string
  • Check the role is set to 'function' in the message
  • Ensure the function name matches the definition exactly

Next Steps

Assistant Demos

See function calling in complete assistants

Multi-Agent Chat

Combine function calling with multiple agents

Build docs developers (and LLMs) love