
FPGABackend

Base class for FPGA HLS backends (Vivado, Vitis, Quartus, Catapult). Provides common functionality for precision handling, resource optimization, and code generation.
class FPGABackend(Backend):
    def __init__(self, name):
        super().__init__(name)
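
Concrete backends are obtained by name rather than instantiated directly. A minimal sketch, assuming the Vivado backend is available in your hls4ml installation:

from hls4ml.backends import get_backend

# Each registered name resolves to a concrete FPGABackend subclass
backend = get_backend('Vivado')
print(type(backend).__name__)  # e.g. VivadoBackend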

Resource Management

get_valid_reuse_factors

Calculate valid reuse factors for a layer.
backend.get_valid_reuse_factors(n_in, n_out)
Parameters:
  • n_in (int, required): Number of input neurons/channels.
  • n_out (int, required): Number of output neurons/channels.

Returns:
  • valid_rf (list): List of valid reuse factor values.

Example

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

# For a Dense layer: 128 inputs -> 64 outputs
valid_rf = backend.get_valid_reuse_factors(128, 64)
print(f"Valid reuse factors: {valid_rf}")
# Output: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]

get_closest_reuse_factor

Find the closest valid reuse factor.
backend.get_closest_reuse_factor(valid_rf, chosen_rf)
Parameters:
  • valid_rf (list, required): List of valid reuse factors (sorted).
  • chosen_rf (int, required): Desired reuse factor.

Returns:
  • closest_rf (int): The nearest valid reuse factor.

Example

backend = get_backend('Vivado')
valid_rf = backend.get_valid_reuse_factors(128, 64)

# User wants RF=10, but it's not valid
closest = backend.get_closest_reuse_factor(valid_rf, 10)
print(f"Closest valid RF to 10: {closest}")  # Output: 8

set_closest_reuse_factor

Set the closest valid reuse factor for a layer.
backend.set_closest_reuse_factor(
    layer,
    n_in,
    n_out,
    attribute='reuse_factor',
    include_max_rf=True
)
Parameters:
  • layer (Layer, required): Layer to configure.
  • n_in (int, required): Number of inputs.
  • n_out (int, required): Number of outputs.
  • attribute (str, default: "reuse_factor"): Attribute name to set.
  • include_max_rf (bool, default: True): Include the maximum reuse factor (fully sequential).

Example

import hls4ml

# Convert model
hls_model = hls4ml.converters.convert_from_keras_model(keras_model)

backend = hls4ml.backends.get_backend('Vivado')

# Adjust reuse factors
for layer in hls_model.get_layers():
    if layer.class_name == 'Dense':
        n_in = layer.get_attr('n_in')
        n_out = layer.get_attr('n_out')
        backend.set_closest_reuse_factor(layer, n_in, n_out)
        print(f"{layer.name}: RF={layer.get_attr('reuse_factor')}")

Precision Handling

convert_precision_string

Convert precision strings to internal types.
backend.convert_precision_string(precision)
Supported formats:
  • AP Fixed: 'ap_fixed<16,6>', 'ap_ufixed<8,4>'
  • AP Int: 'ap_int<8>', 'ap_uint<8>'
  • Generic: 'fixed<16,6>', 'int<8>'
  • Float: 'float', 'double', 'half', 'bfloat16'
  • AP Float: 'ap_float<16,5>'
  • AC Types: 'ac_fixed<16,6,true>', 'ac_int<8,true>'

Examples

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

# Fixed-point precision
fp = backend.convert_precision_string('ap_fixed<16,6>')
print(fp)  # FixedPrecisionType(width=16, integer=6, signed=True)

# With rounding and saturation
fp_modes = backend.convert_precision_string('ap_fixed<16,6,AP_RND,AP_SAT>')
print(fp_modes.rounding_mode)  # RoundingMode.RND
print(fp_modes.saturation_mode)  # SaturationMode.SAT

# Unsigned fixed-point
ufp = backend.convert_precision_string('ap_ufixed<8,4>')
print(ufp.signed)  # False

# Integer
int_p = backend.convert_precision_string('ap_int<8>')
print(int_p)  # IntegerPrecisionType(width=8, signed=True)

# Float types
float_p = backend.convert_precision_string('float')
print(float_p)  # StandardFloatPrecisionType(width=32, exponent=8)

double_p = backend.convert_precision_string('double')
print(double_p)  # StandardFloatPrecisionType(width=64, exponent=11)

half_p = backend.convert_precision_string('half')
print(half_p)  # StandardFloatPrecisionType(width=16, exponent=5)

Precision Inference

The backend can automatically infer appropriate precisions:
import hls4ml

hls_config = {
    'Model': {
        'Precision': 'ap_fixed<16,6>',
        'ReuseFactor': 1
    }
}

hls_model = hls4ml.converters.convert_from_keras_model(
    keras_model,
    hls_config=hls_config
)

# Apply precision inference
hls_model.apply_flow('vivado:infer_precision')

# Check inferred precisions
for layer in hls_model.get_layers():
    if hasattr(layer, 'get_output_variable'):
        var = layer.get_output_variable()
        print(f"{layer.name}: {var.type.precision}")

Convolution Optimization

compute_conv1d_instructions

Generate optimized instructions for 1D convolution.
backend.compute_conv1d_instructions(
    in_W,
    in_C,
    kernel_size=3,
    stride=1,
    pad=0
)
Parameters:
  • in_W (int, required): Input width.
  • in_C (int, required): Number of input channels.
  • kernel_size (int, default: 3): Convolution kernel size.
  • stride (int, default: 1): Stride value.
  • pad (int, default: 0): Padding.

Returns:
  • result (tuple): (min_W, windows_int), the minimal width and the window instructions.
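
Example

A minimal sketch of calling this method directly; the shape arguments are illustrative, and the exact returned values depend on the backend's internal windowing scheme:

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

# 1D conv: width-32 input, 16 channels, kernel 3, stride 1, no padding
min_W, windows_int = backend.compute_conv1d_instructions(
    in_W=32, in_C=16, kernel_size=3, stride=1, pad=0
)
print(f"Minimal width: {min_W}")
print(f"Window instructions: {windows_int}")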

compute_conv2d_instructions

Generate optimized instructions for 2D convolution.
backend.compute_conv2d_instructions(
    in_H,
    in_W,
    in_C,
    kernel_size=3,
    stride=1,
    pad=0
)
Parameters match the 1D variant, with the input height in_H added.

Returns:
  • result (tuple): (min_H, min_W, windows_int), the minimal dimensions and the window instructions.
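
Example

The 2D variant follows the same pattern with the input height added; again a sketch with illustrative shapes:

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

# 2D conv: 28x28 input, 3 channels, kernel 3, stride 1, no padding
min_H, min_W, windows_int = backend.compute_conv2d_instructions(
    in_H=28, in_W=28, in_C=3, kernel_size=3, stride=1, pad=0
)
print(f"Minimal dimensions: {min_H} x {min_W}")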

generate_conv1d_line_buffer_fn

Generate C++ code for 1D convolution line buffer.
backend.generate_conv1d_line_buffer_fn(
    layer_idx,
    n_partitions,
    in_W,
    in_C,
    kernel=3,
    stride=1,
    pad=0,
    dilation=1
)
Parameters:
  • layer_idx (int, required): Layer index used for naming.
  • n_partitions (int, required): Number of parallel partitions.

Returns:
  • code (str): Generated C++ code for the line buffer function.
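
Example

A sketch of generating the line buffer source directly; layer_idx and the shape arguments are illustrative:

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

code = backend.generate_conv1d_line_buffer_fn(
    2,      # layer_idx: used to name the generated function
    1,      # n_partitions: number of parallel partitions
    32,     # in_W
    16,     # in_C
    kernel=3,
    stride=1,
    pad=0
)
print(code[:200])  # inspect the start of the generated C++ source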

generate_conv2d_line_buffer_fn

Generate C++ code for 2D convolution line buffer.
backend.generate_conv2d_line_buffer_fn(
    layer_idx,
    n_partitions,
    in_H,
    in_W,
    in_C,
    kernel=(3, 3),
    stride=(1, 1),
    pad=(0, 0, 0, 0),
    dilation=(1, 1)
)
Returns:
  • code (str): Generated C++ code for the line buffer function.
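
Example

The 2D generator takes tuple-valued kernel, stride, pad, and dilation arguments; a sketch with illustrative shapes (padding order per the signature above):

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

code = backend.generate_conv2d_line_buffer_fn(
    2,                  # layer_idx
    1,                  # n_partitions
    28, 28, 3,          # in_H, in_W, in_C
    kernel=(3, 3),
    stride=(1, 1),
    pad=(0, 0, 0, 0)    # four-sided padding
)
print(code[:200])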

Product Type Selection

product_type

Determine which multiplication implementation to use.
backend.product_type(data_T, weight_T)
Parameters:
  • data_T (PrecisionType, required): Data/activation precision type.
  • weight_T (PrecisionType, required): Weight precision type.

Returns:
  • product (str): One of 'mult', 'weight_binary', 'data_binary', 'both_binary', 'weight_ternary', 'weight_exponential'.

Example

from hls4ml.backends import get_backend
from hls4ml.model.types import FixedPrecisionType, XnorPrecisionType

backend = get_backend('Vivado')

# Standard multiplication
data_t = FixedPrecisionType(16, 6)
weight_t = FixedPrecisionType(16, 6)
product = backend.product_type(data_t, weight_t)
print(product)  # 'mult'

# Binary weights
weight_binary = XnorPrecisionType()
product = backend.product_type(data_t, weight_binary)
print(product)  # 'weight_binary'

# Both binary
data_binary = XnorPrecisionType()
product = backend.product_type(data_binary, weight_binary)
print(product)  # 'both_binary'

Layer Attributes

The FPGA backend adds the following attributes to layers:

Dense/Conv Layers

  • accum_t: Accumulator precision type
  • reuse_factor: Parallelization factor (1 = fully parallel)

Activation Layers

  • table_size: Size of lookup table (for sigmoid, tanh, etc.)
  • table_t: Lookup table precision

Softmax Layers

  • implementation: 'latency', 'stable', 'argmax', 'legacy'
  • skip: Skip computation (for inference optimization)
  • exp_table_t: Exponential table precision
  • inv_table_t: Inverse table precision
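
These attributes can be adjusted per layer after conversion. A minimal sketch, assuming the converted model contains a Softmax layer:

import hls4ml

hls_model = hls4ml.converters.convert_from_keras_model(keras_model)

# Switch softmax to the 'stable' implementation and inspect its table types
for layer in hls_model.get_layers():
    if layer.class_name == 'Softmax':
        layer.set_attr('implementation', 'stable')
        print(layer.get_attr('exp_table_t'), layer.get_attr('inv_table_t'))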

Example

import hls4ml

hls_model = hls4ml.converters.convert_from_keras_model(keras_model)

for layer in hls_model.get_layers():
    if layer.class_name == 'Dense':
        print(f"{layer.name}:")
        print(f"  ReuseFactor: {layer.get_attr('reuse_factor')}")
        print(f"  AccumType: {layer.get_attr('accum_t')}")
    
    elif layer.class_name == 'Activation':
        print(f"{layer.name}:")
        print(f"  TableSize: {layer.get_attr('table_size')}")
        print(f"  TableType: {layer.get_attr('table_t')}")

Strategy Configuration

Resource vs Latency

# Latency strategy - maximize parallelism
hls_config = {
    'Model': {
        'Strategy': 'Latency',
        'ReuseFactor': 1  # Fully parallel
    }
}

# Resource strategy - minimize resource usage
hls_config = {
    'Model': {
        'Strategy': 'Resource',
        'ReuseFactor': 64  # More sequential
    }
}

# Mixed strategy - per layer
hls_config = {
    'Model': {
        'Strategy': 'Latency',
        'ReuseFactor': 1
    },
    'LayerName': {
        'large_dense': {
            'Strategy': 'Resource',
            'ReuseFactor': 32
        }
    }
}

Compilation

compile

Compile the generated project.
backend.compile(model)
Generates a shared library for simulation:
output_dir/
├── firmware/
│   ├── myproject.cpp
│   ├── myproject.h
│   ├── nnet_utils/
│   └── weights/
├── build_lib.sh
└── myproject-<stamp>.so

Example

import hls4ml
import numpy as np

hls_model = hls4ml.converters.convert_from_keras_model(
    keras_model,
    output_dir='my-hls-test'
)

# Compile for simulation
print("Compiling...")
hls_model.compile()

# Test
X_test = np.random.rand(10, 784).astype(np.float32)
y_pred = hls_model.predict(X_test)
print(f"Predictions shape: {y_pred.shape}")

Build Process

build

Run HLS synthesis.
report = backend.build(
    model,
    reset=False,
    csim=True,
    synth=True,
    cosim=False,
    export=False,
    vsynth=False
)
Returns:
  • report (dict): Contains:
      • LUT, FF, DSP, BRAM_18K: resource usage
      • LatencyBest, LatencyWorst: latency in cycles
      • IntervalMin, IntervalMax: initiation interval (throughput) in cycles
      • ClockPeriod: achieved clock period

Example

import hls4ml

hls_model = hls4ml.converters.convert_from_keras_model(
    keras_model,
    output_dir='synth_test',
    backend='Vivado'
)

# Run synthesis
report = hls_model.build(
    csim=True,     # C simulation
    synth=True,    # HLS synthesis
    cosim=False,   # Skip co-sim (slow)
    export=False   # Don't export IP
)

# Print results
print("\nSynthesis Results:")
print(f"  LUTs: {report['LUT']}")
print(f"  FFs: {report['FF']}")
print(f"  DSPs: {report['DSP']}")
print(f"  BRAMs: {report['BRAM_18K']}")
print(f"  Latency: {report['LatencyBest']} - {report['LatencyWorst']} cycles")
print(f"  II: {report['IntervalMin']} - {report['IntervalMax']} cycles")
