
FPGABackend

Base class for FPGA HLS backends (Vivado, Vitis, Quartus, Catapult). Provides common functionality for precision handling, resource optimization, and code generation.
class FPGABackend(Backend):
    def __init__(self, name):
        super().__init__(name)
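
Concrete backends are obtained by name rather than instantiated directly. A minimal sketch, assuming the Vivado backend is available in your hls4ml installation:

from hls4ml.backends import get_backend

# Each registered name resolves to a concrete FPGABackend subclass
backend = get_backend('Vivado')
print(type(backend).__name__)  # e.g. VivadoBackend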

Resource Management

get_valid_reuse_factors

Calculate valid reuse factors for a layer.
backend.get_valid_reuse_factors(n_in, n_out)
Parameters:
  • n_in (int, required): Number of input neurons/channels.
  • n_out (int, required): Number of output neurons/channels.

Returns:
  • valid_rf (list): List of valid reuse factor values.

Example

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

# For a Dense layer: 128 inputs -> 64 outputs
valid_rf = backend.get_valid_reuse_factors(128, 64)
print(f"Valid reuse factors: {valid_rf}")
# Output: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]

get_closest_reuse_factor

Find the closest valid reuse factor.
backend.get_closest_reuse_factor(valid_rf, chosen_rf)
Parameters:
  • valid_rf (list, required): List of valid reuse factors (sorted).
  • chosen_rf (int, required): Desired reuse factor.

Returns:
  • closest_rf (int): The nearest valid reuse factor.

Example

backend = get_backend('Vivado')
valid_rf = backend.get_valid_reuse_factors(128, 64)

# User wants RF=10, but it's not valid
closest = backend.get_closest_reuse_factor(valid_rf, 10)
print(f"Closest valid RF to 10: {closest}")  # Output: 8

set_closest_reuse_factor

Set the closest valid reuse factor for a layer.
backend.set_closest_reuse_factor(
    layer,
    n_in,
    n_out,
    attribute='reuse_factor',
    include_max_rf=True
)
Parameters:
  • layer (Layer, required): Layer to configure.
  • n_in (int, required): Number of inputs.
  • n_out (int, required): Number of outputs.
  • attribute (str, default: "reuse_factor"): Attribute name to set.
  • include_max_rf (bool, default: True): Include the maximum reuse factor (fully sequential).

Example

import hls4ml

# Convert model
hls_model = hls4ml.converters.convert_from_keras_model(keras_model)

backend = hls4ml.backends.get_backend('Vivado')

# Adjust reuse factors
for layer in hls_model.get_layers():
    if layer.class_name == 'Dense':
        n_in = layer.get_attr('n_in')
        n_out = layer.get_attr('n_out')
        backend.set_closest_reuse_factor(layer, n_in, n_out)
        print(f"{layer.name}: RF={layer.get_attr('reuse_factor')}")

Precision Handling

convert_precision_string

Convert precision strings to internal types.
backend.convert_precision_string(precision)
Supported formats:
  • AP Fixed: 'ap_fixed<16,6>', 'ap_ufixed<8,4>'
  • AP Int: 'ap_int<8>', 'ap_uint<8>'
  • Generic: 'fixed<16,6>', 'int<8>'
  • Float: 'float', 'double', 'half', 'bfloat16'
  • AP Float: 'ap_float<16,5>'
  • AC Types: 'ac_fixed<16,6,true>', 'ac_int<8,true>'

Examples

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

# Fixed-point precision
fp = backend.convert_precision_string('ap_fixed<16,6>')
print(fp)  # FixedPrecisionType(width=16, integer=6, signed=True)

# With rounding and saturation
fp_modes = backend.convert_precision_string('ap_fixed<16,6,AP_RND,AP_SAT>')
print(fp_modes.rounding_mode)  # RoundingMode.RND
print(fp_modes.saturation_mode)  # SaturationMode.SAT

# Unsigned fixed-point
ufp = backend.convert_precision_string('ap_ufixed<8,4>')
print(ufp.signed)  # False

# Integer
int_p = backend.convert_precision_string('ap_int<8>')
print(int_p)  # IntegerPrecisionType(width=8, signed=True)

# Float types
float_p = backend.convert_precision_string('float')
print(float_p)  # StandardFloatPrecisionType(width=32, exponent=8)

double_p = backend.convert_precision_string('double')
print(double_p)  # StandardFloatPrecisionType(width=64, exponent=11)

half_p = backend.convert_precision_string('half')
print(half_p)  # StandardFloatPrecisionType(width=16, exponent=5)

Precision Inference

The backend can automatically infer appropriate precisions:
import hls4ml

hls_config = {
    'Model': {
        'Precision': 'ap_fixed<16,6>',
        'ReuseFactor': 1
    }
}

hls_model = hls4ml.converters.convert_from_keras_model(
    keras_model,
    hls_config=hls_config
)

# Apply precision inference
hls_model.apply_flow('vivado:infer_precision')

# Check inferred precisions
for layer in hls_model.get_layers():
    if hasattr(layer, 'get_output_variable'):
        var = layer.get_output_variable()
        print(f"{layer.name}: {var.type.precision}")

Convolution Optimization

compute_conv1d_instructions

Generate optimized instructions for 1D convolution.
backend.compute_conv1d_instructions(
    in_W,
    in_C,
    kernel_size=3,
    stride=1,
    pad=0
)
Parameters:
  • in_W (int, required): Input width.
  • in_C (int, required): Number of input channels.
  • kernel_size (int, default: 3): Convolution kernel size.
  • stride (int, default: 1): Stride value.
  • pad (int, default: 0): Padding.

Returns:
  • result (tuple): (min_W, windows_int), the minimal width and the window instructions.
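
Example

A minimal sketch of calling this method directly; the shape arguments are illustrative, and the exact returned values depend on the backend's internal windowing scheme:

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

# 1D conv: width-32 input, 16 channels, kernel 3, stride 1, no padding
min_W, windows_int = backend.compute_conv1d_instructions(
    in_W=32, in_C=16, kernel_size=3, stride=1, pad=0
)
print(f"Minimal width: {min_W}")
print(f"Window instructions: {windows_int}")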

compute_conv2d_instructions

Generate optimized instructions for 2D convolution.
backend.compute_conv2d_instructions(
    in_H,
    in_W,
    in_C,
    kernel_size=3,
    stride=1,
    pad=0
)
Parameters match the 1D variant, with the input height in_H added.

Returns:
  • result (tuple): (min_H, min_W, windows_int), the minimal dimensions and the window instructions.
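
Example

The 2D variant follows the same pattern with the input height added; again a sketch with illustrative shapes:

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

# 2D conv: 28x28 input, 3 channels, kernel 3, stride 1, no padding
min_H, min_W, windows_int = backend.compute_conv2d_instructions(
    in_H=28, in_W=28, in_C=3, kernel_size=3, stride=1, pad=0
)
print(f"Minimal dimensions: {min_H} x {min_W}")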

generate_conv1d_line_buffer_fn

Generate C++ code for 1D convolution line buffer.
backend.generate_conv1d_line_buffer_fn(
    layer_idx,
    n_partitions,
    in_W,
    in_C,
    kernel=3,
    stride=1,
    pad=0,
    dilation=1
)
Parameters:
  • layer_idx (int, required): Layer index used for naming.
  • n_partitions (int, required): Number of parallel partitions.

Returns:
  • code (str): Generated C++ code for the line buffer function.
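
Example

A sketch of generating the line buffer source directly; layer_idx and the shape arguments are illustrative:

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

code = backend.generate_conv1d_line_buffer_fn(
    2,      # layer_idx: used to name the generated function
    1,      # n_partitions: number of parallel partitions
    32,     # in_W
    16,     # in_C
    kernel=3,
    stride=1,
    pad=0
)
print(code[:200])  # inspect the start of the generated C++ source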

generate_conv2d_line_buffer_fn

Generate C++ code for 2D convolution line buffer.
backend.generate_conv2d_line_buffer_fn(
    layer_idx,
    n_partitions,
    in_H,
    in_W,
    in_C,
    kernel=(3, 3),
    stride=(1, 1),
    pad=(0, 0, 0, 0),
    dilation=(1, 1)
)
Returns:
  • code (str): Generated C++ code for the line buffer function.
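
Example

The 2D generator takes tuple-valued kernel, stride, pad, and dilation arguments; a sketch with illustrative shapes (padding order per the signature above):

from hls4ml.backends import get_backend

backend = get_backend('Vivado')

code = backend.generate_conv2d_line_buffer_fn(
    2,                  # layer_idx
    1,                  # n_partitions
    28, 28, 3,          # in_H, in_W, in_C
    kernel=(3, 3),
    stride=(1, 1),
    pad=(0, 0, 0, 0)    # four-sided padding
)
print(code[:200])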

Product Type Selection

product_type

Determine which multiplication implementation to use.
backend.product_type(data_T, weight_T)
Parameters:
  • data_T (PrecisionType, required): Data/activation precision type.
  • weight_T (PrecisionType, required): Weight precision type.

Returns:
  • product (str): One of 'mult', 'weight_binary', 'data_binary', 'both_binary', 'weight_ternary', 'weight_exponential'.

Example

from hls4ml.backends import get_backend
from hls4ml.model.types import FixedPrecisionType, XnorPrecisionType

backend = get_backend('Vivado')

# Standard multiplication
data_t = FixedPrecisionType(16, 6)
weight_t = FixedPrecisionType(16, 6)
product = backend.product_type(data_t, weight_t)
print(product)  # 'mult'

# Binary weights
weight_binary = XnorPrecisionType()
product = backend.product_type(data_t, weight_binary)
print(product)  # 'weight_binary'

# Both binary
data_binary = XnorPrecisionType()
product = backend.product_type(data_binary, weight_binary)
print(product)  # 'both_binary'

Layer Attributes

The FPGA backend adds the following attributes to layers:

Dense/Conv Layers

  • accum_t: Accumulator precision type
  • reuse_factor: Parallelization factor (1 = fully parallel)

Activation Layers

  • table_size: Size of lookup table (for sigmoid, tanh, etc.)
  • table_t: Lookup table precision

Softmax Layers

  • implementation: 'latency', 'stable', 'argmax', 'legacy'
  • skip: Skip computation (for inference optimization)
  • exp_table_t: Exponential table precision
  • inv_table_t: Inverse table precision
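
These attributes can be adjusted per layer after conversion. A minimal sketch, assuming the converted model contains a Softmax layer:

import hls4ml

hls_model = hls4ml.converters.convert_from_keras_model(keras_model)

# Switch softmax to the 'stable' implementation and inspect its table types
for layer in hls_model.get_layers():
    if layer.class_name == 'Softmax':
        layer.set_attr('implementation', 'stable')
        print(layer.get_attr('exp_table_t'), layer.get_attr('inv_table_t'))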

Example

import hls4ml

hls_model = hls4ml.converters.convert_from_keras_model(keras_model)

for layer in hls_model.get_layers():
    if layer.class_name == 'Dense':
        print(f"{layer.name}:")
        print(f"  ReuseFactor: {layer.get_attr('reuse_factor')}")
        print(f"  AccumType: {layer.get_attr('accum_t')}")
    
    elif layer.class_name == 'Activation':
        print(f"{layer.name}:")
        print(f"  TableSize: {layer.get_attr('table_size')}")
        print(f"  TableType: {layer.get_attr('table_t')}")

Strategy Configuration

Resource vs Latency

# Latency strategy - maximize parallelism
hls_config = {
    'Model': {
        'Strategy': 'Latency',
        'ReuseFactor': 1  # Fully parallel
    }
}

# Resource strategy - minimize resource usage
hls_config = {
    'Model': {
        'Strategy': 'Resource',
        'ReuseFactor': 64  # More sequential
    }
}

# Mixed strategy - per layer
hls_config = {
    'Model': {
        'Strategy': 'Latency',
        'ReuseFactor': 1
    },
    'LayerName': {
        'large_dense': {
            'Strategy': 'Resource',
            'ReuseFactor': 32
        }
    }
}

Compilation

compile

Compile the generated project.
backend.compile(model)
Generates a shared library for simulation:
output_dir/
├── firmware/
│   ├── myproject.cpp
│   ├── myproject.h
│   ├── nnet_utils/
│   └── weights/
├── build_lib.sh
└── myproject-<stamp>.so

Example

import hls4ml
import numpy as np

hls_model = hls4ml.converters.convert_from_keras_model(
    keras_model,
    output_dir='my-hls-test'
)

# Compile for simulation
print("Compiling...")
hls_model.compile()

# Test
X_test = np.random.rand(10, 784).astype(np.float32)
y_pred = hls_model.predict(X_test)
print(f"Predictions shape: {y_pred.shape}")

Build Process

build

Run HLS synthesis.
report = backend.build(
    model,
    reset=False,
    csim=True,
    synth=True,
    cosim=False,
    export=False,
    vsynth=False
)
Returns:
  • report (dict): Contains:
      • LUT, FF, DSP, BRAM_18K: resource usage
      • LatencyBest, LatencyWorst: latency in cycles
      • IntervalMin, IntervalMax: initiation interval (throughput) in cycles
      • ClockPeriod: achieved clock period

Example

import hls4ml

hls_model = hls4ml.converters.convert_from_keras_model(
    keras_model,
    output_dir='synth_test',
    backend='Vivado'
)

# Run synthesis
report = hls_model.build(
    csim=True,     # C simulation
    synth=True,    # HLS synthesis
    cosim=False,   # Skip co-sim (slow)
    export=False   # Don't export IP
)

# Print results
print("\nSynthesis Results:")
print(f"  LUTs: {report['LUT']}")
print(f"  FFs: {report['FF']}")
print(f"  DSPs: {report['DSP']}")
print(f"  BRAMs: {report['BRAM_18K']}")
print(f"  Latency: {report['LatencyBest']} - {report['LatencyWorst']} cycles")
print(f"  II: {report['IntervalMin']} - {report['IntervalMax']} cycles")
