from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
import numpy as np
# Prepare data
X_numeric = np.random.randn(100, 3).astype(np.float32)
X_categorical = np.random.randint(0, 5, size=(100, 2))
X = np.hstack([X_numeric, X_categorical])
y = np.random.randint(0, 2, size=100)
# Create pipeline with mixed types
preprocessor = ColumnTransformer(
transformers=[
('num', StandardScaler(), [0, 1, 2]),
('cat', OneHotEncoder(), [3, 4])
])
model = Pipeline([
('preprocessor', preprocessor),
('classifier', RandomForestClassifier(n_estimators=10))
])
model.fit(X, y)
# Define mixed input types
initial_type = [
('numeric_input', FloatTensorType([None, 3])),
('categorical_input', Int64TensorType([None, 2]))
]
onnx_model = convert_sklearn(
model,
initial_types=initial_type,
target_opset=14
)