Quickstart: training your first models with MLPP
This guide walks through training a linear regression model and a logistic classifier using MLPP’s clean, intuitive API.

Prerequisites

Ensure you’ve installed MLPP and have a C++20 compiler with Eigen3 available.

Linear regression example

Let’s train a simple linear regression model to fit a line to noisy data.
Step 1: Create your source file

Create linear_example.cpp:
linear_example.cpp
#include <mlpp.h>
#include <iostream>
#include <random>
#include <Eigen/Dense>

using namespace mlpp::regression;
using Matrix = Eigen::MatrixXd;
using Vector = Eigen::VectorXd;

int main() {
    // Generate synthetic data: y = 2x + 1 + noise.
    // A seeded Mersenne Twister from <random> replaces rand(): the run is
    // reproducible and the noise is genuinely uniform (rand() % k is biased
    // and its sequence is implementation-defined).
    std::mt19937 gen(42);
    std::uniform_real_distribution<double> noise(-0.5, 0.5);

    Matrix X(100, 1);   // 100 samples, 1 feature
    Vector y(100);      // targets

    for (int i = 0; i < 100; ++i) {
        X(i, 0) = i / 10.0;                        // x in [0, 9.9]
        y(i) = 2.0 * X(i, 0) + 1.0 + noise(gen);   // true line plus noise
    }

    // Create and fit the model.
    // Constructor args: (fit_intercept, regularization) — 0.0 means pure OLS.
    LinearRegression<double> model(true, 0.0);
    model.fit(X, y);

    // Display the learned parameters; they should be close to slope 2, intercept 1.
    std::cout << "Coefficient: " << model.coefficients()(0) << std::endl;
    std::cout << "Intercept: " << model.intercept() << std::endl;
    std::cout << "R² score: " << model.score(X, y) << std::endl;

    // Make predictions on unseen inputs (one sample per row).
    Matrix X_test(3, 1);
    X_test << 5.0, 10.0, 15.0;
    Vector predictions = model.predict(X_test);

    std::cout << "\nPredictions for X = [5, 10, 15]:" << std::endl;
    std::cout << predictions.transpose() << std::endl;

    return 0;
}
Step 2: Compile the example

g++ -std=c++20 linear_example.cpp -o linear_example -I/usr/local/include -I/usr/local/include/eigen3
Or with CMake:
CMakeLists.txt
# Minimal CMake project for the MLPP examples.
cmake_minimum_required(VERSION 3.20)
project(MLPPExamples CXX)

# MLPP requires C++20; make it a hard requirement rather than a suggestion.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Locate an installed MLPP (version 0.3 or compatible).
find_package(mlpp 0.3 REQUIRED)

add_executable(linear_example linear_example.cpp)
# Linking the imported target pulls in include paths and usage requirements.
target_link_libraries(linear_example PRIVATE mlpp::mlpp)
Step 3: Run the program

./linear_example
Expected output:
Coefficient: 2.00123
Intercept: 0.998765
R² score: 0.995432

Predictions for X = [5, 10, 15]:
11.0051  21.0113  31.0175

Understanding the API

MLPP follows scikit-learn’s familiar interface:
// Constructor: (fit_intercept, regularization, method)
LinearRegression<double> model(
    true,                          // fit intercept term
    0.0,                          // no regularization (pure OLS)
    LinearRegression<double>::SolveMethod::Auto  // auto-select solver
);

Solver selection

Linear regression supports multiple solvers optimized for different problem shapes:
  • Auto: Automatically selects based on data dimensions (recommended)
  • Cholesky: Fast for tall matrices (n >> d), uses normal equations
  • SVD: Numerically stable for any shape, uses thin BDCSVD
  • JacobiSVD: Maximum stability for ill-conditioned problems
LinearRegression<double> model(
    true, 
    0.0, 
    LinearRegression<double>::SolveMethod::SVD  // force SVD solver
);

Logistic regression example

Now let’s train a binary classifier:
logistic_example.cpp
#include <mlpp.h>
#include <iostream>
#include <random>
#include <Eigen/Dense>

using namespace mlpp::classifiers;
using Matrix = Eigen::MatrixXd;
using Vector = Eigen::VectorXd;

int main() {
    // Generate two well-separated Gaussian-ish blobs for binary classification.
    // A seeded <random> engine replaces rand(): reproducible runs and
    // unbiased uniform noise (rand() % k is biased and non-portable).
    std::mt19937 gen(42);
    std::uniform_real_distribution<double> jitter(-1.0, 1.0);

    Matrix X(200, 2);   // 200 samples, 2 features
    Vector y(200);      // binary labels (0.0 / 1.0)

    for (int i = 0; i < 100; ++i) {
        // Class 0: points around (0, 0)
        X(i, 0) = jitter(gen);
        X(i, 1) = jitter(gen);
        y(i) = 0.0;
    }

    for (int i = 100; i < 200; ++i) {
        // Class 1: points around (3, 3)
        X(i, 0) = 3.0 + jitter(gen);
        X(i, 1) = 3.0 + jitter(gen);
        y(i) = 1.0;
    }

    // Train logistic regression with gradient descent.
    LogisticRegressionBinary<double> model;
    model.fit(
        X, y,
        0.1,    // learning rate
        1000,   // max iterations
        1e-6    // convergence tolerance
    );

    // Predicted probability of class 1 for each sample.
    Vector probabilities = model.predict_proba(X);

    // Hard labels using a 0.5 decision threshold.
    Vector predictions = model.predict(X, 0.5);

    // Training accuracy: predictions are exact 0.0/1.0, so == is safe here.
    int correct = 0;
    for (int i = 0; i < y.size(); ++i) {
        if (predictions(i) == y(i)) correct++;
    }

    std::cout << "Training accuracy: "
              << (100.0 * correct / y.size()) << "%" << std::endl;

    std::cout << "Intercept: " << model.intercept() << std::endl;
    std::cout << "Coefficients: " << model.coefficients().transpose() << std::endl;

    return 0;
}
Expected output:
Training accuracy: 98.5%
Intercept: -4.52134
Coefficients: 1.50711  1.50893

Model validation

MLPP provides comprehensive validation tools:
validation_example.cpp
#include <mlpp.h>
#include <iostream>
#include <vector>

using namespace mlpp::classifiers;
using namespace mlpp::model_validation;

int main() {
    // Ground-truth labels and model predictions for a small binary problem.
    // In a real program these come from a trained classifier (see
    // logistic_example.cpp); they are hard-coded here so this snippet is
    // self-contained and actually compiles — the original left y and
    // predictions undefined.
    std::vector<int> y           = {0, 0, 0, 1, 1, 1, 1, 0};
    std::vector<int> predictions = {0, 0, 1, 1, 1, 0, 1, 0};

    // Create confusion matrix for 2 classes, then tally each (truth, guess) pair.
    ConfusionMatrix<int> cm(2);

    for (std::size_t i = 0; i < y.size(); ++i) {
        cm.update(y[i], predictions[i]);
    }

    // Derive per-class and aggregate metrics from the confusion matrix.
    Metrics<ConfusionMatrix<int>> metrics(cm);

    std::cout << "Precision (class 0): " << metrics.precision(0) << std::endl;
    std::cout << "Recall (class 0): " << metrics.recall(0) << std::endl;
    std::cout << "F1 score (class 0): " << metrics.f1(0) << std::endl;
    std::cout << "Macro F1: " << metrics.macro_f1() << std::endl;
    std::cout << "Micro F1: " << metrics.micro_f1() << std::endl;

    return 0;
}

Multiclass classification

For problems with more than 2 classes, use LogisticRegressionMulti:
using namespace mlpp::classifiers;

// Labels should be integers: 0, 1, 2, ..., K-1
LogisticRegressionMulti<double> model;
model.fit(
    X, y,
    0.01,   // learning rate
    1000,   // max iterations
    1e-6    // tolerance
);

// Get class probabilities (one column per class)
Matrix proba = model.predict_proba(X_test);

// Get predicted class labels
Vector predictions = model.predict(X_test);
Multiclass logistic regression uses one-vs-rest training internally, fitting K independent binary classifiers.

Dimensionality reduction

Reduce feature dimensions with PCA:
pca_example.cpp
#include <mlpp.h>
#include <iostream>
#include <Eigen/Dense>  // Eigen::MatrixXd is used directly below

int main() {
    // High-dimensional data: 100 samples, 50 features.
    // Filled with uniform random values so the snippet runs as-is; the
    // original left X uninitialized, which would feed garbage to fit().
    Eigen::MatrixXd X = Eigen::MatrixXd::Random(100, 50);

    // Reduce to 10 principal components.
    // NOTE(review): PCA is used unqualified here, unlike the other examples
    // which open an mlpp sub-namespace — confirm the namespace that exports
    // PCA (e.g. mlpp::decomposition) and add the matching using-directive.
    PCA pca(10);
    pca.fit(X);

    // Project the data onto the learned components (lower dimension).
    Eigen::MatrixXd X_reduced = pca.transform(X);
    std::cout << "Reduced shape: " << X_reduced.rows()
              << " x " << X_reduced.cols() << std::endl;

    // Map back to the original 50-dimensional space (lossy reconstruction).
    Eigen::MatrixXd X_reconstructed = pca.inverse_transform(X_reduced);

    // Access the principal axes themselves.
    Eigen::MatrixXd components = pca.get_components();

    return 0;
}

Complete project structure

Here’s a recommended project layout:
my_ml_project/
├── CMakeLists.txt
├── src/
│   └── main.cpp
└── external/
    └── mlpp/  (git submodule)
CMakeLists.txt:
# Top-level CMake build for a project that consumes MLPP.
cmake_minimum_required(VERSION 3.20)
project(MyMLProject VERSION 1.0 LANGUAGES CXX)

# MLPP requires C++20; fail the configure step if the compiler lacks it.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Option 1: Use installed MLPP
find_package(mlpp 0.3 REQUIRED)

# Option 2: Use as subdirectory
# add_subdirectory(external/mlpp)

add_executable(my_app src/main.cpp)
# The imported target carries include dirs and compile requirements with it.
target_link_libraries(my_app PRIVATE mlpp::mlpp)

Next steps

API reference

Explore the complete API documentation for all algorithms

Advanced examples

Learn about SVM kernels, cross-validation, and hyperparameter tuning

Performance tips

Optimize your models for production use

Contributing

Contribute new algorithms or improvements to MLPP

Build docs developers (and LLMs) love