Skip to main content

Overview

CryptoView Pro provides a flexible architecture for creating custom prediction models. This guide shows you how to extend the existing XGBoost, Prophet, and Hybrid model classes to build specialized predictors tailored to your trading strategy.

Model Architecture

Model Class Structure

All models in CryptoView Pro follow a consistent interface:
  • train(df) - Train the model on historical data
  • predict_future(df, periods) - Generate future predictions (note: the Prophet-based examples below call predict_future(periods) without a dataframe — check the concrete model's signature)
  • Feature engineering - Transform raw OHLCV data into ML features
  • Validation - Built-in backtesting and metrics

Extending XGBoost Models

Basic Custom XGBoost Model

The XGBoost predictor uses gradient boosting for short-term predictions (1-72 hours). Here’s how to extend it:
from models.xgboost_model import XGBoostCryptoPredictor
import pandas as pd
import numpy as np

class CustomXGBoostPredictor(XGBoostCryptoPredictor):
    """
    XGBoost predictor extended with additional engineered features.

    Adds price-acceleration, volume, volatility-regime, support/resistance
    and fractal features on top of the base feature set.
    """

    def __init__(self, n_estimators=300, learning_rate=0.05):
        # Deeper trees plus mild row/column subsampling to capture more
        # complex patterns while limiting overfitting.
        super().__init__(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=8,
            subsample=0.85,
            colsample_bytree=0.85
        )

    def create_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Extend the base feature set with custom features.

        Assumes the base class provides a 'return_1' (1-period return)
        column and that df carries OHLCV columns — TODO confirm against
        XGBoostCryptoPredictor.create_features.
        """
        # First, get all base features
        df = super().create_features(df)

        # 1. Order book imbalance (enable if you have bid/ask data)
        # df['order_imbalance'] = (df['bid_volume'] - df['ask_volume']) / \
        #                         (df['bid_volume'] + df['ask_volume'])

        # 2. Price acceleration (rate of change of returns)
        df['return_acceleration'] = df['return_1'].diff()

        # 3. Volume-weighted price momentum
        if 'volume' in df.columns:
            df['vwap_7'] = (df['close'] * df['volume']).rolling(7).sum() / \
                          df['volume'].rolling(7).sum()
            df['price_to_vwap'] = df['close'] / df['vwap_7']

        # 4. Volatility regime (0=low, 1=medium, 2=high).
        # NOTE: pd.qcut over the full series would set the quantile cut
        # points using FUTURE data (look-ahead leakage), and can also raise
        # when duplicates='drop' collapses bins while explicit labels are
        # given. Use expanding-window quantiles instead, so the regime at
        # time t depends only on data up to t.
        vol_rolling = df['return_1'].rolling(24).std()
        q_low = vol_rolling.expanding(min_periods=24).quantile(1 / 3)
        q_high = vol_rolling.expanding(min_periods=24).quantile(2 / 3)
        df['volatility_regime'] = np.select(
            [vol_rolling <= q_low, vol_rolling <= q_high],
            [0.0, 1.0],
            default=2.0,
        )
        # Keep NaN during the rolling-window warm-up period.
        df.loc[vol_rolling.isna(), 'volatility_regime'] = np.nan

        # 5. Support/Resistance levels: relative distance to 24h extremes
        df['distance_to_24h_high'] = (df['high'].rolling(24).max() - df['close']) / df['close']
        df['distance_to_24h_low'] = (df['close'] - df['low'].rolling(24).min()) / df['close']

        # 6. Fractal indicators (local peaks/troughs).
        # NOTE(review): shift(-1) peeks one bar ahead — acceptable only if
        # the training label is shifted accordingly downstream; verify.
        df['is_local_max'] = ((df['high'] > df['high'].shift(1)) &
                              (df['high'] > df['high'].shift(-1))).astype(int)
        df['is_local_min'] = ((df['low'] < df['low'].shift(1)) &
                              (df['low'] < df['low'].shift(-1))).astype(int)

        return df

Adding Custom Trading Signals

class SignalEnhancedPredictor(CustomXGBoostPredictor):
    """
    Predictor with integrated trading signals (MA crosses, RSI divergence,
    MACD momentum) and a confidence-scored prediction API.
    """

    def create_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add classic trading-signal features on top of the custom set."""
        df = super().create_features(df)

        # Golden Cross / Death Cross: 50-period MA crossing the 200-period MA
        df['ma_50'] = df['close'].rolling(50).mean()
        df['ma_200'] = df['close'].rolling(200).mean()
        df['golden_cross'] = ((df['ma_50'] > df['ma_200']) &
                              (df['ma_50'].shift(1) <= df['ma_200'].shift(1))).astype(int)
        df['death_cross'] = ((df['ma_50'] < df['ma_200']) &
                             (df['ma_50'].shift(1) >= df['ma_200'].shift(1))).astype(int)

        # RSI divergence: price highs the RSI does not confirm
        if 'rsi' in df.columns:
            price_peaks = df['close'].rolling(14).max()
            rsi_peaks = df['rsi'].rolling(14).max()
            df['rsi_divergence'] = (price_peaks.pct_change() - rsi_peaks.pct_change()) / 100

        # MACD momentum and signal-line crossover
        if 'macd' in df.columns and 'macd_signal' in df.columns:
            df['macd_momentum'] = df['macd'] - df['macd'].shift(3)
            df['macd_crossover'] = ((df['macd'] > df['macd_signal']) &
                                    (df['macd'].shift(1) <= df['macd_signal'].shift(1))).astype(int)

        return df

    def predict_with_confidence(self, df: pd.DataFrame, periods: int = 24):
        """
        Predict and attach a confidence score derived from the model's
        recent one-step-ahead backtest error (confidence = 1 / (1 + MAPE)).
        """
        predictions = self.predict_future(df, periods)

        # Walk backwards over up to 10 recent points, each time predicting
        # one step ahead from a 24-row window that ends BEFORE the target
        # row. (The previous i == 0 special case used df.iloc[-25:], which
        # included the target row in the input window — look-ahead leakage.)
        recent_errors = []
        for i in range(min(10, len(df) - 24)):
            test_point = df.iloc[-(i + 25):len(df) - (i + 1)]
            pred = self.predict_future(test_point, 1)
            actual = df['close'].iloc[-(i + 1)]
            error = abs(pred['predicted_price'].iloc[0] - actual) / actual
            recent_errors.append(error)

        # Guard: with too little history there are no backtest points;
        # keep NaN semantics without np.mean([])'s RuntimeWarning.
        avg_error = np.mean(recent_errors) if recent_errors else np.nan
        predictions['confidence'] = 1 / (1 + avg_error)

        return predictions

Extending Prophet Models

Custom Prophet with Additional Regressors

Prophet excels at long-term forecasting (1 week - 1 month). Add external factors:
from models.prophet_model import ProphetCryptoPredictor
import pandas as pd

class EnhancedProphetPredictor(ProphetCryptoPredictor):
    """
    Prophet predictor with additional regressors (normalized volume and
    rolling volatility) for external-factor awareness.
    """

    def __init__(self, **kwargs):
        super().__init__(
            changepoint_prior_scale=0.8,  # More flexible trend for crypto
            seasonality_prior_scale=15,
            **kwargs
        )
        # Guards against registering the same regressors twice, which
        # Prophet rejects.
        self.regressors_added = False

    def add_custom_regressors(self):
        """
        Register the extra regressor columns on the underlying Prophet
        model. Must happen before fitting — Prophet does not accept new
        regressors after fit. Assumes self.model is the (unfitted) Prophet
        instance — TODO confirm the base class does not rebuild the model
        inside train(), which would discard these regressors.
        """
        if not self.regressors_added:
            # Volume as regressor
            self.model.add_regressor('volume_normalized')

            # Volatility as regressor
            self.model.add_regressor('volatility')

            # Market sentiment (if available)
            # self.model.add_regressor('fear_greed_index')

            self.regressors_added = True

    def prepare_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Build the Prophet frame and append the regressor columns.

        NOTE(review): forecasting with regressors also requires FUTURE
        values for these columns; verify the base predict path supplies
        them.
        """
        prophet_df = super().prepare_data(df)

        # Volume z-score; fall back to 0 when volume is missing or constant
        # (a zero/NaN std would otherwise produce inf/NaN and break fitting).
        if 'volume' in df.columns:
            vol_std = df['volume'].std()
            if vol_std and not pd.isna(vol_std):
                volume_normalized = (df['volume'] - df['volume'].mean()) / vol_std
                prophet_df['volume_normalized'] = volume_normalized.values
            else:
                prophet_df['volume_normalized'] = 0
        else:
            prophet_df['volume_normalized'] = 0

        # 24-period rolling std of returns; warm-up NaNs filled with 0
        returns = df['close'].pct_change()
        volatility = returns.rolling(24).std().fillna(0)
        prophet_df['volatility'] = volatility.values

        return prophet_df

    def train(self, df: pd.DataFrame):
        """Register regressors, then delegate training to the base class."""
        self.add_custom_regressors()
        return super().train(df)

Multi-Cryptocurrency Prophet Model

class MultiCryptoProphet:
    """
    Train one Prophet model per cryptocurrency and blend the forecasts of
    strongly correlated assets into each symbol's prediction.
    """

    def __init__(self, crypto_symbols: list):
        self.symbols = crypto_symbols
        # One independent model per symbol.
        self.models = {symbol: EnhancedProphetPredictor()
                      for symbol in crypto_symbols}
        # Filled by train_all(): pairwise close-price correlations.
        self.correlation_matrix = None

    def train_all(self, data_dict: dict):
        """
        Train models for all cryptocurrencies.

        Args:
            data_dict: {symbol: dataframe} mapping; symbols without data
                are silently skipped.

        Returns:
            {symbol: training result} for each symbol that was trained.
        """
        results = {}

        for symbol in self.symbols:
            if symbol in data_dict:
                print(f"Training {symbol}...")
                results[symbol] = self.models[symbol].train(data_dict[symbol])

        # Refresh the pairwise correlations used by predict_ensemble().
        self._calculate_correlations(data_dict)

        return results

    def _calculate_correlations(self, data_dict: dict):
        """
        Calculate close-price correlations between cryptocurrencies.
        Rows are aligned on the dataframes' indices by pandas.
        """
        prices = pd.DataFrame({
            symbol: data_dict[symbol]['close']
            for symbol in self.symbols if symbol in data_dict
        })

        self.correlation_matrix = prices.corr()

    def predict_ensemble(self, symbol: str, periods: int):
        """
        Predict `symbol` using a correlation-weighted ensemble of all
        models whose |correlation| with it exceeds 0.5.

        NOTE(review): a negative correlation subtracts the other asset's
        PRICE LEVEL from the forecast; consider restricting the blend to
        positive correlations if that is not intended.
        """
        # Primary prediction for the requested symbol
        primary_pred = self.models[symbol].predict_future(periods)

        if self.correlation_matrix is not None:
            correlations = self.correlation_matrix[symbol].drop(symbol)
            # Only use strong correlations
            strong = correlations[abs(correlations) > 0.5]

            # Blend on raw arrays: the per-model prediction frames may
            # carry different indices, and Series '+' would align on index
            # and inject NaN instead of adding positionally.
            weighted = primary_pred['predicted_price'].to_numpy(dtype=float).copy()

            for other_symbol, correlation in strong.items():
                other_pred = self.models[other_symbol].predict_future(periods)
                other_values = other_pred['predicted_price'].to_numpy(dtype=float)
                overlap = min(len(weighted), len(other_values))
                weighted[:overlap] += other_values[:overlap] * correlation * 0.2

            # Normalize so the blended price stays on the primary's scale.
            primary_pred['predicted_price'] = weighted / (1 + 0.2 * abs(strong).sum())

        return primary_pred

Creating Custom Hybrid Models

Adaptive Hybrid with Dynamic Weighting

from models.hybrid_model import HybridCryptoPredictor
import numpy as np

class AdaptiveHybridPredictor(HybridCryptoPredictor):
    """
    Hybrid model whose XGBoost/Prophet blend weights adapt to each
    component's recent out-of-sample accuracy.
    """

    def __init__(self):
        super().__init__()
        self.xgboost = CustomXGBoostPredictor()  # Use our custom XGBoost
        self.prophet = EnhancedProphetPredictor()  # Use enhanced Prophet
        # Rolling log of per-prediction MAPEs, newest last (max 20 kept).
        self.performance_history = []

    def predict_future(self, df: pd.DataFrame, periods: int):
        """
        Predict, then re-blend the component forecasts with weights
        derived from recent accuracy (inverse-MAPE weighting).
        """
        predictions = super().predict_future(df, periods)

        if len(self.performance_history) >= 5:
            # Mean MAPE of each component over the last 5 recorded points
            xgb_accuracy = np.mean([p['xgboost_mape'] for p in self.performance_history[-5:]])
            prophet_accuracy = np.mean([p['prophet_mape'] for p in self.performance_history[-5:]])

            total_accuracy = xgb_accuracy + prophet_accuracy
            if total_accuracy > 0:
                # Lower MAPE = better accuracy = higher weight (inverse weighting)
                xgb_weight = prophet_accuracy / total_accuracy
                prophet_weight = xgb_accuracy / total_accuracy
            else:
                # Both components were perfect recently; split evenly.
                # (The previous code divided by zero here.)
                xgb_weight = prophet_weight = 0.5
        else:
            # Not enough history: favour XGBoost for short horizons and
            # shift toward Prophet as the horizon approaches a week (168h).
            xgb_weight = max(0, 1 - (periods / 168))
            prophet_weight = 1 - xgb_weight

        # Recombine component forecasts over their overlapping horizon
        if 'xgboost' in predictions and 'prophet' in predictions:
            combined = predictions['prophet'].copy()
            xgb_pred = predictions['xgboost']['predicted_price']
            prophet_pred = predictions['prophet']['predicted_price']

            overlap_len = min(len(xgb_pred), len(prophet_pred))
            combined.loc[combined.index[:overlap_len], 'predicted_price'] = (
                xgb_pred[:overlap_len].values * xgb_weight +
                prophet_pred[:overlap_len].values * prophet_weight
            )

            predictions['hybrid'] = combined
            predictions['dynamic_weights'] = {
                'xgboost': xgb_weight,
                'prophet': prophet_weight
            }

        return predictions

    def update_performance(self, actual_price: float,
                          xgb_prediction: float,
                          prophet_prediction: float):
        """
        Record one realized prediction's absolute percentage errors so
        later predict_future() calls can adapt the blend weights.
        """
        xgb_error = abs(actual_price - xgb_prediction) / actual_price * 100
        prophet_error = abs(actual_price - prophet_prediction) / actual_price * 100

        self.performance_history.append({
            'xgboost_mape': xgb_error,
            'prophet_mape': prophet_error
        })

        # Keep only the 20 most recent predictions
        if len(self.performance_history) > 20:
            self.performance_history = self.performance_history[-20:]

Using Custom Models in the App

Integration Example

# In app.py or a custom script
import streamlit as st
from your_models import AdaptiveHybridPredictor

# Initialize custom model.
# session_state survives Streamlit reruns, so the trained predictor is
# created once and kept across widget interactions.
if 'custom_predictor' not in st.session_state:
    st.session_state.custom_predictor = AdaptiveHybridPredictor()

# Train.
# NOTE(review): assumes `df` (the OHLCV dataframe) is defined earlier in
# the app, and that train() returns a dict containing 'data_points' —
# confirm against HybridCryptoPredictor.train.
if st.button("Train Custom Model"):
    with st.spinner("Training adaptive hybrid..."):
        training_info = st.session_state.custom_predictor.train(df)
        st.success(f"Trained on {training_info['data_points']} points")
        st.json(training_info)

# Predict 168 periods (7 days of hourly forecasts).
if st.button("Predict with Custom Model"):
    predictions = st.session_state.custom_predictor.predict_future(df, periods=168)
    
    # Show dynamic weights (present only when the adaptive blend ran)
    if 'dynamic_weights' in predictions:
        st.info(f"Dynamic Weights: {predictions['dynamic_weights']}")
    
    # Display prediction
    best_pred = st.session_state.custom_predictor.get_best_prediction(predictions)
    st.line_chart(best_pred['predicted_price'])

Best Practices

  • Keep it simple first: Start with base features, add complexity gradually
  • Domain knowledge: Use trading knowledge to create meaningful features
  • Avoid leakage: Never use future information in features
  • Handle NaN: Features with rolling windows create NaN at the start
  • Scale appropriately: XGBoost handles unscaled features, but normalization can help
  • XGBoost: Best for short-term (1-72h), high-frequency patterns
  • Prophet: Best for long-term (1 week+), captures seasonality
  • Hybrid: Best for medium-term (3 days - 2 weeks)
  • Custom: When you have specific domain knowledge or data sources
  • Time-series split: Never use random train/test split
  • Walk-forward validation: Test on multiple out-of-sample periods
  • Direction accuracy: More important than exact price for trading
  • Backtest thoroughly: At least 3-6 months of historical data
  • Save models: Use joblib.dump(model, 'model.pkl') to persist
  • Versioning: Track model versions and parameters
  • Monitoring: Log predictions vs actuals for performance tracking
  • Retraining: Retrain weekly/monthly as market conditions change
  • Fallback: Always have a backup model or strategy

Advanced: Custom Loss Functions

import xgboost as xgb

def custom_asymmetric_loss(y_true, y_pred):
    """
    Asymmetric squared-error objective for XGBoost.

    Under-prediction (y_pred < y_true) is penalized 4x more heavily than
    over-prediction — useful for long positions where missing upside is
    costly.

    Returns:
        (grad, hess): first and second derivatives of the loss with
        respect to y_pred, per the XGBoost custom-objective convention.
    """
    diff = y_true - y_pred
    # Per-sample curvature: 2.0 where we under-predicted, 0.5 otherwise.
    weight = np.where(diff > 0, 2.0, 0.5)
    grad = -weight * diff
    hess = weight
    return grad, hess

class AsymmetricXGBoost(CustomXGBoostPredictor):
    """XGBoost predictor trained with the asymmetric loss above, so
    under-predictions are penalized more than over-predictions."""
    def __init__(self, **kwargs):
        # Assumes the base __init__ creates the estimator at self.model
        # before this runs — TODO confirm. Passing a callable objective via
        # set_params requires xgboost's sklearn interface; verify the
        # installed version supports custom objectives this way.
        super().__init__(**kwargs)
        self.model.set_params(objective=custom_asymmetric_loss)

Next Steps

Feature Engineering

Deep dive into advanced feature engineering techniques

Production Deployment

Deploy your custom models to production

API Reference

Complete API documentation for all model classes

Examples

More custom model examples and use cases

Build docs developers (and LLMs) love