Multi-Chamber Heart Analysis
The MultiChamberPipeline performs chamber-specific analysis to identify patterns unique to each cardiac chamber (RA, RV, LA, LV).
Overview
This pipeline analyzes:
- Chamber-specific cell type composition
- Chamber-specific marker genes
- Cross-chamber correlation patterns
- Chamber-to-chamber differential expression
Prerequisites
Your data must include chamber annotations in
adata.obs['chamber'] with values: RA, RV, LA, LV
import scanpy as sc
# Load your data
adata = sc.read_h5ad('data/heart_data.h5ad')

# Check for chamber information
if 'chamber' in adata.obs.columns:
    print("Chamber distribution:")
    # dropna=False also counts cells that have no chamber label, so
    # incomplete annotations are visible at this check.
    print(adata.obs['chamber'].value_counts(dropna=False))
else:
    print("Warning: No chamber annotations found!")
    # List the available columns so a suitable source for the chamber
    # annotation can be picked.
    print("Available metadata:", adata.obs.columns.tolist())
import pandas as pd

# NOTE: the options in this snippet are alternatives. Each one assigns
# adata.obs['chamber'], so running several of them in sequence keeps only the
# result of the last one. Use the option that matches your data.

# Option 1: From sample metadata
# Map sample IDs to chambers
chamber_map = dict(
    sample_1='RA',
    sample_2='RV',
    sample_3='LA',
    sample_4='LV',
)
sample_ids = adata.obs['sample']
adata.obs['chamber'] = sample_ids.map(chamber_map)

# Option 2: From cell names (if encoded)
# Example: cell names like "RA_AAACCTGAG..."
# The text before the first underscore is taken as the chamber label.
name_parts = adata.obs_names.str.split('_')
adata.obs['chamber'] = name_parts.str[0]

# Option 3: Manual assignment based on metadata
def infer_chamber(row):
    """Infer the cardiac chamber label from a row's free-text ``tissue`` field.

    Args:
        row: Mapping-like object (e.g. a row of ``adata.obs``) that has a
            ``'tissue'`` entry.

    Returns:
        One of ``'RA'``, ``'RV'``, ``'LA'``, ``'LV'``, or ``'Unknown'`` when
        the tissue description matches no chamber or is not a string
        (for example a missing value).
    """
    tissue = row['tissue']
    if not isinstance(tissue, str):
        # Missing values (NaN/None) have no .upper(); treat them as unknown.
        return 'Unknown'
    tissue = tissue.upper()

    # Abbreviations are compared against whole tokens rather than substrings:
    # "LEFT VENTRICULAR" contains the letters "LA" and "INTERVENTRICULAR"
    # contains "RV", which a plain substring test would misclassify.
    tokens = set(
        ''.join(ch if ch.isalnum() else ' ' for ch in tissue).split()
    )

    # Checked in this order; the first match wins.
    rules = (
        ('RA', 'RIGHT ATRI'),
        ('RV', 'RIGHT VENT'),
        ('LA', 'LEFT ATRI'),
        ('LV', 'LEFT VENT'),
    )
    for chamber, phrase in rules:
        if phrase in tissue or chamber in tokens:
            return chamber
    return 'Unknown'
# Apply the row-wise inference to every cell's metadata row.
chamber_labels = adata.obs.apply(infer_chamber, axis=1)
adata.obs['chamber'] = chamber_labels

# Verify
print("Chamber distribution:")
chamber_counts = adata.obs['chamber'].value_counts()
print(chamber_counts)

# Save annotated data
adata.write('data/heart_data_with_chambers.h5ad')
from heartmap import Config

# Start from the default configuration, then point it at a dedicated working
# directory (presumably update_paths/create_directories set up the folder
# layout under that root; see the heartmap Config reference to confirm).
config = Config.default()
config.update_paths('./multi_chamber_analysis')
config.create_directories()

from heartmap.pipelines import MultiChamberPipeline

# Initialize pipeline
pipeline = MultiChamberPipeline(config)

# Run analysis on the chamber-annotated file
run_kwargs = {
    'data_path': 'data/heart_data_with_chambers.h5ad',
    'output_dir': 'results/multi_chamber',
}
results = pipeline.run(**run_kwargs)
print("Multi-chamber analysis completed!")
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

adata = results['adata']

# Create composition table: rows are Leiden clusters, columns are chambers.
# normalize='columns' makes each chamber column sum to 1 before scaling.
cluster_fractions = pd.crosstab(
    index=adata.obs['leiden'],
    columns=adata.obs['chamber'],
    normalize='columns',
)
composition = cluster_fractions.mul(100)  # Convert to percentages

print("Cell type composition by chamber (%):\n")
print(composition.round(1))

# Visualize as heatmap
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    composition,
    annot=True,
    fmt='.1f',
    cmap='YlOrRd',
    cbar_kws={'label': '% of cells'},
    ax=ax,
)
ax.set_title('Cell Type Composition Across Chambers')
ax.set_xlabel('Chamber')
ax.set_ylabel('Cluster')
fig.tight_layout()
fig.savefig('results/multi_chamber/composition_heatmap.png', dpi=300)
import scanpy as sc

# Find marker genes for each chamber
sc.tl.rank_genes_groups(
    adata,
    groupby='chamber',
    method='wilcoxon',
    key_added='chamber_markers'
)

# Iterate over the groups that were actually tested rather than a hard-coded
# list: a chamber missing from the data would otherwise raise, and labels
# outside RA/RV/LA/LV would be skipped. The result arrays are structured
# arrays with one field per tested group.
tested_chambers = adata.uns['chamber_markers']['names'].dtype.names

for chamber in tested_chambers:
    # Fetch the full marker table once; it is used for both the CSV export
    # and the top-10 printout.
    markers = sc.get.rank_genes_groups_df(
        adata,
        group=chamber,
        key='chamber_markers'
    )

    # Save all markers
    markers.to_csv(
        f'results/multi_chamber/markers_{chamber}.csv',
        index=False
    )

    # Extract top markers per chamber
    print(f"\nTop 10 markers for {chamber}:")
    for row in markers.head(10).itertuples(index=False):
        print(f" {row.names}: "
              f"log2fc={row.logfoldchanges:.2f}, "
              f"padj={row.pvals_adj:.2e}")
from itertools import combinations

import numpy as np
from scipy.stats import pearsonr

# Only keep chambers that actually have cells; an empty chamber would give an
# all-NaN mean profile and break the correlation step.
chamber_labels = adata.obs['chamber']
chambers = [
    chamber for chamber in ['RA', 'RV', 'LA', 'LV']
    if (chamber_labels == chamber).any()
]

# Calculate mean expression per chamber
chamber_profiles = {}
for chamber in chambers:
    # Use a plain NumPy boolean mask for indexing the expression matrix.
    chamber_mask = (chamber_labels == chamber).to_numpy()
    # np.asarray(...).ravel() yields a 1-D vector for both sparse matrices
    # (whose mean returns a 2-D np.matrix) and dense arrays.
    chamber_profiles[chamber] = np.asarray(
        adata.X[chamber_mask].mean(axis=0)
    ).ravel()

# Compute pairwise correlations. Start from the identity (self-correlation
# is 1.0) and fill each unordered pair once, mirroring it across the diagonal.
corr_matrix = pd.DataFrame(
    np.eye(len(chambers)),
    index=chambers,
    columns=chambers,
)
for ch1, ch2 in combinations(chambers, 2):
    corr, _ = pearsonr(chamber_profiles[ch1], chamber_profiles[ch2])
    corr_matrix.loc[ch1, ch2] = corr
    corr_matrix.loc[ch2, ch1] = corr

print("\nCross-chamber correlations:")
print(corr_matrix.round(3))

# Visualize
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix.astype(float),
            annot=True, fmt='.3f',
            cmap='coolwarm', center=0.9,
            vmin=0.8, vmax=1.0)
plt.title('Cross-Chamber Expression Correlations')
plt.tight_layout()
plt.savefig('results/multi_chamber/chamber_correlations.png', dpi=300)
from pathlib import Path

import matplotlib.pyplot as plt
import scanpy as sc

chambers = ['RA', 'RV', 'LA', 'LV']

# UMAP colored by chamber
sc.pl.umap(adata, color='chamber',
           palette={'RA': '#FF6B6B', 'RV': '#4ECDC4',
                    'LA': '#45B7D1', 'LV': '#96CEB4'},
           title='Cells by Chamber',
           save='_by_chamber.png')

# Split UMAP by chamber.
# sc.pl.umap has no `split_show` argument, so draw one panel per chamber on a
# shared 2x2 figure instead.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
for ax, chamber in zip(axes.flat, chambers):
    chamber_mask = adata.obs['chamber'] == chamber
    if not chamber_mask.any():
        # Nothing to draw for a chamber that is absent from the data.
        ax.set_title(f'{chamber} (no cells)')
        ax.set_axis_off()
        continue
    sc.pl.umap(adata[chamber_mask].copy(), color='leiden',
               title=chamber, ax=ax, show=False)
fig.subplots_adjust(wspace=0.3)
# Write next to the other scanpy figures, using the name that
# save='_split_by_chamber.png' would have produced.
figdir = Path(sc.settings.figdir)
figdir.mkdir(parents=True, exist_ok=True)
fig.savefig(figdir / 'umap_split_by_chamber.png', bbox_inches='tight')
plt.close(fig)

# Chamber-specific marker expression
chamber_markers = {
    'RA': ['NPPA', 'MIR100HG', 'MYL7'],
    'RV': ['NEAT1', 'MYH7', 'FHL2'],
    'LA': ['NPPA', 'ELN', 'EBF2'],
    'LV': ['CD36', 'FHL2', 'MYH7']
}

# Plot marker genes
for chamber, genes in chamber_markers.items():
    # Filter genes present in data
    genes_present = [g for g in genes if g in adata.var_names]
    if genes_present:
        # One title per panel: a single string would label only the first
        # panel and hide which gene it shows.
        sc.pl.umap(adata, color=genes_present,
                   ncols=3, cmap='viridis',
                   title=[f'{chamber} marker: {g}' for g in genes_present],
                   save=f'_{chamber}_markers.png')
# Save chamber-specific subsets
for chamber in ['RA', 'RV', 'LA', 'LV']:
    chamber_data = adata[adata.obs['chamber'] == chamber].copy()
    if chamber_data.n_obs == 0:
        # Do not write an empty file for a chamber that is absent.
        print(f"{chamber}: no cells found, skipping")
        continue
    chamber_data.write(
        f'results/multi_chamber/data_{chamber}.h5ad'
    )
    print(f"{chamber}: {chamber_data.n_obs} cells saved")

# Export summary statistics
summary = []
# dropna() keeps unannotated cells (NaN chamber) from producing an empty row.
for chamber in adata.obs['chamber'].dropna().unique():
    # Filter the metadata once per chamber and reuse it for every statistic.
    chamber_obs = adata.obs[adata.obs['chamber'] == chamber]
    summary.append({
        'chamber': chamber,
        'n_cells': len(chamber_obs),
        'n_cell_types': chamber_obs['leiden'].nunique(),
        'mean_genes': chamber_obs['n_genes'].mean(),
        'mean_counts': chamber_obs['total_counts'].mean()
    })

summary_df = pd.DataFrame(summary)
summary_df.to_csv('results/multi_chamber/chamber_summary.csv', index=False)
print("\nChamber Summary:")
print(summary_df)
Complete Working Example
Expected Output Structure
Scientific Context
Known Chamber-Specific Markers
Right Atrium (RA)
- NPPA: Atrial natriuretic peptide
- MIR100HG: microRNA host gene
- MYL7: Myosin light chain 7
- PDE4D: Phosphodiesterase 4D
Right Ventricle (RV)
- NEAT1: Nuclear paraspeckle assembly
- MYH7: Myosin heavy chain 7
- FHL2: Four and a half LIM domains 2
- PCDH7: Protocadherin 7
Left Atrium (LA)
- NPPA: Shared with RA
- ELN: Elastin
- EBF2: Early B-cell factor 2
- RORA: RAR-related orphan receptor A
Left Ventricle (LV)
- CD36: Fatty acid transporter
- FHL2: Shared with RV
- MYH7: Shared with RV
- TTN: Titin
Expected Correlations
Based on published data:
- RV vs LV: r ≈ 0.985 (highest, both ventricles)
- RA vs LA: r ≈ 0.960 (both atria)
- LA vs LV: r ≈ 0.870 (lowest, different chambers)
Best Practices
Data Quality
- Ensure balanced cell numbers across chambers
- Minimum 1,000 cells per chamber recommended
- Verify chamber annotations are accurate
Statistical Power
- Use appropriate multiple testing correction
- Consider chamber-specific batch effects
- Account for donor-to-donor variation
Biological Interpretation
- Validate findings with chamber physiology
- Consider functional differences (atria vs ventricles)
- Check for known disease markers
Next Steps
Comprehensive Pipeline
Combine all analyses
Visualization
Advanced plotting
API Reference
Full documentation