Load, parse, and analyze the pipeline’s compressed JSON output in Python, pandas, and command-line tools
The EDL Pipeline produces all_stocks_fundamental_analysis.json.gz, a compressed JSON file containing 86 fields for ~2,775 NSE stocks. This guide shows how to load and work with this data.
import pandas as pd
import gzip
import json

# Load the compressed JSON array of stock records straight into a DataFrame.
with gzip.open('all_stocks_fundamental_analysis.json.gz', 'rt') as fh:
    df = pd.DataFrame(json.load(fh))

# Per-sector P/E statistics, highest average first.
sector_pe = df.groupby('Sector')['P/E'].agg(['mean', 'median', 'count'])
print(sector_pe.sort_values('mean', ascending=False))

# Ten sectors with the largest average market capitalization.
sector_mcap = (
    df.groupby('Sector')['Market Cap(Cr.)']
    .mean()
    .sort_values(ascending=False)
)
print(sector_mcap.head(10))
import gzip
import json

# Load the full list of stock records from the compressed JSON file.
with gzip.open('all_stocks_fundamental_analysis.json.gz', 'rt') as fh:
    stocks = json.load(fh)

# Keep only records whose 'Event Markers' field carries the
# recent-results marker (📊).
recent_results = [row for row in stocks if '📊' in row.get('Event Markers', '')]

print(f"Found {len(recent_results)} stocks with recent results:")
for stock in recent_results[:20]:
    print(f"{stock['Symbol']:15} Returns since Earnings: {stock.get('Returns since Earnings(%)', 0)}%")
import gzip
import json

# Load the full list of stock records from the compressed JSON file.
with gzip.open('all_stocks_fundamental_analysis.json.gz', 'rt') as fh:
    stocks = json.load(fh)


def _in_strong_uptrend(row):
    """True when price is above the 20/50/200-day SMAs, RSI(14) > 50,
    and the one-month return exceeds 5%."""
    sma = row.get('SMA Status', '')
    return (
        'SMA 20: Above' in sma
        and 'SMA 50: Above' in sma
        and 'SMA 200: Above' in sma
        and row.get('RSI (14)', 0) > 50
        and row.get('1 Month Returns(%)', 0) > 5
    )


strong_momentum = [row for row in stocks if _in_strong_uptrend(row)]

print(f"Found {len(strong_momentum)} stocks in strong uptrend:")
for stock in strong_momentum[:15]:
    print(f"{stock['Symbol']:15} 1M: {stock['1 Month Returns(%)']:6.2f}% RSI: {stock['RSI (14)']}")
For extremely large datasets, stream processing avoids loading the entire file into memory:
import gzip
import json


def iter_stocks(path, chunk_size=65536):
    """Yield stock records one at a time from a gzipped JSON array.

    Reads the file in fixed-size chunks and decodes objects incrementally
    with ``json.JSONDecoder.raw_decode``, so the whole array is never held
    in memory at once.  Unlike naive line-by-line parsing — which only
    works when each JSON object occupies exactly one line — this handles
    compact, one-object-per-line, and pretty-printed JSON alike.

    Args:
        path: Path to a ``.json.gz`` file containing a JSON array of objects.
        chunk_size: Characters to read per chunk (default 64 KiB).

    Yields:
        dict: One decoded stock record per iteration.
    """
    decoder = json.JSONDecoder()
    buf = ''
    with gzip.open(path, 'rt') as fh:
        # Advance past the opening '[' of the top-level array.
        while True:
            chunk = fh.read(chunk_size)
            if not chunk:
                return  # empty or bracket-less file: nothing to yield
            buf += chunk
            start = buf.find('[')
            if start != -1:
                buf = buf[start + 1:]
                break
        while True:
            # Skip inter-element whitespace and the separating comma.
            buf = buf.lstrip()
            if buf.startswith(','):
                buf = buf[1:].lstrip()
            if buf.startswith(']'):
                return  # reached the end of the array
            try:
                obj, end = decoder.raw_decode(buf)
            except json.JSONDecodeError:
                # Object straddles the chunk boundary: pull in more data.
                chunk = fh.read(chunk_size)
                if not chunk:
                    return  # truncated input; stop quietly
                buf += chunk
                continue
            yield obj
            buf = buf[end:]


def main():
    """Count and print stocks trading at a P/E above 50."""
    high_pe_count = 0
    for stock in iter_stocks('all_stocks_fundamental_analysis.json.gz'):
        pe = stock.get('P/E')
        # P/E may be missing or null (e.g. loss-making stocks); the original
        # `stock.get('P/E', 0) > 50` raised TypeError on an explicit null.
        if pe is not None and pe > 50:
            high_pe_count += 1
            print(f"{stock['Symbol']}: P/E {stock['P/E']}")
    print(f"\nTotal high P/E stocks: {high_pe_count}")


if __name__ == '__main__':
    main()
The current output file (~2 MB compressed, ~50 MB uncompressed) easily fits in memory on most systems. Stream processing is only needed for significantly larger datasets.
import gzip
import json

# Load the full list of stock records from the compressed JSON file.
with gzip.open('all_stocks_fundamental_analysis.json.gz', 'rt') as fh:
    stocks = json.load(fh)

# Fields every record must carry with a usable value (not null, not 0,
# not an empty string).
required = ['Symbol', 'Stock Price(₹)', 'Market Cap(Cr.)', 'P/E']

# Collect one (symbol, field) pair per missing or unusable value.
missing_data = [
    (stock.get('Symbol', 'UNKNOWN'), field)
    for stock in stocks
    for field in required
    if field not in stock or stock[field] in [None, 0, '']
]

if missing_data:
    print(f"Found {len(missing_data)} missing field instances:")
    for symbol, field in missing_data[:20]:
        print(f" {symbol}: {field}")
else:
    print("All required fields present!")