Overview
Phase 3 is the critical transformation step where raw data from Phases 1 and 2 is analyzed and merged into the master all_stocks_fundamental_analysis.json file. This single script (bulk_market_analyzer.py) builds the base JSON structure with ~60 fields per stock.
Critical Script: If bulk_market_analyzer.py fails, the pipeline aborts. All Phase 4 scripts depend on this output.
Script: bulk_market_analyzer.py
Location: bulk_market_analyzer.py
Input Dependencies:
- fundamental_data.json (Phase 1)
- dhan_data_response.json (Phase 1)
- advanced_indicator_data.json (Phase 2)
- nse_equity_list.csv (Phase 1)
Output:
- all_stocks_fundamental_analysis.json - Base master JSON
Data Integration Flow
Core Analysis Logic
1. Data Loading
def analyze_all_stocks():
    """Load every Phase 1 / Phase 2 input needed to build the master JSON.

    This excerpt shows only the loading stage of the function. It builds
    three symbol-keyed lookup tables that the per-stock loop consumes:

    * ``listing_date_map`` - SYMBOL -> listing date, from nse_equity_list.csv
    * ``dhan_tech_map``    - "Sym" -> full Dhan record, from dhan_data_response.json
    * ``adv_tech_map``     - "Symbol" -> full record, from advanced_indicator_data.json

    Any missing or malformed input file raises, which aborts the pipeline.
    """
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(BASE_DIR, "fundamental_data.json")
    ADVANCED_FILE = os.path.join(BASE_DIR, "advanced_indicator_data.json")
    output_file = os.path.join(BASE_DIR, "all_stocks_fundamental_analysis.json")

    # Load fundamental data.
    # The encoding is explicit so the result does not depend on the locale.
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Load listing dates.
    # newline="" is what the csv module requires; utf-8-sig drops a leading
    # BOM that would otherwise be glued onto the "SYMBOL" header.
    listing_date_map = {}
    csv_path = os.path.join(BASE_DIR, "nse_equity_list.csv")
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            sym = row.get("SYMBOL")
            # The header is looked up with and without a leading space.
            date_list = row.get(" DATE OF LISTING") or row.get("DATE OF LISTING")
            if sym and date_list:
                listing_date_map[sym] = date_list

    # Load technical data from Dhan
    dhan_tech_map = {}
    dhan_file = os.path.join(BASE_DIR, "dhan_data_response.json")
    with open(dhan_file, "r", encoding="utf-8") as f:
        dhan_data = json.load(f)
        for item in dhan_data:
            sym = item.get("Sym")
            if sym:
                dhan_tech_map[sym] = item

    # Load advanced indicators
    adv_tech_map = {}
    with open(ADVANCED_FILE, "r", encoding="utf-8") as f:
        adv_data = json.load(f)
        for item in adv_data:
            sym = item.get("Symbol")
            if sym:
                adv_tech_map[sym] = item
2. Helper Functions
def get_float(value_str):
    """Safely convert a value to a finite float.

    Args:
        value_str: Anything ``float()`` accepts (str, int, float) or an
            unusable value such as ``None`` or free text.

    Returns:
        The converted float, or ``0.0`` when the value cannot be converted,
        overflows, or is not finite (NaN / +-inf). Non-finite values are
        rejected because ``json.dump`` would write them as ``NaN`` /
        ``Infinity``, which is not valid JSON.
    """
    import math  # function-scope import so the helper has no file-level dependency

    try:
        result = float(value_str)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: integers too large to be represented as a float.
        return 0.0
    return result if math.isfinite(result) else 0.0
def calculate_change(current, previous):
    """Calculate the percentage change from ``previous`` to ``current``.

    The difference is divided by ``abs(previous)`` so that the sign of the
    result follows the direction of the move even when the base is negative
    (e.g. a loss shrinking from -100 to -50 is reported as +50%).

    Args:
        current: The newer value.
        previous: The base value the change is measured against.

    Returns:
        The percentage change as a float, or ``0.0`` when ``previous`` is 0
        (no meaningful base to compare against).
    """
    if previous == 0:
        return 0.0
    return ((current - previous) / abs(previous)) * 100
def get_value_from_pipe_string(pipe_string, index):
    """Extract one numeric value from a pipe-separated string.

    Example: "18234.5|17123.2|16345.8" with index=0 returns 18234.5

    Args:
        pipe_string: Values joined by ``|``. ``None`` and empty input are
            treated as "no data"; a non-string value is converted with
            ``str()`` first, so a bare number behaves like a one-element
            series.
        index: Zero-based position of the wanted value.

    Returns:
        The value at ``index`` converted by ``get_float``, or ``0.0`` when
        the input is empty or the index is out of range. Negative indexes
        are out of range: they must not silently wrap around to the other
        end of the series.
    """
    if not pipe_string or index < 0:
        return 0.0
    parts = str(pipe_string).split('|')
    if index < len(parts):
        return get_float(parts[index])
    return 0.0
3. Per-Stock Analysis Loop
# One output record is collected per entry in fundamental_data.json.
analyzed_data = []
for item in data:  # Loop through fundamental_data.json
    symbol = item.get("Symbol", "UNKNOWN")
    # Stocks missing from either technical source get an empty record, so
    # every later .get() simply falls back to its default.
    tech = dhan_tech_map.get(symbol, {})
    adv_tech = adv_tech_map.get(symbol, {})

    # Extract data sections.
    # "or {}" also covers a key that is present but holds JSON null, which
    # .get(key, {}) would pass through as None and crash the later lookups.
    cq = item.get("incomeStat_cq") or {}      # Quarterly P&L
    cy = item.get("incomeStat_cy") or {}      # Annual P&L
    ttm_cy = item.get("TTM_cy") or {}         # Trailing 12 months
    cv = item.get("CV") or {}                 # Company variables
    roce_roe = item.get("roce_roe") or {}     # Return ratios
    shp = item.get("sHp") or {}               # Shareholding pattern
    bs_c = item.get("bs_c") or {}             # Balance sheet
    # ... Analysis continues
4. Fundamental Metrics Extraction
Profit & Loss Analysis
Profit & Loss Analysis
# Net Profit for the five most recent quarters, newest first
# (offset 0 = latest quarter, offset 4 = same quarter one year earlier).
net_profit_series = cq.get("NET_PROFIT")
np_latest, np_prev, np_2_back, np_3_back, np_last_year_q = (
    get_value_from_pipe_string(net_profit_series, offset) for offset in range(5)
)
# Growth of the latest quarter: sequential (QoQ) and year-over-year (YoY)
qoq_np = calculate_change(np_latest, np_prev)
yoy_np = calculate_change(np_latest, np_last_year_q)
EPS Analysis
EPS Analysis
# Raw pipe-separated EPS series: quarterly (cq) and annual (cy)
quarterly_eps_series = cq.get("EPS")
annual_eps_series = cy.get("EPS")

# Quarterly EPS: latest, previous, and the same quarter one year earlier
eps_latest = get_value_from_pipe_string(quarterly_eps_series, 0)
eps_prev = get_value_from_pipe_string(quarterly_eps_series, 1)
eps_last_year_q = get_value_from_pipe_string(quarterly_eps_series, 4)

# Annual EPS: most recent entry and the one before it
eps_last_year_annual = get_value_from_pipe_string(annual_eps_series, 0)
eps_2_years_back_annual = get_value_from_pipe_string(annual_eps_series, 1)

# Sequential (QoQ) and year-over-year (YoY) growth of quarterly EPS
qoq_eps = calculate_change(eps_latest, eps_prev)
yoy_eps = calculate_change(eps_latest, eps_last_year_q)
Revenue Analysis
Revenue Analysis
# Quarterly Sales: latest, previous, and the same quarter one year earlier
sales_latest = get_value_from_pipe_string(cq.get("SALES"), 0)
sales_prev = get_value_from_pipe_string(cq.get("SALES"), 1)
sales_last_year_q = get_value_from_pipe_string(cq.get("SALES"), 4)

# Growth rates, computed the same way as for Net Profit, EPS and OPM.
# The final record reads both names ("QoQ % Sales Latest" / "YoY % Sales Latest").
qoq_sales = calculate_change(sales_latest, sales_prev)
yoy_sales = calculate_change(sales_latest, sales_last_year_q)

# Annual Sales for 5-year CAGR
sales_current_annual = get_value_from_pipe_string(cy.get("SALES"), 0)
sales_5_years_ago = get_value_from_pipe_string(cy.get("SALES"), 5)

# Calculate 5-year CAGR: ((current / base) ** (1/5) - 1) * 100.
# The base must be positive, and the current value must not be negative:
# a negative ratio raised to a fractional power yields a complex number in
# Python, which would later break round().
sales_growth_5y = 0.0
if sales_5_years_ago > 0 and sales_current_annual >= 0:
    sales_growth_5y = ((sales_current_annual / sales_5_years_ago) ** (1 / 5) - 1) * 100
Operating Margin Analysis
Operating Margin Analysis
# Operating margin: quarterly series from cq, trailing-twelve-month figure from ttm_cy
quarterly_opm_series = cq.get("OPM")
opm_latest = get_value_from_pipe_string(quarterly_opm_series, 0)
opm_prev = get_value_from_pipe_string(quarterly_opm_series, 1)
opm_last_year_q = get_value_from_pipe_string(quarterly_opm_series, 4)
opm_ttm = get_float(ttm_cy.get("OPM"))

# Relative change of the latest margin versus the previous quarter (QoQ)
# and versus the same quarter one year earlier (YoY)
qoq_opm = calculate_change(opm_latest, opm_prev)
yoy_opm = calculate_change(opm_latest, opm_last_year_q)
5. Valuation Ratios
# Return ratios
roe = get_float(roce_roe.get("ROE"))
roce = get_float(roce_roe.get("ROCE"))

# Valuation multiples
pe = get_float(cv.get("STOCK_PE"))

# Debt-to-Equity: latest non-current liabilities over latest total equity.
# Falls back to 0.0 when equity is zero or missing.
non_current_liab = get_value_from_pipe_string(bs_c.get("NON_CURRENT_LIABILITIES"), 0)
total_equity = get_value_from_pipe_string(bs_c.get("TOTAL_EQUITY"), 0)
de_ratio = non_current_liab / total_equity if total_equity != 0 else 0.0

# PEG Ratio: P/E divided by the YoY EPS growth percentage.
# Only defined when both are positive; otherwise it stays at 0.0.
peg = 0.0
if yoy_eps > 0 and pe > 0:
    peg = pe / yoy_eps

# Forward P/E: the current P/E rescaled from TTM EPS to the latest
# quarter's EPS annualised (x4).
forward_pe = 0.0
if eps_latest > 0 and pe > 0:
    annualized_eps = eps_latest * 4  # strictly positive because eps_latest > 0
    ttm_eps = get_float(ttm_cy.get("EPS"))
    forward_pe = pe * (ttm_eps / annualized_eps)
6. Shareholding Patterns
# FII Holdings: change between the two most recent entries
fii_latest = get_value_from_pipe_string(shp.get("FII"), 0)
fii_prev = get_value_from_pipe_string(shp.get("FII"), 1)
fii_change_qoq = fii_latest - fii_prev

# DII Holdings: change between the two most recent entries
dii_latest = get_value_from_pipe_string(shp.get("DII"), 0)
dii_prev = get_value_from_pipe_string(shp.get("DII"), 1)
dii_change_qoq = dii_latest - dii_prev

# Free Float: everything not held by promoters.
# A missing promoter figure parses as 0.0, which makes this 100.0.
promoter_latest = get_value_from_pipe_string(shp.get("PROMOTER"), 0)
free_float_pct = 100.0 - promoter_latest

# Float Shares (in crores)
# NOTE(review): mcap_cr and ltp are not assigned in this excerpt. ltp is read
# from the Dhan record in the Technical Indicators excerpt and mcap_cr is not
# shown on this page; both must already be set when this runs - confirm the
# ordering against the script.
float_shares_cr = 0.0
if mcap_cr > 0 and ltp > 0:
    total_shares_cr = mcap_cr / ltp
    float_shares_cr = total_shares_cr * (free_float_pct / 100.0)
7. Technical Indicators
# Price and Moving Averages, read from the Dhan record for this stock
ltp = get_float(tech.get("Ltp", 0))
sma_200 = get_float(tech.get("DaySMA200CurrentCandle", 0))
sma_50 = get_float(tech.get("DaySMA50CurrentCandle", 0))
rsi_14 = get_float(tech.get("DayRSI14CurrentCandle", 0))

# Distance from SMAs, as a percentage of the SMA.
# Each distance stays at 0.0 unless both the price and the SMA are positive.
pct_from_sma_200 = 0.0
if sma_200 > 0 and ltp > 0:
    pct_from_sma_200 = ((ltp - sma_200) / sma_200) * 100

pct_from_sma_50 = 0.0
if sma_50 > 0 and ltp > 0:
    pct_from_sma_50 = ((ltp - sma_50) / sma_50) * 100

# 52-week high: the distance is negative when the price is below the high
high_52w = get_float(tech.get("High1Yr", 0))
pct_from_52w_high = 0.0
if high_52w > 0 and ltp > 0:
    pct_from_52w_high = ((ltp - high_52w) / high_52w) * 100
8. Index Membership
# Requested indices (from pipeline design)
# Index IDs whose names are reported in the "Index" output field.
requested_indices = {
    13, 51, 38, 17, 18, 19, 20, 37, 1, 442, 443, 22, 5, 3, 444, 7, 14,
    25, 27, 28, 447, 35, 41, 46, 44, 16, 43, 42, 45, 39, 466, 34, 32, 15, 33, 31, 30, 29
}

# Collect the names of the requested indices this stock belongs to,
# in the order they appear in the Dhan record.
indices_found = []
idx_list_raw = tech.get("idxlist", [])
if isinstance(idx_list_raw, list):
    for idx_obj in idx_list_raw:
        if not isinstance(idx_obj, dict):
            # Skip malformed entries instead of raising AttributeError.
            continue
        idx_id = idx_obj.get("Indexid")
        idx_name = idx_obj.get("Name")
        if idx_id in requested_indices and idx_name:
            indices_found.append(idx_name)
- 13 = NIFTY 50
- 51 = NIFTY 100
- 1 = NIFTY 500
- 442 = NIFTY MIDCAP 100
- 443 = NIFTY SMALLCAP 100
9. Advanced Technical Indicators
# SMA Status with % distance.
# "or []" / "or ''" tolerate keys that are present but null, which the
# plain .get(key, default) form would pass through as None.
sma_signals = []
smas = adv_tech.get("SMA") or []
target_smas = ["20", "50", "200"]
for s in smas:
    # "20-SMA" -> "20"
    ind_name = (s.get("Indicator") or "").replace("-SMA", "")
    val = get_float(s.get("Value"))
    if ind_name in target_smas and val > 0 and ltp > 0:
        diff = ((ltp - val) / val) * 100
        status = "Above" if diff > 0 else "Below"
        sma_signals.append(f"SMA {ind_name}: {status} ({round(diff, 1)}%)")

# EMA Status (same rules as the SMA block)
ema_signals = []
emas = adv_tech.get("EMA") or []
target_emas = ["20", "50", "200"]
for e in emas:
    # "20-EMA" -> "20"
    ind_name = (e.get("Indicator") or "").replace("-EMA", "")
    val = get_float(e.get("Value"))
    if ind_name in target_emas and val > 0 and ltp > 0:
        diff = ((ltp - val) / val) * 100
        status = "Above" if diff > 0 else "Below"
        ema_signals.append(f"EMA {ind_name}: {status} ({round(diff, 1)}%)")

# Technical Sentiment (RSI, MACD): keep the source's action label for any
# indicator whose name contains "RSI" or "MACD"
tech_inds = adv_tech.get("TechnicalIndicators") or []
sentiment_summary = []
for t in tech_inds:
    name = t.get("Indicator") or ""
    action = t.get("Action") or ""
    if "RSI" in name:
        sentiment_summary.append(f"RSI: {action}")
    elif "MACD" in name:
        sentiment_summary.append(f"MACD: {action}")

# Pivot Points: the "PP" value of the "Classic" set in the first pivot entry
pivots = adv_tech.get("Pivots", [])
classic_pivot = "N/A"
if pivots and isinstance(pivots, list):
    classic = pivots[0].get("Classic") or {}
    classic_pivot = classic.get("PP", "N/A")
10. Final Assembly
# Assemble the output record for the current stock and append it to
# analyzed_data. The keys are the field names of the output JSON, written in
# the order listed here. Computed percentages and ratios are rounded to 2
# decimals; values read straight from the inputs are passed through as-is.
# Text fields with no data fall back to "N/A".
# NOTE(review): mcap_cr and latest_quarter_str are not assigned in any excerpt
# on this page - presumably computed earlier in the loop; confirm against the
# script.
stock_analysis = {
# Basic Info
"Symbol": symbol,
"Name": item.get("Name", ""),
"Listing Date": listing_date_map.get(symbol, "N/A"),
"Basic Industry": cv.get("INDUSTRY_NAME", "N/A"),
"Sector": cv.get("SECTOR", "N/A"),
"Market Cap(Cr.)": mcap_cr,
"Stock Price(₹)": ltp,
# Comma-separated names of the matched indices
"Index": ", ".join(indices_found) if indices_found else "N/A",
# Latest Quarter Data
"Latest Quarter": latest_quarter_str,
"Net Profit Latest Quarter": np_latest,
"Net Profit Previous Quarter": np_prev,
"QoQ % Net Profit Latest": round(qoq_np, 2),
"YoY % Net Profit Latest": round(yoy_np, 2),
# EPS
"EPS Latest Quarter": eps_latest,
"QoQ % EPS Latest": round(qoq_eps, 2),
"YoY % EPS Latest": round(yoy_eps, 2),
"EPS Last Year": eps_last_year_annual,
# Revenue
"Sales Latest Quarter": sales_latest,
"QoQ % Sales Latest": round(qoq_sales, 2),
"YoY % Sales Latest": round(yoy_sales, 2),
"Sales Growth 5 Years(%)": round(sales_growth_5y, 2),
# Margins
"OPM Latest Quarter": opm_latest,
"QoQ % OPM Latest": round(qoq_opm, 2),
"YoY % OPM Latest": round(yoy_opm, 2),
"OPM TTM(%)": opm_ttm,
# Ratios
"ROE(%)": roe,
"ROCE(%)": roce,
"D/E": round(de_ratio, 2),
"P/E": pe,
"PEG": round(peg, 2),
"Forward P/E": round(forward_pe, 2),
# Shareholding
"FII % change QoQ": round(fii_change_qoq, 2),
"DII % change QoQ": round(dii_change_qoq, 2),
"Free Float(%)": round(free_float_pct, 2),
"Float Shares(Cr.)": round(float_shares_cr, 2),
# Returns (read directly from `tech`; a missing key yields 0.0)
"1 Day Returns(%)": get_float(tech.get("PPerchange", 0)),
"1 Week Returns(%)": get_float(tech.get("PricePerchng1week", 0)),
"1 Month Returns(%)": get_float(tech.get("PricePerchng1mon", 0)),
"3 Month Returns(%)": get_float(tech.get("PricePerchng3mon", 0)),
"1 Year Returns(%)": get_float(tech.get("PricePerchng1year", 0)),
"% from 52W High": round(pct_from_52w_high, 2),
# Technical (the signal lists are joined with " | "; an empty list gives "")
"RSI (14)": round(rsi_14, 2),
"Gap Up %": 0.0, # Calculated in Phase 4
"SMA Status": " | ".join(sma_signals),
"EMA Status": " | ".join(ema_signals),
"Technical Sentiment": " | ".join(sentiment_summary),
"Pivot Point": classic_pivot
}
analyzed_data.append(stock_analysis)
Output Structure
Sample Record:
{
"Symbol": "RELIANCE",
"Name": "Reliance Industries Ltd",
"Listing Date": "29-NOV-1977",
"Basic Industry": "Refineries & Marketing",
"Sector": "Oil, Gas & Consumable Fuels",
"Market Cap(Cr.)": 1850234.5,
"Stock Price(₹)": 2745.30,
"Index": "NIFTY 50, NIFTY 100, NIFTY 500",
"Latest Quarter": "Q3 FY25",
"Net Profit Latest Quarter": 18234.5,
"QoQ % Net Profit Latest": 6.5,
"YoY % Net Profit Latest": 29.1,
"EPS Latest Quarter": 27.2,
"QoQ % EPS Latest": 6.7,
"YoY % EPS Latest": 29.5,
"Sales Latest Quarter": 234567.8,
"QoQ % Sales Latest": 2.5,
"YoY % Sales Latest": 8.9,
"Sales Growth 5 Years(%)": 12.3,
"OPM Latest Quarter": 15.2,
"OPM TTM(%)": 15.1,
"ROE(%)": 12.5,
"ROCE(%)": 14.2,
"D/E": 0.49,
"P/E": 28.3,
"PEG": 0.96,
"Forward P/E": 27.5,
"FII % change QoQ": 0.3,
"DII % change QoQ": 0.2,
"Free Float(%)": 49.7,
"Float Shares(Cr.)": 335.6,
"1 Day Returns(%)": 1.2,
"1 Week Returns(%)": 3.4,
"1 Month Returns(%)": 5.6,
"3 Month Returns(%)": 8.9,
"1 Year Returns(%)": 15.2,
"% from 52W High": -12.4,
"RSI (14)": 58.3,
"Gap Up %": 0.0,
"SMA Status": "SMA 20: Above (2.4%) | SMA 50: Above (4.7%) | SMA 200: Above (11.7%)",
"EMA Status": "EMA 20: Above (2.2%) | EMA 50: Above (4.8%) | EMA 200: Above (12.0%)",
"Technical Sentiment": "RSI: Buy | MACD: Buy",
"Pivot Point": 2745.0
}
Performance
Typical Runtime: 8-12 seconds for ~5000 stocks
Processing Rate: ~500-600 stocks/second
Output Size: 15-25 MB (uncompressed JSON)
Next Steps
Phase 4: Enrichment Injection
Learn how Phase 4 scripts add advanced metrics and events to this base JSON