Skip to main content

Overview

Phase 3 is the critical transformation step where raw data from Phases 1 and 2 is analyzed and merged into the master all_stocks_fundamental_analysis.json file. This single script (bulk_market_analyzer.py) builds the base JSON structure with ~60 fields per stock.
Critical Script: If bulk_market_analyzer.py fails, the pipeline aborts. All Phase 4 scripts depend on this output.

Script: bulk_market_analyzer.py

Location: bulk_market_analyzer.py

Input Dependencies:
  • fundamental_data.json (Phase 1)
  • dhan_data_response.json (Phase 1)
  • advanced_indicator_data.json (Phase 2)
  • nse_equity_list.csv (Phase 1)
Output:
  • all_stocks_fundamental_analysis.json - Base master JSON

Data Integration Flow


Core Analysis Logic

1. Data Loading

def analyze_all_stocks():
    """Load the pipeline input files and index them by stock symbol.

    All paths are resolved relative to the directory containing this script.
    The step shown here builds:

    * ``data`` - the parsed content of ``fundamental_data.json``.
    * ``listing_date_map`` - SYMBOL -> listing date from
      ``nse_equity_list.csv``.
    * ``dhan_tech_map`` - "Sym" -> record from ``dhan_data_response.json``.
    * ``adv_tech_map`` - "Symbol" -> record from
      ``advanced_indicator_data.json``.

    Rows or records without a usable symbol are skipped; when a symbol
    appears more than once, the last occurrence wins.
    """
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(BASE_DIR, "fundamental_data.json")
    ADVANCED_FILE = os.path.join(BASE_DIR, "advanced_indicator_data.json")
    # Destination of the master JSON; only the path is prepared in this step.
    output_file = os.path.join(BASE_DIR, "all_stocks_fundamental_analysis.json")

    # Fundamental records (one entry per stock)
    with open(input_file, "r") as fundamentals_fh:
        data = json.load(fundamentals_fh)

    # Listing dates keyed by symbol
    csv_path = os.path.join(BASE_DIR, "nse_equity_list.csv")
    listing_date_map = {}
    with open(csv_path, "r") as listing_fh:
        for row in csv.DictReader(listing_fh):
            sym = row.get("SYMBOL")
            # The column header is looked up both with and without a
            # leading space.
            date_list = row.get(" DATE OF LISTING") or row.get("DATE OF LISTING")
            if not (sym and date_list):
                continue
            listing_date_map[sym] = date_list

    # Technical data from Dhan keyed by symbol
    dhan_file = os.path.join(BASE_DIR, "dhan_data_response.json")
    with open(dhan_file, "r") as dhan_fh:
        dhan_data = json.load(dhan_fh)
    dhan_tech_map = {
        record.get("Sym"): record for record in dhan_data if record.get("Sym")
    }

    # Advanced indicators keyed by symbol
    with open(ADVANCED_FILE, "r") as advanced_fh:
        adv_data = json.load(advanced_fh)
    adv_tech_map = {
        record.get("Symbol"): record
        for record in adv_data
        if record.get("Symbol")
    }

2. Helper Functions

def get_float(value_str):
    """Return ``value_str`` converted to float, or 0.0 if it cannot be.

    Anything ``float()`` rejects with ``ValueError`` or ``TypeError``
    (for example ``None``, an empty string or non-numeric text) maps to 0.0.
    """
    try:
        number = float(value_str)
    except (TypeError, ValueError):
        number = 0.0
    return number

def calculate_change(current, previous):
    """Return the percentage change from ``previous`` to ``current``.

    The difference is divided by the absolute value of ``previous`` so the
    sign of the result follows the direction of the move even when the base
    is negative. A zero base returns 0.0 instead of dividing by zero.
    """
    if previous != 0:
        delta = current - previous
        base = abs(previous)
        return (delta / base) * 100
    return 0.0

def get_value_from_pipe_string(pipe_string, index):
    """Extract one numeric value from a pipe-separated string.

    Example: "18234.5|17123.2|16345.8" with index=0 returns 18234.5

    Args:
        pipe_string: Pipe-delimited values. ``None`` or any other falsy
            value yields 0.0. A non-string value (for instance a bare
            number) is converted with ``str()`` and treated as a
            one-element series instead of raising ``AttributeError``.
        index: Position of the wanted value. Negative positions count from
            the end, as with normal sequence indexing.

    Returns:
        The selected value as a float, or 0.0 when the input is empty, the
        index is out of range in either direction, or the selected part is
        not numeric.
    """
    if not pipe_string:
        return 0.0
    parts = str(pipe_string).split('|')
    # Bound the index on both sides so an out-of-range negative index falls
    # back to 0.0 like an out-of-range positive one, rather than raising
    # IndexError.
    if -len(parts) <= index < len(parts):
        return get_float(parts[index])
    return 0.0

3. Per-Stock Analysis Loop

# Accumulates one analysis record per stock.
analyzed_data = []

for item in data:  # Loop through fundamental_data.json
    symbol = item.get("Symbol", "UNKNOWN")
    # A symbol missing from either technical feed gets an empty dict, so the
    # later .get() lookups fall back to their defaults.
    tech = dhan_tech_map.get(symbol, {})
    adv_tech = adv_tech_map.get(symbol, {})

    # Extract data sections.
    # `or {}` (rather than a .get default) also covers a section that is
    # present in the JSON but null: .get would hand back None and every
    # later `.get(...)` on it would raise AttributeError.
    cq = item.get("incomeStat_cq") or {}  # Quarterly P&L
    cy = item.get("incomeStat_cy") or {}  # Annual P&L
    ttm_cy = item.get("TTM_cy") or {}     # Trailing 12 months
    cv = item.get("CV") or {}             # Company variables
    roce_roe = item.get("roce_roe") or {} # Return ratios
    shp = item.get("sHp") or {}           # Shareholding pattern
    bs_c = item.get("bs_c") or {}         # Balance sheet

    # ... Analysis continues

4. Fundamental Metrics Extraction

# Latest 5 quarters of Net Profit.
# Going by the variable names, position 0 of each quarterly series is the most
# recent quarter and position 4 is the same quarter one year earlier.
np_latest = get_value_from_pipe_string(cq.get("NET_PROFIT"), 0)
np_prev = get_value_from_pipe_string(cq.get("NET_PROFIT"), 1)
np_2_back = get_value_from_pipe_string(cq.get("NET_PROFIT"), 2)
np_3_back = get_value_from_pipe_string(cq.get("NET_PROFIT"), 3)
np_last_year_q = get_value_from_pipe_string(cq.get("NET_PROFIT"), 4)

# Calculate QoQ and YoY growth
qoq_np = calculate_change(np_latest, np_prev)
yoy_np = calculate_change(np_latest, np_last_year_q)

# Quarterly EPS
eps_latest = get_value_from_pipe_string(cq.get("EPS"), 0)
eps_prev = get_value_from_pipe_string(cq.get("EPS"), 1)
eps_last_year_q = get_value_from_pipe_string(cq.get("EPS"), 4)

# Annual EPS
eps_last_year_annual = get_value_from_pipe_string(cy.get("EPS"), 0)
eps_2_years_back_annual = get_value_from_pipe_string(cy.get("EPS"), 1)

# Growth rates
qoq_eps = calculate_change(eps_latest, eps_prev)
yoy_eps = calculate_change(eps_latest, eps_last_year_q)

# Quarterly Sales
sales_latest = get_value_from_pipe_string(cq.get("SALES"), 0)
sales_prev = get_value_from_pipe_string(cq.get("SALES"), 1)
sales_last_year_q = get_value_from_pipe_string(cq.get("SALES"), 4)

# Sales growth rates, computed the same way as for net profit, EPS and OPM.
# The final record reports them as "QoQ % Sales Latest" / "YoY % Sales Latest".
qoq_sales = calculate_change(sales_latest, sales_prev)
yoy_sales = calculate_change(sales_latest, sales_last_year_q)

# Annual Sales for 5-year CAGR
sales_current_annual = get_value_from_pipe_string(cy.get("SALES"), 0)
sales_5_years_ago = get_value_from_pipe_string(cy.get("SALES"), 5)

# Calculate 5-year CAGR.
# A missing or non-positive base leaves the growth at 0.0. The current value
# must not be negative either: a negative ratio raised to the power 1/5
# evaluates to a complex number, which round() cannot handle later on.
sales_growth_5y = 0.0
if sales_5_years_ago > 0 and sales_current_annual >= 0:
    sales_growth_5y = ((sales_current_annual / sales_5_years_ago) ** (1/5) - 1) * 100

# Quarterly OPM
opm_latest = get_value_from_pipe_string(cq.get("OPM"), 0)
opm_prev = get_value_from_pipe_string(cq.get("OPM"), 1)
opm_last_year_q = get_value_from_pipe_string(cq.get("OPM"), 4)
opm_ttm = get_float(ttm_cy.get("OPM"))

# QoQ and YoY changes (relative change of the margin, not a point difference)
qoq_opm = calculate_change(opm_latest, opm_prev)
yoy_opm = calculate_change(opm_latest, opm_last_year_q)

5. Valuation Ratios

# P/E and the return ratios are taken as-is from the fundamental record;
# anything missing or non-numeric becomes 0.0.
pe = get_float(cv.get("STOCK_PE"))
roe = get_float(roce_roe.get("ROE"))
roce = get_float(roce_roe.get("ROCE"))

# Debt-to-Equity: latest non-current liabilities over latest total equity,
# reported as 0.0 when equity is zero.
total_equity = get_value_from_pipe_string(bs_c.get("TOTAL_EQUITY"), 0)
non_current_liab = get_value_from_pipe_string(bs_c.get("NON_CURRENT_LIABILITIES"), 0)
if total_equity != 0:
    de_ratio = non_current_liab / total_equity
else:
    de_ratio = 0.0

# PEG: P/E divided by the YoY EPS growth of the latest quarter; only defined
# when both are positive.
peg = pe / yoy_eps if (yoy_eps > 0 and pe > 0) else 0.0

# Forward P/E: the P/E rescaled by TTM EPS over the latest quarterly EPS
# annualized (x4). Inside the guard the annualized EPS is always positive,
# so the division is safe.
forward_pe = 0.0
if eps_latest > 0 and pe > 0:
    ttm_eps = get_float(ttm_cy.get("EPS"))
    annualized_eps = eps_latest * 4
    forward_pe = pe * (ttm_eps / annualized_eps)

6. Shareholding Patterns

# FII holding: latest reading minus the previous one (a difference of the two
# readings, not a relative change).
fii_latest, fii_prev = (
    get_value_from_pipe_string(shp.get("FII"), position) for position in (0, 1)
)
fii_change_qoq = fii_latest - fii_prev

# DII holding: same treatment as FII.
dii_latest, dii_prev = (
    get_value_from_pipe_string(shp.get("DII"), position) for position in (0, 1)
)
dii_change_qoq = dii_latest - dii_prev

# Free float is whatever the promoters do not hold. A missing promoter figure
# reads as 0.0 and therefore gives a free float of 100.0.
promoter_latest = get_value_from_pipe_string(shp.get("PROMOTER"), 0)
free_float_pct = 100.0 - promoter_latest

# Float Shares (in crores): market cap / price approximates the total share
# count, which is then scaled by the free-float percentage.
# NOTE(review): mcap_cr is not defined in this excerpt, and ltp is assigned in
# the Technical Indicators step - both must be set before this point; confirm
# against the full script.
if mcap_cr > 0 and ltp > 0:
    total_shares_cr = mcap_cr / ltp
    float_shares_cr = total_shares_cr * (free_float_pct / 100.0)
else:
    float_shares_cr = 0.0

7. Technical Indicators

# Raw readings from the Dhan record; missing or non-numeric fields become 0.0.
ltp = get_float(tech.get("Ltp", 0))
sma_200 = get_float(tech.get("DaySMA200CurrentCandle", 0))
sma_50 = get_float(tech.get("DaySMA50CurrentCandle", 0))
rsi_14 = get_float(tech.get("DayRSI14CurrentCandle", 0))
high_52w = get_float(tech.get("High1Yr", 0))

# Each distance is the percentage gap between the price and a reference
# level. It stays at 0.0 unless both the price and the reference are positive.
pct_from_sma_200 = (
    ((ltp - sma_200) / sma_200) * 100 if sma_200 > 0 and ltp > 0 else 0.0
)
pct_from_sma_50 = (
    ((ltp - sma_50) / sma_50) * 100 if sma_50 > 0 and ltp > 0 else 0.0
)

# Distance from the 52-week high
pct_from_52w_high = (
    ((ltp - high_52w) / high_52w) * 100 if high_52w > 0 and ltp > 0 else 0.0
)

8. Index Membership

# Index ids the pipeline reports on (from pipeline design); every other
# index the stock belongs to is ignored.
requested_indices = {
    1, 3, 5, 7, 13, 14, 15, 16, 17, 18, 19, 20, 22, 25, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 37, 38, 39, 41, 42, 43, 44, 45, 46, 51, 442, 443, 444,
    447, 466,
}

# Collect the names of the requested indices that list this stock, in feed
# order. A value that is not a list is treated as "no memberships".
idx_list_raw = tech.get("idxlist", [])
memberships = idx_list_raw if isinstance(idx_list_raw, list) else []
indices_found = [
    membership.get("Name")
    for membership in memberships
    if membership.get("Indexid") in requested_indices and membership.get("Name")
]
Example indices:
  • 13 = NIFTY 50
  • 51 = NIFTY 100
  • 1 = NIFTY 500
  • 442 = NIFTY MIDCAP 100
  • 443 = NIFTY SMALLCAP 100

9. Advanced Technical Indicators

# SMA status: for the 20/50/200 averages, record whether the price is above
# or below the average and by what percentage. A price exactly on the average
# is reported as "Below".
target_smas = ["20", "50", "200"]
sma_signals = []
for sma_row in adv_tech.get("SMA", []):
    ind_name = sma_row.get("Indicator", "").replace("-SMA", "")
    sma_level = get_float(sma_row.get("Value"))

    if not (ind_name in target_smas and sma_level > 0 and ltp > 0):
        continue
    diff = ((ltp - sma_level) / sma_level) * 100
    if diff > 0:
        status = "Above"
    else:
        status = "Below"
    sma_signals.append(f"SMA {ind_name}: {status} ({round(diff, 1)}%)")

# EMA status: same rule applied to the exponential averages.
target_emas = ["20", "50", "200"]
ema_signals = []
for ema_row in adv_tech.get("EMA", []):
    ind_name = ema_row.get("Indicator", "").replace("-EMA", "")
    ema_level = get_float(ema_row.get("Value"))

    if not (ind_name in target_emas and ema_level > 0 and ltp > 0):
        continue
    diff = ((ltp - ema_level) / ema_level) * 100
    if diff > 0:
        status = "Above"
    else:
        status = "Below"
    ema_signals.append(f"EMA {ind_name}: {status} ({round(diff, 1)}%)")

# Technical sentiment: keep the action reported for RSI and MACD entries.
# An entry whose name contains "RSI" is labelled RSI even if it also
# contains "MACD".
sentiment_summary = []
for reading in adv_tech.get("TechnicalIndicators", []):
    name = reading.get("Indicator", "")
    action = reading.get("Action", "")
    if "RSI" in name:
        sentiment_summary.append(f"RSI: {action}")
        continue
    if "MACD" in name:
        sentiment_summary.append(f"MACD: {action}")

# Pivot point: the "PP" value of the "Classic" set in the first pivot entry,
# or "N/A" when no pivot data is available.
pivots = adv_tech.get("Pivots", [])
if pivots and isinstance(pivots, list):
    classic_pivot = pivots[0].get("Classic", {}).get("PP", "N/A")
else:
    classic_pivot = "N/A"

10. Final Assembly

# Assemble the flat per-stock record for the master JSON. The keys are the
# human-readable labels written to the output file, in the order listed here.
# Computed percentages are rounded to 2 decimals; values read directly from
# the input data are passed through unrounded.
# NOTE(review): mcap_cr and latest_quarter_str are not defined anywhere in
# this excerpt - presumably set earlier in the loop; confirm against the full
# script.
stock_analysis = {
    # Basic Info
    "Symbol": symbol,
    "Name": item.get("Name", ""),
    "Listing Date": listing_date_map.get(symbol, "N/A"),
    "Basic Industry": cv.get("INDUSTRY_NAME", "N/A"),
    "Sector": cv.get("SECTOR", "N/A"),
    "Market Cap(Cr.)": mcap_cr,
    "Stock Price(₹)": ltp,
    # Comma-separated names of the requested indices, or "N/A" when none match
    "Index": ", ".join(indices_found) if indices_found else "N/A",
    
    # Latest Quarter Data
    "Latest Quarter": latest_quarter_str,
    "Net Profit Latest Quarter": np_latest,
    "Net Profit Previous Quarter": np_prev,
    "QoQ % Net Profit Latest": round(qoq_np, 2),
    "YoY % Net Profit Latest": round(yoy_np, 2),
    
    # EPS
    "EPS Latest Quarter": eps_latest,
    "QoQ % EPS Latest": round(qoq_eps, 2),
    "YoY % EPS Latest": round(yoy_eps, 2),
    "EPS Last Year": eps_last_year_annual,
    
    # Revenue
    "Sales Latest Quarter": sales_latest,
    "QoQ % Sales Latest": round(qoq_sales, 2),
    "YoY % Sales Latest": round(yoy_sales, 2),
    "Sales Growth 5 Years(%)": round(sales_growth_5y, 2),
    
    # Margins
    "OPM Latest Quarter": opm_latest,
    "QoQ % OPM Latest": round(qoq_opm, 2),
    "YoY % OPM Latest": round(yoy_opm, 2),
    "OPM TTM(%)": opm_ttm,
    
    # Ratios
    "ROE(%)": roe,
    "ROCE(%)": roce,
    "D/E": round(de_ratio, 2),
    "P/E": pe,
    "PEG": round(peg, 2),
    "Forward P/E": round(forward_pe, 2),
    
    # Shareholding
    "FII % change QoQ": round(fii_change_qoq, 2),
    "DII % change QoQ": round(dii_change_qoq, 2),
    "Free Float(%)": round(free_float_pct, 2),
    "Float Shares(Cr.)": round(float_shares_cr, 2),
    
    # Returns (read directly from the Dhan record; 0.0 when a field is absent)
    "1 Day Returns(%)": get_float(tech.get("PPerchange", 0)),
    "1 Week Returns(%)": get_float(tech.get("PricePerchng1week", 0)),
    "1 Month Returns(%)": get_float(tech.get("PricePerchng1mon", 0)),
    "3 Month Returns(%)": get_float(tech.get("PricePerchng3mon", 0)),
    "1 Year Returns(%)": get_float(tech.get("PricePerchng1year", 0)),
    "% from 52W High": round(pct_from_52w_high, 2),
    
    # Technical
    "RSI (14)": round(rsi_14, 2),
    "Gap Up %": 0.0,  # Placeholder here; calculated in Phase 4
    # The three status fields are " | "-joined strings, empty when no signal
    # was collected.
    "SMA Status": " | ".join(sma_signals),
    "EMA Status": " | ".join(ema_signals),
    "Technical Sentiment": " | ".join(sentiment_summary),
    "Pivot Point": classic_pivot
}

# Add this stock's record to the list collected for the output file.
analyzed_data.append(stock_analysis)

Output Structure

Sample Record:
{
  "Symbol": "RELIANCE",
  "Name": "Reliance Industries Ltd",
  "Listing Date": "29-NOV-1977",
  "Basic Industry": "Refineries & Marketing",
  "Sector": "Oil, Gas & Consumable Fuels",
  "Market Cap(Cr.)": 1850234.5,
  "Stock Price(₹)": 2745.30,
  "Index": "NIFTY 50, NIFTY 100, NIFTY 500",
  "Latest Quarter": "Q3 FY25",
  "Net Profit Latest Quarter": 18234.5,
  "QoQ % Net Profit Latest": 6.5,
  "YoY % Net Profit Latest": 29.1,
  "EPS Latest Quarter": 27.2,
  "QoQ % EPS Latest": 6.7,
  "YoY % EPS Latest": 29.5,
  "Sales Latest Quarter": 234567.8,
  "QoQ % Sales Latest": 2.5,
  "YoY % Sales Latest": 8.9,
  "Sales Growth 5 Years(%)": 12.3,
  "OPM Latest Quarter": 15.2,
  "OPM TTM(%)": 15.1,
  "ROE(%)": 12.5,
  "ROCE(%)": 14.2,
  "D/E": 0.49,
  "P/E": 28.3,
  "PEG": 0.96,
  "Forward P/E": 27.5,
  "FII % change QoQ": 0.3,
  "DII % change QoQ": 0.2,
  "Free Float(%)": 49.7,
  "Float Shares(Cr.)": 335.6,
  "1 Day Returns(%)": 1.2,
  "1 Week Returns(%)": 3.4,
  "1 Month Returns(%)": 5.6,
  "3 Month Returns(%)": 8.9,
  "1 Year Returns(%)": 15.2,
  "% from 52W High": -12.4,
  "RSI (14)": 58.3,
  "Gap Up %": 0.0,
  "SMA Status": "SMA 20: Above (2.4%) | SMA 50: Above (4.7%) | SMA 200: Above (11.7%)",
  "EMA Status": "EMA 20: Above (2.2%) | EMA 50: Above (4.8%) | EMA 200: Above (12.0%)",
  "Technical Sentiment": "RSI: Buy | MACD: Buy",
  "Pivot Point": 2745.0
}

Performance

Typical Runtime: 8-12 seconds for ~5000 stocks
Processing Rate: ~500-600 stocks/second
Output Size: 15-25 MB (uncompressed JSON)

Next Steps

Phase 4: Enrichment Injection

Learn how Phase 4 scripts add advanced metrics and events to this base JSON

Build docs developers (and LLMs) love