Skip to main content

Overview

Phase 1 establishes the data foundation for the entire pipeline. It fetches core market data and fundamental metrics for ~5000 NSE stocks, producing the critical master_isin_map.json that ALL subsequent scripts depend on.
Critical Phase: If any Phase 1 script fails, the entire pipeline must abort. These scripts produce the foundational data structures.

Scripts

1. fetch_dhan_data.py

Purpose: Fetches live market data for all NSE equities and creates the master ISIN → Symbol mapping. API Endpoint:
POST https://ow-scanx-analytics.dhan.co/customscan/fetchdt
Payload Structure:
# Request body for the scanner endpoint: one page (pgno 0) of up to 5000
# instruments filtered to Exch == "NSE" and OgInst == "ES", ranked by Mcap.
# NOTE: field names are reproduced exactly as in the original request
# (including spellings such as "DivYeild" and "EBIDTAMargin") — they are API
# identifiers, not prose, so they must not be "corrected".
payload = {
    "data": {
        # The sort keys are given once. The original literal repeated "sort"
        # and "sorder" at the end of this dict; in a Python dict literal the
        # later duplicate silently overrides the earlier one.
        "sort": "Mcap",
        "sorder": "desc",
        "count": 5000,  # Large count to get all stocks
        "fields": [
            "Isin", "DispSym", "Mcap", "Pe", "DivYeild", "Revenue",
            "Year1RevenueGrowth", "NetProfitMargin", "YoYLastQtrlyProfitGrowth",
            "EBIDTAMargin", "volume", "PricePerchng1year", "PricePerchng3year",
            "PricePerchng5year", "Ind_Pe", "Pb", "Eps", "DaySMA50CurrentCandle",
            "DaySMA200CurrentCandle", "DayRSI14CurrentCandle", "ROCE", "Ltp",
            "Roe", "RtAwayFrom5YearHigh", "RtAwayFrom1MonthHigh", "High5yr",
            "High3Yr", "High1Yr", "High1Wk", "Sym", "PricePerchng1mon",
            "PricePerchng1week", "PricePerchng3mon", "YearlyEarningPerShare",
            "OCFGrowthOnYr", "Year1CAGREPSGrowth", "NetChangeInCash",
            "FreeCashFlow", "PricePerchng2week", "DayBbUpper_Sub_BbLower",
            "DayATR14CurrentCandleMul_2", "Min5HighCurrentCandle",
            "Min15HighCurrentCandle", "Min5EMA50CurrentCandle",
            "Min15EMA50CurrentCandle", "Min15SMA100CurrentCandle", "Open",
            "BcClose", "Rmp", "PledgeBenefit", "idxlist", "Sid", "FnoFlag",
        ],
        "params": [
            {"field": "OgInst", "op": "", "val": "ES"},
            {"field": "Exch", "op": "", "val": "NSE"},
        ],
        "pgno": 0,
    }
}
Headers:
from pipeline_utils import get_headers

# Build the request headers for the scanner call.
# NOTE(review): include_origin=True presumably adds the Origin/Referer pair
# shown in the sample below — confirm against pipeline_utils.get_headers.
headers = get_headers(include_origin=True)
# Returns:
# {
#     "Content-Type": "application/json",
#     "User-Agent": "Mozilla/5.0 ...",  # Rotated
#     "Accept": "application/json, text/plain, */*",
#     "Origin": "https://scanx.dhan.co",
#     "Referer": "https://scanx.dhan.co/"
# }
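Output Files: dhan_data_response.json (the full response; a sample record is shown below) and master_isin_map.json (the ISIN → Symbol map built from it).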
[
  {
    "Sym": "RELIANCE",
    "DispSym": "Reliance Industries Ltd",
    "Isin": "INE002A01018",
    "Sid": 1234,
    "FnoFlag": 1,
    "Mcap": 1850234.5,
    "Ltp": 2745.30,
    "Pe": 28.3,
    "Roe": 12.5,
    "DaySMA50CurrentCandle": 2680.5,
    "DaySMA200CurrentCandle": 2550.2,
    "DayRSI14CurrentCandle": 58.3,
    "idxlist": [
      {"Indexid": 13, "Name": "NIFTY 50"},
      {"Indexid": 51, "Name": "NIFTY 100"}
    ],
    ...
  }
]
Core Logic:
def fetch_all_dhan_data():
    """Fetch the scanner data from Dhan and write both Phase 1 output files.

    Posts the scanner payload, saves the returned records to
    ``dhan_data_response.json`` and derives ``master_isin_map.json`` from
    them: one ``Symbol``/``ISIN``/``Name``/``Sid``/``FnoFlag`` entry for each
    record that has both a symbol and an ISIN, sorted by symbol. Both files
    are written next to this script.

    If the response does not contain a ``data`` list, a message is printed
    and no file is written.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer within 30 seconds.
    """
    url = "https://ow-scanx-analytics.dhan.co/customscan/fetchdt"

    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    output_file = os.path.join(BASE_DIR, "dhan_data_response.json")
    master_map_file = os.path.join(BASE_DIR, "master_isin_map.json")

    payload = { ... }  # See above
    headers = get_headers(include_origin=True)

    # Bound the wait: without a timeout a stalled connection would block the
    # whole pipeline indefinitely. 30 s matches the fundamentals fetcher.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    response.raise_for_status()

    data = response.json()

    # Guard clause: report an unexpected shape instead of silently doing nothing.
    records = data.get('data') if isinstance(data, dict) else None
    if not isinstance(records, list):
        print("Response structure different than expected.")
        return

    # Save full response
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=4)
    print(f"Successfully fetched {len(records)} items.")

    # Create Master ISIN Map: keep only records that carry both identifiers,
    # sorted by symbol so the file is stable from run to run.
    master_map = sorted(
        (
            {
                "Symbol": item.get('Sym'),
                "ISIN": item.get('Isin'),
                "Name": item.get('DispSym'),
                "Sid": item.get('Sid'),
                "FnoFlag": item.get('FnoFlag', 0),
            }
            for item in records
            if item.get('Sym') and item.get('Isin')
        ),
        key=lambda entry: entry['Symbol'],
    )

    with open(master_map_file, "w", encoding="utf-8") as f_map:
        json.dump(master_map, f_map, indent=4)
    print(f"Saved {len(master_map)} symbols to {master_map_file}")
Key Fields Extracted:
  • Sym (string): Stock symbol (e.g., “RELIANCE”)
  • Isin (string): ISIN code (e.g., “INE002A01018”)
  • Sid (integer): Security ID used by Dhan APIs
  • FnoFlag (integer): F&O eligibility: 1 = Yes, 0 = No
  • idxlist (array): List of index memberships with Indexid and Name

2. fetch_fundamental_data.py

Purpose: Fetches detailed fundamental data (P&L, balance sheet, ratios) for all stocks using the master ISIN map. API Endpoint:
POST https://open-web-scanx.dhan.co/scanx/fundamental
Batching Strategy:
  • Loads all ISINs from master_isin_map.json
  • Chunks ISINs into batches of 100
  • Fetches each batch sequentially with 0.5s delay
Payload Structure:
# Request body for one batch: "isins" carries that batch's ISIN codes.
payload = {
    "data": {
        "isins": [
            "INE002A01018",
            "INE467B01029",
            # ... up to 100 ISINs per batch (the limit is applied by the
            # client, which slices the master list into chunks of 100)
        ]
    }
}
Response Structure:
{
  "status": "success",
  "data": [
    {
      "isin": "INE002A01018",
      "Symbol": "RELIANCE",
      "Name": "Reliance Industries Ltd",
      "incomeStat_cq": {
        "NET_PROFIT": "18234.5|17123.2|16345.8|15234.1|14123.5",
        "EPS": "27.2|25.5|24.3|22.7|21.0",
        "SALES": "234567.8|228934.2|221345.6|215678.9|208934.5",
        "OPM": "15.2|14.8|14.5|14.1|13.8",
        "YEAR": "Q3 FY25|Q2 FY25|Q1 FY25|Q4 FY24|Q3 FY24"
      },
      "incomeStat_cy": {
        "EPS": "95.5|88.2|82.1|75.3|68.9",
        "SALES": "912345.6|878934.2|845678.9|812345.6|778934.2"
      },
      "TTM_cy": {
        "EPS": 99.7,
        "OPM": 15.1
      },
      "CV": {
        "INDUSTRY_NAME": "Refineries & Marketing",
        "SECTOR": "Oil, Gas & Consumable Fuels",
        "MARKET_CAP": 1850234.5,
        "STOCK_PE": 28.3
      },
      "roce_roe": {
        "ROE": 12.5,
        "ROCE": 14.2
      },
      "sHp": {
        "FII": "23.5|23.2|22.9|22.6",
        "DII": "15.8|15.6|15.4|15.2",
        "PROMOTER": "50.3|50.4|50.5|50.6"
      },
      "bs_c": {
        "NON_CURRENT_LIABILITIES": "345678.9|342345.6|338934.2",
        "TOTAL_EQUITY": "712345.6|698934.2|685678.9"
      }
    }
  ]
}
Core Logic:
def fetch_fundamental_data():
    """Fetch fundamentals for every ISIN in the master map and save them.

    Reads ``master_isin_map.json``, posts the ISINs to the fundamentals
    endpoint in batches of 100 (with a 0.5 s pause after each batch), tags
    every returned record with the ``Symbol`` and ``Name`` from the master
    map and writes the combined list to ``fundamental_data.json``.

    A failed batch (network error, undecodable body, non-200 status or a
    non-"success" payload) is reported and skipped so that the batches already
    collected are not lost. If the master map is missing, a hint is printed
    and nothing is fetched or written.
    """
    # NOTE(review): these paths are relative, so they resolve against the
    # current working directory, whereas fetch_dhan_data.py writes the master
    # map next to its own script file — confirm both run from that folder.
    master_map_file = "master_isin_map.json"
    api_url = "https://open-web-scanx.dhan.co/scanx/fundamental"
    output_file = "fundamental_data.json"

    # Load ISINs from Master Map (before any other work: without it there is
    # nothing to fetch).
    try:
        with open(master_map_file, "r", encoding="utf-8") as f:
            master_map = json.load(f)
    except FileNotFoundError:
        print(f"Error: {master_map_file} not found. Run 'fetch_dhan_data.py' first.")
        return

    headers = get_headers()

    # Create lookup: ISIN -> {Symbol, Name}
    isin_lookup = {
        item['ISIN']: {"Symbol": item.get("Symbol"), "Name": item.get("Name")}
        for item in master_map if item.get('ISIN')
    }

    all_isins = list(isin_lookup)
    total_isins = len(all_isins)
    print(f"Loaded {total_isins} ISINs from master map.")

    # Chunk into batches of 100
    batch_size = 100
    all_fundamental_data = []

    batch_starts = range(0, total_isins, batch_size)
    for batch_number, start in enumerate(batch_starts, start=1):
        batch_isins = all_isins[start:start + batch_size]
        print(f"Fetching batch {batch_number}: {len(batch_isins)} ISINs...")

        payload = {"data": {"isins": batch_isins}}

        try:
            response = requests.post(api_url, json=payload, headers=headers, timeout=30)

            if response.status_code != 200:
                print(f"  HTTP Error: {response.status_code}")
            else:
                data = response.json()

                if data.get('status') != 'success':
                    print(f"  API status: {data.get('status')}")
                else:
                    batch_results = data.get('data') or []
                    if batch_results:
                        # Enrich with Symbol and Name
                        for item in batch_results:
                            known = isin_lookup.get(item.get('isin'))
                            if known is not None:
                                item['Symbol'] = known['Symbol']
                                item['Name'] = known['Name']

                        all_fundamental_data.extend(batch_results)
                        print(f"  Success: Received {len(batch_results)} records.")
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"  Exception fetching batch: {e}")

        time.sleep(0.5)  # Be polite

    # Save Consolidated Data
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_fundamental_data, f, indent=4)
    print(f"Saved fundamental data for {len(all_fundamental_data)} securities")
Data Sections Explained:
incomeStat_cq — contains pipe-separated quarterly values (latest to oldest):
  • NET_PROFIT: Net profit in crores
  • EPS: Earnings per share
  • SALES: Revenue in crores
  • OPM: Operating profit margin (%)
  • YEAR: Quarter labels (e.g., “Q3 FY25|Q2 FY25|…”)
Format: "latest|prev|2back|3back|lastyearQ"
incomeStat_cy — contains pipe-separated annual values:
  • EPS: Annual EPS for last 5 years
  • SALES: Annual revenue for last 5 years
TTM_cy — single values for TTM (trailing twelve months) metrics:
  • EPS: TTM earnings per share
  • OPM: TTM operating profit margin
CV — company classification and valuation:
  • INDUSTRY_NAME: Specific industry
  • SECTOR: Broader sector
  • MARKET_CAP: Market capitalization (crores)
  • STOCK_PE: Price-to-earnings ratio
roce_roe — profitability metrics:
  • ROE: Return on equity (%)
  • ROCE: Return on capital employed (%)
sHp — pipe-separated quarterly shareholding:
  • FII: Foreign institutional holding (%)
  • DII: Domestic institutional holding (%)
  • PROMOTER: Promoter holding (%)
bs_c — pipe-separated quarterly balance sheet items:
  • NON_CURRENT_LIABILITIES: Long-term debt
  • TOTAL_EQUITY: Shareholder equity

3. NSE Listing Dates CSV

Purpose: Downloads listing dates from NSE for calculating stock age. Source:
https://nsearchives.nseindia.com/content/equities/EQUITY_L.csv
Fetch Command:
curl -s -o nse_equity_list.csv \
  "https://nsearchives.nseindia.com/content/equities/EQUITY_L.csv" \
  --http1.1 \
  --header "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
CSV Structure:
SYMBOL,NAME OF COMPANY,SERIES,DATE OF LISTING,PAID UP VALUE,MARKET LOT,ISIN NUMBER,FACE VALUE
RELIANCE,Reliance Industries Limited,EQ,29-NOV-1977,2,1,INE002A01018,10
TCS,Tata Consultancy Services Limited,EQ,25-AUG-2004,1,1,INE467B01029,1
Usage in bulk_market_analyzer.py:
# Build a SYMBOL -> listing-date lookup from the downloaded NSE equity list.
listing_date_map = {}
# newline="" hands newline handling to the csv module, as its docs require.
with open("nse_equity_list.csv", "r", newline="") as f:
    reader = csv.DictReader(f)
    # Header names may carry stray spaces (the original lookup tried both
    # " DATE OF LISTING" and "DATE OF LISTING"); normalise them once so the
    # loop can use the clean name. Reading .fieldnames consumes the header row.
    reader.fieldnames = [name.strip() for name in (reader.fieldnames or [])]
    for row in reader:
        sym = row.get("SYMBOL")
        date_list = row.get("DATE OF LISTING")
        # Skip rows missing either value.
        if sym and date_list:
            listing_date_map[sym] = date_list

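Dependency Chain

fetch_dhan_data.py → master_isin_map.json → fetch_fundamental_data.py → fundamental_data.json. The NSE listing-dates CSV (nse_equity_list.csv) is downloaded separately and read by bulk_market_analyzer.py.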

Error Handling

fetch_dhan_data.py

# Pre-bind so the handler can tell "no response at all" from "bad response"
# without probing locals().
response = None
try:
    # Bound the wait so a stalled connection cannot hang the pipeline.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    response.raise_for_status()
    data = response.json()

    if isinstance(data, dict) and isinstance(data.get('data'), list):
        # Process data
        pass
    else:
        print("Response structure different than expected.")
except (requests.RequestException, ValueError) as e:
    # Only network/HTTP failures and undecodable bodies are handled here;
    # programming errors are left to surface instead of being swallowed.
    print(f"Error fetching data: {e}")
    # Compare with None explicitly: a Response object is falsy for 4xx/5xx
    # statuses, which are exactly the cases worth printing.
    if response is not None:
        print(f"Response status: {response.status_code}")
        print(f"Response text: {response.text[:500]}")

fetch_fundamental_data.py

try:
    with open(master_map_file, "r") as f:
        master_map = json.load(f)
except FileNotFoundError:
    print(f"Error: {master_map_file} not found. Run 'fetch_dhan_data.py' first.")
    return

# Per-batch error handling
for batch in batches:
    try:
        response = requests.post(api_url, json=payload, headers=headers, timeout=30)
        if response.status_code == 200:
            # Process
            pass
        else:
            print(f"  HTTP Error: {response.status_code}")
    except Exception as e:
        print(f"  Exception fetching batch: {e}")
    
    time.sleep(0.5)  # Rate limiting

Performance

| Script | Avg Time | Records | Throughput |
|---|---|---|---|
| fetch_dhan_data.py | 3-5 sec | ~5000 | Single API call |
| fetch_fundamental_data.py | 2-3 min | ~5000 | ~2500 records/min (batched) |
| NSE CSV download | 1-2 sec | ~5000 | Direct download |
| Total Phase 1 | 2-4 min | ~5000 | - |

Next Steps

Phase 2: Data Enrichment

Explore how the master ISIN map is used to fetch enrichment data

Build docs developers (and LLMs) love