The Python backend (backend/api.py) is a FastAPI application that serves as the data processing and persistence layer. It receives scraped chapters via HTTP API, stores them in JSONL files, and generates EPUB books when scraping is complete.
The backend receives the user data path as a command-line argument from Electron:
api.py:17-28
# Electron supplies its 'userData' directory as the first command-line
# argument; without it, fall back to an "output" folder under the current
# working directory.
if len(sys.argv) > 1:
    BASE_OUTPUT = sys.argv[1]
else:
    BASE_OUTPUT = os.path.join(os.getcwd(), "output")

# One subdirectory per kind of stored data.
HISTORY_DIR = os.path.join(BASE_OUTPUT, "history")
JOBS_DIR = os.path.join(BASE_OUTPUT, "jobs")
EPUB_DIR = os.path.join(BASE_OUTPUT, "epubs")

# Make sure every storage directory exists before any request is served.
for folder in (HISTORY_DIR, JOBS_DIR, EPUB_DIR):
    os.makedirs(folder, exist_ok=True)
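This design allows the backend to work both as a standalone server (development) and as a bundled binary (production): the output directory is accepted as a command-line argument, and when no argument is given the backend falls back to an `output` directory under the current working directory.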
The `GET /api/library` endpoint returns all EPUBs in the library along with the title and author extracted from each book's metadata:
api.py:223-251
@app.get("/api/library")
def get_library():
    """List every EPUB in the library with its title and author.

    Scans ``EPUB_DIR`` for files ending in ``.epub`` and reads each book's
    Dublin Core ``title`` and ``creator`` metadata. A book whose metadata is
    missing, empty, or unreadable is still listed, using a title derived from
    the file name and the author ``"Unknown Author"``.

    Returns:
        A list of ``{"filename", "title", "author"}`` dicts ordered by file
        name; an empty list when ``EPUB_DIR`` does not exist.
    """
    epubs = []
    if not os.path.exists(EPUB_DIR):
        return epubs

    # os.listdir() returns entries in arbitrary order; sort so the library
    # is listed in a stable order.
    for file in sorted(os.listdir(EPUB_DIR)):
        if not file.endswith(".epub"):
            continue

        filepath = os.path.join(EPUB_DIR, file)

        # Default fallback names. Only the trailing extension is stripped, so
        # a ".epub" occurring in the middle of the name is preserved.
        title = file[: -len(".epub")].replace("_", " ")
        author = "Unknown Author"

        # Extract the real title and author from the EPUB metadata.
        try:
            book = epub.read_epub(filepath)
            title_meta = book.get_metadata("DC", "title")
            # Keep the fallback when the metadata entry is present but empty.
            if title_meta and title_meta[0][0]:
                title = title_meta[0][0]
            author_meta = book.get_metadata("DC", "creator")
            if author_meta and author_meta[0][0]:
                author = author_meta[0][0]
        except Exception as e:
            # Best-effort: a damaged book must not break the whole listing,
            # but report it instead of hiding the failure.
            print(f"Error reading metadata for {file}: {e}")

        epubs.append({
            "filename": file,
            "title": title,
            "author": author,
        })

    return epubs
The `GET /api/cover/{filename}` endpoint returns an image from an EPUB for display as its cover in the UI; the first image item found inside the book is used:
api.py:286-308
@app.get("/api/cover/{filename}")
def get_cover(filename: str):
    """Return an image from the named EPUB for use as its cover.

    The first item in the book whose media type starts with ``image/`` is
    returned as-is, with that media type.

    Args:
        filename: Name of an EPUB file inside ``EPUB_DIR``, as received in
            the request path. It is URL-decoded once more before use.

    Returns:
        A ``Response`` carrying the image bytes and media type.

    Raises:
        HTTPException: 404 when the name is not a bare file name, the file
            does not exist, the EPUB cannot be read, or it has no image.
    """
    # Ensure the filename is perfectly URL-decoded
    clean_filename = urllib.parse.unquote(filename)

    # The name comes from the request, and the extra decode above can turn a
    # double-encoded "%252F" into a path separator. Accept only a bare file
    # name so the lookup can never leave EPUB_DIR.
    is_bare_name = os.path.basename(clean_filename) == clean_filename
    if not is_bare_name or clean_filename in ("", ".", ".."):
        raise HTTPException(status_code=404, detail="Not found")

    epub_path = os.path.join(EPUB_DIR, clean_filename)
    if not os.path.exists(epub_path):
        raise HTTPException(status_code=404, detail="Not found")

    try:
        book = epub.read_epub(epub_path)
        # Find ANY image inside the EPUB
        for item in book.get_items():
            if item.media_type and item.media_type.startswith("image/"):
                return Response(
                    content=item.get_content(),
                    media_type=item.media_type,
                )
    except Exception as e:
        # An unreadable book is reported and then treated as "no cover".
        print(f"Error reading cover for {clean_filename}: {e}")

    raise HTTPException(status_code=404, detail="No cover found")
def load_history():
    """Load the persisted history from ``HISTORY_FILE``.

    Returns:
        The decoded JSON document, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    # Open directly instead of checking existence first, so a file removed
    # between the check and the open cannot cause a crash.
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return {}


def save_history(history_data):
    """Write *history_data* to ``HISTORY_FILE`` as indented UTF-8 JSON.

    Args:
        history_data: JSON-serializable object to persist.

    Raises:
        TypeError, ValueError: If *history_data* cannot be encoded as JSON.
            The existing file is left untouched in that case.
    """
    # Serialize before opening: mode "w" truncates the file immediately, so
    # encoding straight into it would destroy the previous history whenever
    # serialization fails part-way.
    payload = json.dumps(history_data, ensure_ascii=False, indent=2)
    with open(HISTORY_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
def load_active_scrapes():
    """Load the persisted active-scrape state from ``ACTIVE_SCRAPES_FILE``.

    Returns:
        The decoded JSON document, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    # Open directly instead of checking existence first, so a file removed
    # between the check and the open cannot cause a crash.
    try:
        with open(ACTIVE_SCRAPES_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return {}


def save_active_scrapes(data):
    """Write *data* to ``ACTIVE_SCRAPES_FILE`` as indented UTF-8 JSON.

    Args:
        data: JSON-serializable object to persist.

    Raises:
        TypeError, ValueError: If *data* cannot be encoded as JSON. The
            existing file is left untouched in that case.
    """
    # Serialize before opening: mode "w" truncates the file immediately, so
    # encoding straight into it would destroy the previous state whenever
    # serialization fails part-way.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    with open(ACTIVE_SCRAPES_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
if __name__ == "__main__":
    # Imported here so uvicorn is only loaded when the module is executed
    # as a script.
    import uvicorn

    # Serve the API on the loopback interface, port 8000.
    uvicorn.run(app, host="127.0.0.1", port=8000)
In production, Electron launches the backend as a standalone binary built with PyInstaller. The `if __name__ == "__main__"` block also allows running `python api.py` directly during development.