Basic Usage
// Build a rewriter whose handler replaces the content of every <p> element.
const rewriter = new HTMLRewriter().on("p", {
  element: (paragraph) => {
    paragraph.setInnerContent("Hello World!");
  },
});

// Wrap the markup in a Response, run it through the rewriter, and print it.
const input = "<html><body><p>Original</p></body></html>";
const output = rewriter.transform(new Response(input));
console.log(await output.text());
// <html><body><p>Hello World!</p></body></html>
Selecting Elements
CSS Selectors
Use any valid CSS selector:
const rewriter = new HTMLRewriter();
// Tag selectors
rewriter.on("p", handlers);
rewriter.on("div", handlers);
// Class selectors
rewriter.on(".my-class", handlers);
// ID selectors
rewriter.on("#header", handlers);
// Attribute selectors
rewriter.on('[data-user-id="123"]', handlers);
rewriter.on('a[href^="https://"]', handlers);
// Combinators
rewriter.on("div > p", handlers); // Direct child
rewriter.on("div p", handlers); // Descendant
// NOTE: sibling combinators ("h1 + p", "h1 ~ p") are not supported by the
// streaming selector engine (lol-html); registering one is rejected as an
// unsupported combinator. Only the child and descendant forms above work.
// Pseudo-classes
rewriter.on("li:first-child", handlers);
rewriter.on("p:not(.ignore)", handlers);
Multiple Selectors
// Register one handler object per selector by chaining the on() calls.
const rewriter = new HTMLRewriter()
  .on("h1", {
    element: (heading) => {
      heading.setAttribute("class", "title");
    },
  })
  .on("p", {
    element: (paragraph) => {
      paragraph.setAttribute("class", "paragraph");
    },
  });
Element Handlers
Element Handler
Called when an element is encountered:
rewriter.on("div", {
element(element) {
console.log("Found div:", element.tagName);
},
});
Text Handler
Called for text nodes:
rewriter.on("p", {
text(text) {
if (text.text.includes("TODO")) {
text.replace("[PENDING]");
}
},
});
Comment Handler
Called for HTML comments:
rewriter.on("div", {
comments(comment) {
if (comment.text.includes("debug")) {
comment.remove();
}
},
});
Modifying Elements
Attributes
rewriter.on("img", {
  element: (img) => {
    // Read the current value of an attribute
    const src = img.getAttribute("src");

    // Add or overwrite an attribute
    img.setAttribute("loading", "lazy");

    // Drop an attribute
    img.removeAttribute("width");

    // Test for an attribute's presence
    const hasAltText = img.hasAttribute("alt");
    if (hasAltText) {
      console.log("Has alt text");
    }
  },
});
Content
rewriter.on("div", {
  element: (container) => {
    // Replace everything inside the element
    container.setInnerContent("New content");
    // Replace the content with an HTML string
    container.setInnerContent("<p>Paragraph</p>", { html: true });

    // Add content at the start of the element
    container.prepend("Start: ");
    container.prepend("<b>Bold</b>", { html: true });

    // Add content at the end of the element
    container.append(" :End");
    container.append("<i>Italic</i>", { html: true });
  },
});
Insertion
rewriter.on("h1", {
  element: (heading) => {
    // Content placed immediately in front of the element
    heading.before("<nav>Navigation</nav>", { html: true });
    // Content placed immediately behind the element
    heading.after("<hr>", { html: true });
  },
});
Removal
// Drop the element together with everything inside it
rewriter.on(".ad", {
  element: (ad) => {
    ad.remove();
  },
});

// Drop only the element itself; its content stays in the document
rewriter.on(".strip", {
  element: (wrapper) => {
    wrapper.removeAndKeepContent();
  },
});
Text Manipulation
Text Nodes
// Demonstrates each operation available on a text chunk. A real handler
// would normally pick one of them rather than call all in sequence.
rewriter.on("p", {
  text(text) {
    // Get the content of this chunk
    console.log(text.text);
    // Check if this is the last chunk of the current text node
    // (the property is `lastInTextChunk`; there is no `lastInTextNode`)
    if (text.lastInTextChunk) {
      text.after(" (end)");
    }
    // Replace text
    text.replace(text.text.toUpperCase());
    // Insert before/after
    text.before("[");
    text.after("]");
    // Remove text
    text.remove();
  },
});
Text Processing
rewriter.on("code", {
  text(text) {
    // Escape HTML entities. `&` is replaced first so that the ampersands
    // introduced by the next two replacements are not escaped a second time.
    const escaped = text.text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
    // The string is already escaped, so insert it verbatim as markup.
    // NOTE(review): without `html: true` the replacement is treated as plain
    // text and escaped again by the rewriter - confirm against the runtime docs.
    text.replace(escaped, { html: true });
  },
});
Document Handlers
Document-Level Events
rewriter.onDocument({
  // Receives the document's doctype declaration
  doctype: (doctype) => {
    console.log("DOCTYPE:", doctype.name);
  },
  // Receives document-level comments
  comments: (comment) => {
    const markedForRemoval = comment.text.includes("remove");
    if (markedForRemoval) {
      comment.remove();
    }
  },
  // Receives document-level text (outside elements)
  text: (_text) => {},
  // Runs once the end of the document is reached
  end: (_end) => {
    console.log("Document processing complete");
  },
});
Transforming Responses
Basic Transform
// Rewriter that overwrites the page title.
const rewriter = new HTMLRewriter().on("title", {
  element: (title) => {
    title.setInnerContent("New Title");
  },
});

// Fetch a page, pass the Response through the rewriter, and read the result.
const response = await fetch("https://example.com");
const transformed = rewriter.transform(response);
const html = await transformed.text();
Streaming Transform
const rewriter = new HTMLRewriter();
rewriter.on("img", {
  element(element) {
    element.setAttribute("loading", "lazy");
  },
});
const response = await fetch("https://example.com");
const stream = rewriter.transform(response).body;
// `Response.body` is typed as nullable (a response may carry no payload),
// so guard before iterating.
if (stream) {
  // Stream chunks as they're transformed
  for await (const chunk of stream) {
    console.log("Chunk:", chunk.length, "bytes");
  }
}
Common Use Cases
Add Analytics
// Markup injected at the end of <head>.
const analyticsSnippet = `<script>
// Analytics code
gtag('config', 'GA_MEASUREMENT_ID');
</script>`;

const rewriter = new HTMLRewriter().on("head", {
  element: (head) => {
    head.append(analyticsSnippet, { html: true });
  },
});
Lazy Load Images
const rewriter = new HTMLRewriter().on("img", {
  element: (img) => {
    const src = img.getAttribute("src");
    // Leave images without a src, and inline data: URIs, untouched
    if (!src || src.startsWith("data:")) {
      return;
    }
    img.setAttribute("loading", "lazy");
    img.setAttribute("decoding", "async");
  },
});
Rewrite Links
// Only anchors whose href starts with "http://" are matched.
const rewriter = new HTMLRewriter().on('a[href^="http://"]', {
  element: (link) => {
    // Upgrade to HTTPS
    const insecureUrl = link.getAttribute("href");
    if (insecureUrl) {
      const secureUrl = insecureUrl.replace("http://", "https://");
      link.setAttribute("href", secureUrl);
    }

    // Mark the link as external
    link.setAttribute("target", "_blank");
    link.setAttribute("rel", "noopener noreferrer");
  },
});
Content Security
const rewriter = new HTMLRewriter();
// Remove scripts from untrusted HTML
rewriter.on("script", {
  element(element) {
    element.remove();
  },
});
// Remove every inline event handler attribute (onclick, onload, onerror,
// onfocus, ...). Matching on the "on" prefix avoids maintaining a fixed list,
// which would let through any handler attribute it does not name.
// NOTE: this alone is not a complete sanitizer; e.g. `javascript:` URLs in
// href/src attributes are not touched here.
rewriter.on("*", {
  element(element) {
    // Snapshot the names first so nothing is removed while iterating
    const names = Array.from(element.attributes, ([name]) => name);
    for (const name of names) {
      if (name.toLowerCase().startsWith("on")) {
        element.removeAttribute(name);
      }
    }
  },
});
Extract Metadata
// Collected values keyed by name ("title", "description", "og:*").
// getAttribute() returns `string | null`, hence the nullable value type.
const metadata: Record<string, string | null> = {};
const rewriter = new HTMLRewriter();
rewriter.on("title", {
  text(text) {
    // A text node can be delivered in several chunks, so accumulate the
    // pieces instead of overwriting the title with each one.
    metadata.title = (metadata.title ?? "") + text.text;
  },
});
rewriter.on('meta[name="description"]', {
  element(element) {
    metadata.description = element.getAttribute("content");
  },
});
rewriter.on('meta[property^="og:"]', {
  element(element) {
    const property = element.getAttribute("property");
    // The selector implies the attribute exists; the guard narrows the
    // `string | null` type so it can be used as a key.
    if (property) {
      metadata[property] = element.getAttribute("content");
    }
  },
});
const response = await fetch(url);
// Consume the transformed body so the handlers run; the output is discarded
await rewriter.transform(response).arrayBuffer();
console.log(metadata);
Syntax Highlighting
import { highlight } from "./highlighter";
const rewriter = new HTMLRewriter();
// Buffer shared by the two handlers below. It must live outside them: the
// element handler resets it for each <code> element and the text handler
// appends to it.
let code = "";
rewriter.on("code", {
  element(element) {
    const lang = element.getAttribute("class")?.replace("language-", "");
    code = "";
    // By the time the end tag is reached, all text chunks have been seen.
    // Emit the highlighted markup just before the closing tag.
    element.onEndTag((endTag) => {
      endTag.before(highlight(code, lang), { html: true });
    });
  },
  text(text) {
    // Collect the raw text and drop it from the output; the highlighted
    // version inserted at the end tag takes its place.
    code += text.text;
    text.remove();
  },
});
Localization
// Translation tables keyed by language code, then by message key.
// The explicit Record type allows lookups with arbitrary `string` keys
// under strict compiler settings.
const translations: Record<string, Record<string, string>> = {
  en: { hello: "Hello", goodbye: "Goodbye" },
  es: { hello: "Hola", goodbye: "Adiós" },
};

/**
 * Looks up the translation of `key` for `lang`.
 * Falls back to `key` itself when the language or the key is unknown.
 * Uses `??` so that an intentionally empty translation is kept.
 */
function translate(lang: string, key: string): string {
  return translations[lang]?.[key] ?? key;
}

/**
 * Builds a rewriter that replaces the content of every element carrying a
 * `data-i18n` attribute with the translation of that attribute's value.
 *
 * @param lang - Language code used to select the translation table.
 * @returns The configured HTMLRewriter.
 */
function localize(lang: string) {
  const rewriter = new HTMLRewriter();
  rewriter.on('[data-i18n]', {
    element(element) {
      const key = element.getAttribute("data-i18n");
      if (key) {
        element.setInnerContent(translate(lang, key));
      }
    },
  });
  return rewriter;
}
const spanish = localize("es");
// <p data-i18n="hello">Hello</p> -> <p data-i18n="hello">Hola</p>
Performance
Streaming Benefits
HTMLRewriter processes HTML as it streams:
const rewriter = new HTMLRewriter();
rewriter.on("img", {
  element(element) {
    element.setAttribute("loading", "lazy");
  },
});
// Starts transforming before entire HTML is downloaded
const response = await fetch("https://example.com/large.html");
const transformed = rewriter.transform(response);
// Can start reading output immediately.
// `body` is typed as nullable, so only read when a stream is present.
const reader = transformed.body?.getReader();
if (reader) {
  const { value } = await reader.read();
  console.log("First chunk:", value);
}
Memory Efficiency
// Bad - loads entire HTML into memory
const html = await response.text();
const modified = html.replace(/<img/g, '<img loading="lazy"');
// Good - streams and transforms
const lazyImages = new HTMLRewriter().on("img", {
  element: (img) => {
    img.setAttribute("loading", "lazy");
  },
});
const transformed = lazyImages.transform(response);
Error Handling
// Errors thrown while processing one image are logged inside the handler.
const rewriter = new HTMLRewriter().on("img", {
  element: (img) => {
    try {
      const src = img.getAttribute("src");
      if (src) {
        // Transform src
        img.setAttribute("src", transformUrl(src));
      } else {
        console.warn("Image missing src attribute");
      }
    } catch (err) {
      console.error("Error processing image:", err);
    }
  },
});

// Failures of the fetch or of the transform as a whole are caught here.
try {
  const response = await fetch(url);
  const html = await rewriter.transform(response).text();
} catch (err) {
  console.error("Transform failed:", err);
}
Best Practices
-
Use specific selectors
// Good - specific
rewriter.on("nav.main-menu > li > a", handlers);
// Bad - too broad
rewriter.on("a", handlers);
-
Avoid blocking operations
// Bad - blocks streaming
rewriter.on("img", {
  async element(element) {
    const data = await fetch(element.getAttribute("src"));
    // ...
  },
});
// Good - non-blocking
rewriter.on("img", {
  element(element) {
    const src = element.getAttribute("src");
    processLater(src);
  },
});
-
Minimize transformations
// Good - single rewriter
const rewriter = new HTMLRewriter()
  .on("img", imgHandler)
  .on("a", linkHandler);
// Bad - multiple passes
let html = original;
html = new HTMLRewriter().on("img", imgHandler).transform(html);
html = new HTMLRewriter().on("a", linkHandler).transform(html);
-
Handle missing attributes
rewriter.on("a", {
  element(element) {
    const href = element.getAttribute("href");
    if (!href) return; // Skip links without href
    // Process href
  },
});
API Reference
HTMLRewriter
const rewriter = new HTMLRewriter();
rewriter.on()
rewriter.on(selector: string, handlers: ElementHandlers)
rewriter.onDocument()
rewriter.onDocument(handlers: DocumentHandlers)
rewriter.transform()
rewriter.transform(response: Response): Response
Element
/** End tag of an element, passed to the callback registered via `onEndTag`. */
interface EndTag {
  /** Tag name of the closing tag. */
  name: string;
  before(content: string, options?: ContentOptions): void;
  after(content: string, options?: ContentOptions): void;
  remove(): void;
}

/** Element handle passed to `element` handlers registered with `rewriter.on()`. */
interface Element {
  tagName: string;
  /** Iterates over the element's `[name, value]` attribute pairs. */
  readonly attributes: IterableIterator<[string, string]>;
  /** Whether the element has been removed or replaced by a handler. */
  readonly removed: boolean;
  getAttribute(name: string): string | null;
  hasAttribute(name: string): boolean;
  setAttribute(name: string, value: string): void;
  removeAttribute(name: string): void;
  before(content: string, options?: ContentOptions): void;
  after(content: string, options?: ContentOptions): void;
  prepend(content: string, options?: ContentOptions): void;
  append(content: string, options?: ContentOptions): void;
  /** Replaces the element, including its tags, with `content`. */
  replace(content: string, options?: ContentOptions): void;
  setInnerContent(content: string, options?: ContentOptions): void;
  remove(): void;
  removeAndKeepContent(): void;
  /** Registers a callback that runs when the element's end tag is reached. */
  onEndTag(handler: (endTag: EndTag) => void | Promise<void>): void;
}
Text
/**
 * A chunk of text passed to `text` handlers. A single text node may be
 * delivered as several chunks.
 */
interface Text {
  /** Content of this chunk. */
  text: string;
  /** True when this is the final chunk of the current text node. */
  lastInTextChunk: boolean;
  /** Whether the chunk has been removed or replaced by a handler. */
  readonly removed: boolean;
  before(content: string, options?: ContentOptions): void;
  after(content: string, options?: ContentOptions): void;
  replace(content: string, options?: ContentOptions): void;
  remove(): void;
}
Platform Support
HTMLRewriter works on:
- Bun
- Cloudflare Workers
- Any environment with a compatible polyfill