The k6/html module provides HTML parsing and DOM manipulation using a jQuery-like API.
Functions
parseHTML(src)
Parses an HTML string and returns a Selection object.
Returns: Selection — an object for querying the DOM.
import { parseHTML } from 'k6/html';
import http from 'k6/http';
// Fetch the demo page, parse its body, and log the text of the <title> element.
export default function () {
  const response = http.get('https://test.k6.io');
  const pageTitle = parseHTML(response.body).find('title').text();
  console.log(pageTitle);
}
Selection Object
The Selection object provides jQuery-like methods for querying and manipulating HTML.
Traversal Methods
find(selector)
Finds descendants matching the selector.
const links = doc.find('a');
filter(selector)
Filters current selection.
const externalLinks = doc.find('a').filter('[href^="http"]');
not(selector)
Removes elements matching selector from the selection.
const notExternal = doc.find('a').not('[href^="http"]');
first()
Returns first element in selection.
const firstLink = doc.find('a').first();
last()
Returns last element in selection.
eq(index)
Returns element at the specified index.
const secondLink = doc.find('a').eq(1);
slice(start, [end])
Reduces selection to a subset.
next([selector])
Returns next sibling.
nextAll([selector])
Returns all following siblings.
prev([selector])
Returns previous sibling.
prevAll([selector])
Returns all preceding siblings.
parent([selector])
Returns parent element.
parents([selector])
Returns all ancestors.
children([selector])
Returns direct children.
siblings([selector])
Returns all siblings.
closest(selector)
Finds closest ancestor matching selector.
Content Methods
text()
Gets text content of all matched elements.
const text = doc.find('h1').text();
html()
Gets HTML content of first matched element.
const html = doc.find('div').html();
attr(name, [default])
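Gets the value of the named attribute from the first matched element; if the attribute is absent, the optional default is returned instead.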
const href = doc.find('a').attr('href');
const title = doc.find('a').attr('title', 'default title');
val()
Gets value of form elements.
const inputValue = doc.find('input[name="username"]').val();
data([key])
Gets data attributes.
const userId = doc.find('div').data('user-id');
const allData = doc.find('div').data();
Iteration Methods
each(fn)
Iterates over selection.
// each() hands the callback a DOM-style Element rather than a Selection, so the
// text is read through the Element API (textContent) instead of Selection.text().
doc.find('a').each(function (idx, el) {
  console.log(idx, el.textContent());
});
map(fn)
Maps selection to array.
// Collect the href attribute of every link in the document.
const hrefs = doc.find('a').map((idx, el) => el.attr('href'));
Utility Methods
size()
Returns number of elements in selection.
const linkCount = doc.find('a').size();
get([index])
Gets all matched elements as an array, or only the element at the given index when an index is passed.
const allLinks = doc.find('a').get();
const firstLink = doc.find('a').get(0);
toArray()
Converts selection to array.
Examples
import { parseHTML } from 'k6/html';
import http from 'k6/http';
// Fetch the demo page and log the href of every link it contains.
export default function () {
  const res = http.get('https://test.k6.io');
  const doc = parseHTML(res.body);
  // each() passes an Element (not a Selection) to the callback, so the
  // attribute is read with getAttribute() rather than the Selection-only attr().
  doc.find('a').each(function (idx, el) {
    const href = el.getAttribute('href');
    console.log(`Link ${idx}: ${href}`);
  });
}
import { parseHTML } from 'k6/html';
import http from 'k6/http';
import { check } from 'k6';
// Read the pre-filled login form fields from the page and verify that a
// non-empty CSRF token was actually present.
export default function () {
  const res = http.get('https://test.k6.io/my_messages.php');
  const doc = parseHTML(res.body);
  const formData = {
    username: doc.find('input[name="username"]').val(),
    password: doc.find('input[name="password"]').val(),
    token: doc.find('input[name="_token"]').val(),
  };
  check(formData, {
    // A missing input makes val() yield undefined, and `undefined !== ''` is
    // true, so comparing against '' alone would pass when the token is absent.
    // Require an actual non-empty string instead.
    'has token': (fd) => typeof fd.token === 'string' && fd.token !== '',
  });
}
Scraping Data
import { parseHTML } from 'k6/html';
import http from 'k6/http';
// Fetch the demo page and log the numeric value of every product price on it.
export default function () {
  const response = http.get('https://test.k6.io');
  const page = parseHTML(response.body);
  const priceNodes = page.find('.product .price');
  // Drop the first '$' from each price label before converting it to a number.
  const prices = priceNodes.map((idx, el) => {
    const label = el.text();
    return parseFloat(label.replace('$', ''));
  });
  console.log('Prices:', prices);
}
Validate Page Structure
import { parseHTML } from 'k6/html';
import http from 'k6/http';
import { check } from 'k6';
// Fetch the demo page and check that its basic structural elements are present.
export default function () {
  const response = http.get('https://test.k6.io');
  const page = parseHTML(response.body);
  // Number of elements under `root` that match `selector`.
  const countOf = (root, selector) => root.find(selector).size();
  check(page, {
    'has title': (d) => countOf(d, 'title') === 1,
    'has navigation': (d) => countOf(d, 'nav') > 0,
    'has footer': (d) => countOf(d, 'footer') === 1,
    'has logo': (d) => countOf(d, 'img.logo') > 0,
  });
}
import { parseHTML } from 'k6/html';
import http from 'k6/http';
// Fetch the demo page and log its title and selected <meta> values as JSON.
export default function () {
  const response = http.get('https://test.k6.io');
  const page = parseHTML(response.body);
  // Read the `content` attribute of the meta tag matched by `selector`.
  const metaContent = (selector) => page.find(selector).attr('content');
  const metadata = {
    title: page.find('title').text(),
    description: metaContent('meta[name="description"]'),
    keywords: metaContent('meta[name="keywords"]'),
    ogTitle: metaContent('meta[property="og:title"]'),
  };
  console.log(JSON.stringify(metadata, null, 2));
}