curl --request GET \
--url https://api.example.com/api/get-document-content
{
"success": true,
"chunks": [
{
"chunk_text": "<string>",
"chunk_index": 123
}
],
"content": "<string>",
"chunk_count": 123,
"error": "<string>"
}
Retrieve the text content and chunks of a document.
curl --request GET \
--url https://api.example.com/api/get-document-content
{
"success": true,
"chunks": [
{
"chunk_text": "<string>",
"chunk_index": 123
}
],
"content": "<string>",
"chunk_count": 123,
"error": "<string>"
}
The `content` field joins all chunk texts with `\n\n---\n\n` between each chunk.
curl -X GET "https://your-domain.com/api/get-document-content?id=42"
{
"success": true,
"chunks": [
{
"chunk_text": "Welcome to our company handbook. This document outlines our policies, procedures, and company culture. Our mission is to provide exceptional service while maintaining a positive work environment.",
"chunk_index": 0
},
{
"chunk_text": "Employee Benefits: We offer comprehensive health insurance, 401(k) matching, unlimited PTO, and professional development opportunities. All employees are eligible for benefits after 30 days of employment.",
"chunk_index": 1
},
{
"chunk_text": "Work Schedule: Our standard work week is Monday through Friday, 9 AM to 5 PM. Remote work options are available for eligible positions. Please discuss flexible arrangements with your manager.",
"chunk_index": 2
}
],
"content": "Welcome to our company handbook. This document outlines our policies, procedures, and company culture. Our mission is to provide exceptional service while maintaining a positive work environment.\n\n---\n\nEmployee Benefits: We offer comprehensive health insurance, 401(k) matching, unlimited PTO, and professional development opportunities. All employees are eligible for benefits after 30 days of employment.\n\n---\n\nWork Schedule: Our standard work week is Monday through Friday, 9 AM to 5 PM. Remote work options are available for eligible positions. Please discuss flexible arrangements with your manager.",
"chunk_count": 3
}
{
"success": false,
"error": "Error al obtener contenido del documento"
}
{
"success": true,
"chunks": [],
"content": "",
"chunk_count": 0
}
The endpoint queries the `vectors` table directly (api/get-document-content.php:15-18):
// Select the text and index of every chunk stored for the requested document,
// ordered by chunk_index ascending. The document id is passed as the bound
// :id parameter rather than being concatenated into the SQL string.
// NOTE(review): presumably fetchAll() returns an array of row arrays — confirm
// against the $db wrapper.
$chunks = $db->fetchAll(
'SELECT chunk_text, chunk_index FROM vectors WHERE document_id = :id ORDER BY chunk_index ASC',
[':id' => $id]
);
// Build the full document text: take the chunk_text column of each row and
// join the values with a "---" line surrounded by blank lines.
$content = implode("\n\n---\n\n", array_column($chunks, 'chunk_text'));
The `\n\n---\n\n` separator makes it easy to visually distinguish between chunks when displaying the full content.
Chunks are ordered by `chunk_index ASC`, ensuring the content appears in the same sequence as the original document.
// Fetch the joined document text and the total number of chunks.
// `docId` must be defined by the surrounding code.
const previewResponse = await fetch(`/api/get-document-content?id=${docId}`);
const { content, chunk_count } = await previewResponse.json();
// Show at most the first 500 characters as a preview. The ellipsis is added
// only when the text was actually cut, so short or empty documents (the API
// returns content: "" for a document without chunks) are shown verbatim
// instead of looking truncated.
const preview =
  content.length > 500 ? `${content.substring(0, 500)}...` : content;
console.log(`Preview (${chunk_count} chunks total):\n${preview}`);
// Fetch the individual chunks to compute simple length statistics.
// `docId` must be defined by the surrounding code.
const statsResponse = await fetch(`/api/get-document-content?id=${docId}`);
const { chunks } = await statsResponse.json();
const chunkLengths = chunks.map(c => c.chunk_text.length);
// A document without chunks is a valid response (chunks: []). Report zeros in
// that case instead of NaN / -Infinity / Infinity.
const hasChunks = chunkLengths.length > 0;
const totalLength = chunkLengths.reduce((a, b) => a + b, 0);
const avgLength = hasChunks ? totalLength / chunkLengths.length : 0;
// Fold instead of spreading into Math.max/Math.min so very large documents
// cannot exceed the engine's argument-count limit.
const maxLength = chunkLengths.reduce((max, len) => Math.max(max, len), 0);
const minLength = hasChunks
  ? chunkLengths.reduce((min, len) => Math.min(min, len), Infinity)
  : 0;
console.log(`Avg: ${avgLength}, Min: ${minLength}, Max: ${maxLength}`);
// Load the document's chunks; `docId` must be defined by the surrounding code.
const keywordLookup = await fetch(`/api/get-document-content?id=${docId}`);
const { chunks } = await keywordLookup.json();

// Keep only the chunks whose lower-cased text contains the keyword.
const keyword = 'pricing';
const mentionsKeyword = ({ chunk_text }) =>
  chunk_text.toLowerCase().includes(keyword);
const relevantChunks = chunks.filter(mentionsKeyword);

// Report the number of hits, then the first 100 characters of each hit.
console.log(`Found "${keyword}" in ${relevantChunks.length} chunks:`);
for (const { chunk_index, chunk_text } of relevantChunks) {
  console.log(` Chunk ${chunk_index}: ${chunk_text.slice(0, 100)}...`);
}
// Fetch the full document text and offer it to the user as a plain-text
// download. Browser-only: relies on Blob, URL.createObjectURL and the DOM.
// `docId` must be defined by the surrounding code.
const { content } = await fetch(
  `/api/get-document-content?id=${docId}`
).then(r => r.json());
const blob = new Blob([content], { type: 'text/plain' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'document-content.txt';
a.click();
// Release the object URL once the click has been dispatched; otherwise the
// blob stays referenced until the page is unloaded. Deferring by one task
// lets the browser start the download before the URL is revoked.
setTimeout(() => URL.revokeObjectURL(url), 0);
/**
 * Search a document's chunks for a term and rank the chunks by hit count.
 *
 * A string `searchTerm` is matched literally and case-insensitively: regex
 * metacharacters such as `(`, `.` or `?` are escaped, so terms like "401(k)"
 * match themselves and can no longer throw a SyntaxError. A RegExp argument is
 * still accepted and its pattern is used as-is with the `gi` flags, as before.
 *
 * @param {number|string} docId - Document id, sent as the `id` query parameter.
 * @param {string|RegExp} searchTerm - Text (or pattern) to look for.
 * @returns {Promise<Array<{index: number, text: string, matches: number}>>}
 *   Chunks containing at least one match, most matches first. Empty when the
 *   search term is empty.
 * @throws {Error} When the API reports `success: false` or returns no chunk list.
 */
async function searchInDocument(docId, searchTerm) {
  const isPattern = searchTerm instanceof RegExp;
  const term = isPattern ? searchTerm.source : String(searchTerm ?? '');
  // An empty pattern would "match" at every position of every chunk.
  if (!isPattern && term === '') {
    return [];
  }

  const response = await fetch(
    `/api/get-document-content?id=${encodeURIComponent(docId)}`
  );
  const { success, chunks, error } = await response.json();
  // The error response carries `success: false` and `error` but no `chunks`.
  if (success === false || !Array.isArray(chunks)) {
    throw new Error(error ?? 'Document content could not be retrieved');
  }

  const source = isPattern
    ? term
    : term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Safe to build once and reuse: String.prototype.match resets lastIndex
  // for global patterns before scanning.
  const pattern = new RegExp(source, 'gi');

  return chunks
    .map(chunk => ({
      index: chunk.chunk_index,
      text: chunk.chunk_text,
      matches: (chunk.chunk_text.match(pattern) ?? []).length,
    }))
    .filter(result => result.matches > 0)
    .sort((a, b) => b.matches - a.matches);
}
// Example: search document 42 for "customer service" and report how many
// chunks contained at least one match.
const results = await searchInDocument(42, 'customer service');
const { length: matchingChunkCount } = results;
console.log(`Found ${matchingChunkCount} chunks with matches`);
See also the `rag.chunk_size` and `rag.chunk_overlap` configuration settings.