Skip to main content
The ParseUtils module provides utilities for parsing text files and command-line arguments. It includes a buffered stream reader for efficient file I/O and template functions for parsing common patterns like integers, whitespace, and string matching.

StreamBuffer class

Buffered character stream reader with support for gzip-compressed files.

Constructor

explicit StreamBuffer(gzFile i)
Creates a buffered stream from a gzip file handle.
i
gzFile
required
gzip file handle obtained from gzopen()

Members

buffer_size
static const int
default:"65536"
Internal buffer size (64 KB)

Operators

int operator*() const
Returns the current character, or EOF if at end of file.
return
int
Current character as unsigned char, or EOF
void operator++()
Advances to the next character in the stream.
int position() const
Returns the current position in the buffer.
return
int
Current buffer position

Usage

#include "minisat/utils/ParseUtils.h"
#include <zlib.h>

using namespace Minisat;

gzFile in = gzopen("input.cnf.gz", "rb");
if (in == NULL) {
    fprintf(stderr, "Error opening file\n");
    exit(1);
}

StreamBuffer stream(in);

while (*stream != EOF) {
    char c = *stream;
    // Process character
    ++stream;
}

gzclose(in);
StreamBuffer automatically handles buffering and lookahead, making it efficient for parsing large files. It works seamlessly with both compressed (.gz) and uncompressed files via zlib.

End-of-file detection

isEof (StreamBuffer)

static inline bool isEof(StreamBuffer& in)
Checks if a StreamBuffer has reached end of file.
in
StreamBuffer&
required
Stream buffer to check
return
bool
True if at EOF, false otherwise

isEof (const char*)

static inline bool isEof(const char* in)
Checks if a character pointer has reached null terminator.
in
const char*
required
Character pointer to check
return
bool
True if pointing to ‘\0’, false otherwise

Parsing functions

All parsing functions are templates that work with both StreamBuffer and const char* types.

skipWhitespace

template<class B>
static void skipWhitespace(B& in)
Skips whitespace characters (space, tab, newline, carriage return).
in
B&
required
Input stream (StreamBuffer or const char*)
Skips characters with ASCII values 9-13 (tab, line feed, vertical tab, form feed, carriage return) and 32 (space).

Usage

StreamBuffer stream(in);
skipWhitespace(stream);
// stream now points to first non-whitespace character

skipLine

template<class B>
static void skipLine(B& in)
Skips characters until end of line or end of file.
in
B&
required
Input stream

Usage

// Skip comment line in CNF file
if (*stream == 'c') {
    skipLine(stream);
}

parseInt

template<class B>
static int parseInt(B& in)
Parses an integer from the input stream, skipping leading whitespace.
in
B&
required
Input stream
return
int
Parsed integer value
Supports optional ’+’ or ’-’ sign. Exits with error code 3 if the next non-whitespace character is not a digit or sign.

Usage

StreamBuffer stream(in);

// Parse "p cnf 100 200" header
skipWhitespace(stream);
if (*stream == 'p') {
    ++stream;
    skipWhitespace(stream);
    // Skip "cnf"
    int num_vars = parseInt(stream);
    int num_clauses = parseInt(stream);
}

String matching

match

template<class B>
static bool match(B& in, const char* str)
Matches a string at the current position. Requires random access iterator.
in
B&
required
Input stream (must support indexing with [])
str
const char*
required
String to match
return
bool
True if string matches and input is advanced, false otherwise
On success, the input iterator is advanced by the length of the matched string. On failure, the input position is unchanged.

Usage

const char* input = "-verbosity=2";
if (match(input, "-verbosity=")) {
    // input now points to "2"
    int val = parseInt(input);
}

eagerMatch

template<class B>
static bool eagerMatch(B& in, const char* str)
Matches a string eagerly, consuming characters one at a time. Does not require random access.
in
B&
required
Input stream (only requires operator* and operator++)
str
const char*
required
String to match
return
bool
True if string matches, false otherwise
Unlike match(), this function consumes characters even if the match ultimately fails. Use this for forward-only iterators like StreamBuffer.

Complete parsing example

Example of parsing a simplified CNF file:
#include "minisat/utils/ParseUtils.h"
#include <zlib.h>
#include <stdio.h>

using namespace Minisat;

void parseCNF(const char* filename) {
    gzFile in = gzopen(filename, "rb");
    if (in == NULL) {
        fprintf(stderr, "ERROR: Could not open file: %s\n", filename);
        return;
    }
    
    StreamBuffer stream(in);
    
    int num_vars = 0;
    int num_clauses = 0;
    
    // Parse header
    while (!isEof(stream)) {
        skipWhitespace(stream);
        
        if (*stream == 'c') {
            // Comment line
            skipLine(stream);
        } else if (*stream == 'p') {
            // Problem line: p cnf <vars> <clauses>
            ++stream;
            skipWhitespace(stream);
            
            // Match "cnf"
            if (!eagerMatch(stream, "cnf")) {
                fprintf(stderr, "ERROR: Expected 'cnf' in problem line\n");
                gzclose(in);
                return;
            }
            
            num_vars = parseInt(stream);
            num_clauses = parseInt(stream);
            skipLine(stream);
            break;
        } else {
            ++stream;
        }
    }
    
    printf("Variables: %d, Clauses: %d\n", num_vars, num_clauses);
    
    // Parse clauses
    for (int i = 0; i < num_clauses; i++) {
        skipWhitespace(stream);
        
        // Skip comments
        while (*stream == 'c') {
            skipLine(stream);
            skipWhitespace(stream);
        }
        
        // Read literals until 0
        while (true) {
            int lit = parseInt(stream);
            if (lit == 0) break;
            
            // Process literal
            printf("Literal: %d\n", lit);
        }
    }
    
    gzclose(in);
}

int main() {
    parseCNF("example.cnf");
    return 0;
}

Template specialization

All parsing functions work with two main types:
StreamBuffer
buffered stream
Efficient buffered reading from gzip files. Use for file parsing.
const char*
string parsing
Direct string parsing. Use for command-line argument parsing.

Example with const char*

const char* str = "  -verbosity=2";
skipWhitespace(str);
if (match(str, "-verbosity=")) {
    int value = parseInt(str);
    printf("Verbosity: %d\n", value);
}

Integration with Options

The ParseUtils module is used internally by the Options system for parsing command-line arguments:
// From Options.h
virtual bool parse(const char* str) {
    const char* span = str;
    if (!match(span, "-") || !match(span, name) || !match(span, "="))
        return false;
    
    char* end;
    int32_t tmp = strtol(span, &end, 10);
    // ... validation and assignment
    return true;
}

Build docs developers (and LLMs) love