The ParseUtils module provides utilities for parsing text files and command-line arguments. It includes a buffered stream reader for efficient file I/O and template functions for parsing common patterns like integers, whitespace, and string matching.
StreamBuffer class
Buffered character stream reader with support for gzip-compressed files.
Constructor
explicit StreamBuffer(gzFile i)
Creates a buffered stream from a gzip file handle.
gzip file handle obtained from gzopen()
Members
buffer_size
static const int
default:"65536"
Internal buffer size (64 KB)
Operators
Returns the current character, or EOF if at end of file.
Current character as unsigned char, or EOF
Advances to the next character in the stream.
Returns the current position in the buffer.
Usage
#include "minisat/utils/ParseUtils.h"
#include <zlib.h>
using namespace Minisat;
gzFile in = gzopen("input.cnf.gz", "rb");
if (in == NULL) {
fprintf(stderr, "Error opening file\n");
exit(1);
}
StreamBuffer stream(in);
while (*stream != EOF) {
char c = *stream;
// Process character
++stream;
}
gzclose(in);
StreamBuffer automatically handles buffering and lookahead, making it efficient for parsing large files. It works seamlessly with both compressed (.gz) and uncompressed files via zlib.
End-of-file detection
isEof (StreamBuffer)
static inline bool isEof(StreamBuffer& in)
Checks if a StreamBuffer has reached end of file.
True if at EOF, false otherwise
isEof (const char*)
static inline bool isEof(const char* in)
Checks if a character pointer has reached null terminator.
Character pointer to check
True if pointing to ‘\0’, false otherwise
Parsing functions
All parsing functions are templates that work with both StreamBuffer and const char* types.
skipWhitespace
template<class B>
static void skipWhitespace(B& in)
Skips whitespace characters (space, tab, newline, carriage return).
Input stream (StreamBuffer or const char*)
Skips characters with ASCII values 9-13 (tab, line feed, vertical tab, form feed, carriage return) and 32 (space).
Usage
StreamBuffer stream(in);
skipWhitespace(stream);
// stream now points to first non-whitespace character
skipLine
template<class B>
static void skipLine(B& in)
Skips characters until end of line or end of file.
Usage
// Skip comment line in CNF file
if (*stream == 'c') {
skipLine(stream);
}
parseInt
template<class B>
static int parseInt(B& in)
Parses an integer from the input stream, skipping leading whitespace.
Supports optional ’+’ or ’-’ sign. Exits with error code 3 if the next non-whitespace character is not a digit or sign.
Usage
StreamBuffer stream(in);
// Parse "p cnf 100 200" header
skipWhitespace(stream);
if (*stream == 'p') {
++stream;
skipWhitespace(stream);
// Skip "cnf"
int num_vars = parseInt(stream);
int num_clauses = parseInt(stream);
}
String matching
match
template<class B>
static bool match(B& in, const char* str)
Matches a string at the current position. Requires random access iterator.
Input stream (must support indexing with [])
True if string matches and input is advanced, false otherwise
On success, the input iterator is advanced by the length of the matched string. On failure, the input position is unchanged.
Usage
const char* input = "-verbosity=2";
if (match(input, "-verbosity=")) {
// input now points to "2"
int val = parseInt(input);
}
eagerMatch
template<class B>
static bool eagerMatch(B& in, const char* str)
Matches a string eagerly, consuming characters one at a time. Does not require random access.
Input stream (only requires operator* and operator++)
True if string matches, false otherwise
Unlike match(), this function consumes characters even if the match ultimately fails. Use this for forward-only iterators like StreamBuffer.
Complete parsing example
Example of parsing a simplified CNF file:
#include "minisat/utils/ParseUtils.h"
#include <zlib.h>
#include <stdio.h>
using namespace Minisat;
void parseCNF(const char* filename) {
gzFile in = gzopen(filename, "rb");
if (in == NULL) {
fprintf(stderr, "ERROR: Could not open file: %s\n", filename);
return;
}
StreamBuffer stream(in);
int num_vars = 0;
int num_clauses = 0;
// Parse header
while (!isEof(stream)) {
skipWhitespace(stream);
if (*stream == 'c') {
// Comment line
skipLine(stream);
} else if (*stream == 'p') {
// Problem line: p cnf <vars> <clauses>
++stream;
skipWhitespace(stream);
// Match "cnf"
if (!eagerMatch(stream, "cnf")) {
fprintf(stderr, "ERROR: Expected 'cnf' in problem line\n");
gzclose(in);
return;
}
num_vars = parseInt(stream);
num_clauses = parseInt(stream);
skipLine(stream);
break;
} else {
++stream;
}
}
printf("Variables: %d, Clauses: %d\n", num_vars, num_clauses);
// Parse clauses
for (int i = 0; i < num_clauses; i++) {
skipWhitespace(stream);
// Skip comments
while (*stream == 'c') {
skipLine(stream);
skipWhitespace(stream);
}
// Read literals until 0
while (true) {
int lit = parseInt(stream);
if (lit == 0) break;
// Process literal
printf("Literal: %d\n", lit);
}
}
gzclose(in);
}
int main() {
parseCNF("example.cnf");
return 0;
}
Template specialization
All parsing functions work with two main types:
Efficient buffered reading from gzip files. Use for file parsing.
Direct string parsing. Use for command-line argument parsing.
Example with const char*
const char* str = " -verbosity=2";
skipWhitespace(str);
if (match(str, "-verbosity=")) {
int value = parseInt(str);
printf("Verbosity: %d\n", value);
}
Integration with Options
The ParseUtils module is used internally by the Options system for parsing command-line arguments:
// From Options.h
virtual bool parse(const char* str) {
const char* span = str;
if (!match(span, "-") || !match(span, name) || !match(span, "="))
return false;
char* end;
int32_t tmp = strtol(span, &end, 10);
// ... validation and assignment
return true;
}