Skip to main content

Parsing Pipeline

Minishell processes user input through multiple stages before execution. The parsing system handles quotes, special characters, environment variables, and syntax validation.
1

Line Preparation

Add spacing around redirection operators
2

Input Validation

Check for unclosed quotes and invalid operator sequences
3

Tokenization

Split input into tokens while respecting quotes
4

Variable Expansion

Replace environment variables with their values
5

Quote Removal

Strip quotes from final command arguments

Line Preparation

The prepare_line() function ensures redirection operators are properly separated:
more_parsing.c:61
/*
** Counts the redirection operators ('<' or '>') in `line` that are glued to
** the character after them, i.e. followed by something other than another
** operator character ('<', '>', '|'), a blank (' ', '\t', '\n', '\v') or the
** end of the string.
**
** line: the raw input line.
** i:    index just before the first character to scan; it is incremented
**       before the first read (callers presumably pass -1 -- confirm).
** j:    starting value of the counter (callers presumably pass 0 -- confirm).
**
** Returns `line` itself when the counter ends at 0; otherwise returns
** whatever ft_repared_line(line, j, -1, 0) produces.
*/
char *prepare_line(char *line, int i, int j)
{
    char    next;

    i++;
    while (line[i])
    {
        next = line[i + 1];
        if ((line[i] == '<' || line[i] == '>') && next != '\0'
            && next != '<' && next != '>' && next != '|'
            && next != ' ' && next != '\t' && next != '\n'
            && next != '\v')
            j++;
        i++;
    }
    if (j != 0)
        return (ft_repared_line(line, j, -1, 0));
    return (line);
}
This function transforms >file into > file to ensure the tokenizer treats the redirection operator and filename as separate tokens.

Input Validation

Before tokenization, the shell validates the input for common syntax errors.

Unclosed Quote Detection

The detectopenquotes() function ensures all quotes are properly closed:
parse_imput.c:93
/*
** Reports whether every quote in `line` is closed.
**
** A single "currently open quote" character is tracked: a quote opens only
** when nothing is open, and only the same quote character closes it, so the
** other quote type is treated as ordinary text while one is open.
**
** Returns 1 when no quote is left open at the end of the line, 0 otherwise.
*/
int detectopenquotes(char *line)
{
    char    open;
    char    *cursor;

    open = '\0';
    cursor = line;
    while (*cursor)
    {
        if (open == '\0' && (*cursor == '\'' || *cursor == '\"'))
            open = *cursor;
        else if (*cursor == open)
            open = '\0';
        cursor++;
    }
    return (open == '\0');
}
Quotes only toggle their respective state when the other quote type is not active. This allows "test 'nested' quotes" to work correctly.

Pipe Validation

The check_wrong_pipes() function detects invalid pipe usage:
parse_imput.c:42
/*
** Validates the placement of pipe tokens in a NULL-terminated token array.
**
** A token counts as a pipe when its first character compares equal to "|"
** (ft_strncmp over 1 byte). Such a token is rejected when it is the first
** token, when it is the last token, or when the token before it starts with
** '<', '>' or '|'.
**
** Returns 0 on the first misplaced pipe, 1 when none is found.
*/
int check_wrong_pipes(char **commands)
{
    int i;

    i = 0;
    while (commands[i] != NULL)
    {
        if (!ft_strncmp(commands[i], "|", 1))
        {
            /* i == 0 is tested first so commands[i - 1] is never read
               for the first token. */
            if (i == 0 || commands[i + 1] == NULL || \
                !ft_strncmp(commands[i - 1], "<", 1) || \
                !ft_strncmp(commands[i - 1], "<<", 2) || \
                !ft_strncmp(commands[i - 1], ">", 1) || \
                !ft_strncmp(commands[i - 1], ">>", 2) || \
                !ft_strncmp(commands[i - 1], "|", 1))
                return (0);
        }
        i++;
    }
    return (1);
}
This rejects:
  • Pipes at the start: | cat
  • Pipes at the end: cat |
  • Consecutive pipes: cat || grep
  • Pipes directly after a redirection operator: cat < | grep

Redirection Validation

The check_wrong_redir() function validates redirection operators:
parse_imput.c:67
/*
** Validates the placement of redirection tokens in a NULL-terminated token
** array.
**
** A token is treated as a redirection when it compares equal to "<", "<<",
** ">" or ">>" over the length of that operator (ft_strncmp). The array is
** rejected when two such tokens are adjacent, or when the last token is one.
**
** Returns 0 when the array is rejected, 1 otherwise.
*/
int check_wrong_redir(char **commands)
{
    int     idx;
    char    *cur;
    char    *next;

    idx = 0;
    while (commands[idx] != NULL)
    {
        cur = commands[idx];
        next = commands[idx + 1];
        idx++;
        if (next == NULL)
            continue;
        if (ft_strncmp(cur, "<", 1) && ft_strncmp(cur, "<<", 2)
            && ft_strncmp(cur, ">", 1) && ft_strncmp(cur, ">>", 2))
            continue;
        if (!ft_strncmp(next, "<", 1) || !ft_strncmp(next, "<<", 2)
            || !ft_strncmp(next, ">", 1) || !ft_strncmp(next, ">>", 2))
            return (0);
    }
    /* An empty token array has no trailing token to inspect. */
    if (idx == 0)
        return (1);
    cur = commands[idx - 1];
    if (!ft_strncmp(cur, "<", 1) || !ft_strncmp(cur, "<<", 2)
        || !ft_strncmp(cur, ">", 1) || !ft_strncmp(cur, ">>", 2))
        return (0);
    return (1);
}

Tokenization

The ft_split() function performs quote-aware tokenization:
split.c:87
/*
** Splits `s` on the delimiter `c` into a freshly allocated, NULL-terminated
** array of freshly allocated strings.
**
** The number of words comes from ft_num_word() and the length of each word
** from ft_sub_len(); leading delimiters are skipped before each word.
**
** Returns the array, NULL when the array itself cannot be allocated, or the
** result of ft_free_mem(array, filled) when a word allocation fails.
*/
char **ft_split(char const *s, char c)
{
    char    **words;
    int     total;
    int     width;
    int     idx;

    total = ft_num_word(s, c, 0, 0);
    words = malloc((total + 1) * sizeof(*words));
    if (words == NULL)
        return (NULL);
    idx = 0;
    while (idx < total)
    {
        while (*s == c)
            s++;
        width = ft_sub_len(s, c);
        words[idx] = malloc((width + 1) * sizeof(**words));
        if (words[idx] == NULL)
            return (ft_free_mem(words, idx));
        ft_strlcpy(words[idx], s, width + 1);
        s += width;
        idx++;
    }
    words[total] = NULL;
    return (words);
}

Quote-Aware Word Counting

The ft_num_word() function counts words while treating quoted strings as single tokens:
split.c:30
/*
** Counts the words in `s` separated by `c`, counting each quoted section as
** one word.
**
** `i` and `num_word` are used purely as locals: both are overwritten with 0
** before the scan, so the values passed by the caller are ignored.
**
** On a quote character, quotelen() is called with the address of `i` and a
** mode of 1 (single quote) or 2 (double quote) and the count is bumped.
** NOTE(review): quotelen() presumably advances `i` to the matching closing
** quote -- confirm against its definition.
** Outside quotes, a word is counted at its last character, i.e. a
** non-delimiter followed by the delimiter or by the end of the string.
*/
size_t ft_num_word(char const *s, char c, size_t i, size_t num_word)
{
    num_word = 0;
    i = 0;
    while (s[i])
    {
        if (s[i] == '\'' || s[i] == '\"')
        {
            if (s[i] == '\'')
                quotelen(s, &i, 1);
            else
                quotelen(s, &i, 2);
            num_word++;
        }
        else if (s[i] != c && (s[i + 1] == c || s[i + 1] == '\0'))
            num_word++;
        i++;
    }
    return (num_word);
}
The entire quoted string (including the quotes) is treated as a single word during tokenization. Quotes are removed later in the pipeline.

Environment Variable Expansion

The vars() function expands environment variables in each token:
env_parsed.c:107
/*
** Scans one token for `$` references and substitutes them, returning the
** resulting string.
**
** commands: token text to scan. It is reassigned from ft_reasign() each time
**           a substitution happens, so the returned pointer may differ from
**           the one passed in.
** env:      forwarded to ft_checkvar_value() when resolving a value.
** i:        index at which scanning starts (callers presumably pass 0 --
**           confirm).
** x:        length reported by ft_check_dolar_length(); a non-zero value
**           triggers a substitution. It is reset to 0 at the end of every
**           iteration that is not skipped by the quote check (callers
**           presumably pass 0 -- confirm).
**
** NOTE(review): who frees the previous `commands` buffer and `var_value` is
** decided inside the helpers, which are not visible here.
*/
char *vars(char *commands, t_prompt *env, int i, int x)
{
    int     s_quote;
    int     d_quote;
    char    *var_value;

    s_quote = 0;
    d_quote = 0;
    while (commands[i] != '\0')
    {
        /* Quote tracking is delegated. A return of 1 restarts the loop
           without the `i++` below; the helper receives &i, so it presumably
           advances the index itself -- confirm. d_quote is only passed
           along here and never read directly by this function. */
        if (ft_check_for_quotes(commands[i], &s_quote, &d_quote, &i) == 1)
            continue;
        /* A `$` is only measured while s_quote is 0, which is what leaves
           single-quoted text unexpanded. */
        if (commands[i] == '$' && s_quote == 0)
            x = ft_check_dolar_length(commands, &i);
        if (x != 0 && commands[i])
        {
            /* Extract the reference, resolve it, then rebuild `commands`
               around the resolved value; `i` is passed by address, so the
               helpers may reposition the scan. */
            var_value = ft_get_var_value(commands, x, &i);
            var_value = ft_checkvar_value(var_value, env, 0);
            commands = ft_reasign(var_value, commands, &i, (x + 1));
        }
        i++;
        /* Clear the length so the next character does not re-trigger a
           substitution. */
        x = 0;
    }
    return (commands);
}

Variable Expansion Rules

Variables inside single quotes are NOT expanded:
echo '$HOME' → $HOME
Variables inside double quotes ARE expanded:
echo "$HOME" → /home/user
Three special parameters are also recognized:
  • $? - Exit status of last command
  • $$ - Current shell PID
  • $0 - Shell name (“Minishell”)
env_parsed2.c:43
/*
** Builds the replacement text for a special parameter, selected by the first
** character of `var_value`:
**   '$' -> the result of getpid(), converted with ft_itoa()
**   '0' -> a copy of "Minishell"
**   '?' -> g_status, converted with ft_itoa()
** Any other first character yields NULL.
**
** `var_value` is always freed before returning. The returned string comes
** from ft_itoa()/ft_strdup().
*/
char *ft_asign_rare_value(char *var_value)
{
    char    *expanded;
    int     shell_pid;

    expanded = NULL;
    switch (var_value[0])
    {
        case '$':
            shell_pid = getpid();
            expanded = ft_itoa(shell_pid);
            break;
        case '0':
            expanded = ft_strdup("Minishell");
            break;
        case '?':
            expanded = ft_itoa(g_status);
            break;
        default:
            break;
    }
    free(var_value);
    return (expanded);
}
Undefined variables are replaced with an empty pair of double quotes (""), which the later quote-removal stage reduces to an empty string:
env_parsed2.c:36
else
{
    free(var_value);
    return (ft_strdup("\"\""));
}

Quote Removal

After variable expansion, quotes are removed from the final command arguments:
parse_imput.c:131
/*
** Strips syntactic quote characters from `imput` in place, starting at
** index `i`. Characters before `i` are left untouched.
**
** A double quote is removed unless it appears inside single quotes, and a
** single quote is removed unless it appears inside double quotes; every
** other character is kept. The quote state starts as "not inside any quote"
** at index `i`.
**
** The string is compacted in one pass with separate read and write
** positions, so no overlapping copy is needed and the work is linear in the
** length of the string. A NULL `imput` is ignored.
*/
void remove_quotes(char *imput, int i)
{
    int     write;
    int     d_quotes;
    int     s_quotes;

    if (!imput)
        return ;
    write = i;
    d_quotes = 0;
    s_quotes = 0;
    while (imput[i])
    {
        if (imput[i] == '\"' && !s_quotes)
            d_quotes = !d_quotes;
        else if (imput[i] == '\'' && !d_quotes)
            s_quotes = !s_quotes;
        else
        {
            /* write never overtakes i, so this only moves kept
               characters leftwards over removed quotes. */
            imput[write] = imput[i];
            write++;
        }
        i++;
    }
    imput[write] = '\0';
}
This function modifies the string in-place by shifting characters left to overwrite quote characters, maintaining the quote state to handle nested scenarios correctly.

Error Handling

Parsing errors set the global status and print error messages:
more_parsing.c:15
/*
** Runs the pre-tokenization syntax checks on `line`.
**
** When detectopenquotes() reports an open quote, g_status is set to 2,
** "syntax error: dquote" is printed with printf() and 1 is returned.
** When check_pipe_redir(line, 0) returns 0, g_status is set to 2 and 1 is
** returned; nothing is printed here for that case.
**
** Returns 0 when both checks pass.
*/
int ft_check_input(char *line)
{
    if (detectopenquotes(line) == 0)
    {
        g_status = 2;
        printf("syntax error: dquote\n");
        return (1);
    }
    if (check_pipe_redir(line, 0) == 0)
    {
        g_status = 2;
        return (1);
    }
    return (0);
}
When syntax errors are detected, execution is aborted and the shell returns to the prompt with g_status set to 2.

Build docs developers (and LLMs) love