6. Parsing

6.1. Top Level Parse (orgparse_run)

Parsing is done using the function orgparse_run. All mutable variables are declared locally, so it should be feasible to run nested calls to this function.

orgparse_run expects a properly initialized and configured instance of orgparse, a text block, the text block size, and any user data.

<<funcdefs>>=

void orgparse_run(orgparse *op,
                  const char *txt,
                  size_t sz,
                  void *ud);

<<functions>>=

/* Parse one text block from start to finish.
 * The mutable parser state is a local of this call; it is initialized
 * and run by orgparse_init_and_run and then handed to orgparse_end.
 *
 * op:  orgparse instance
 * txt: text block to parse
 * sz:  size of txt
 * ud:  user data forwarded untouched
 */
void orgparse_run(orgparse *op,
                  const char *txt,
                  size_t sz,
                  void *ud)
{
    orgparse_state state; /* per-call mutable parser data */
    orgparse_init_and_run(op, txt, sz, ud, &state);
    orgparse_end(op, ud, &state);
}

<<funcdefs>>=

void orgparse_init_and_run(orgparse *op,
                           const char *txt,
                           size_t sz,
                           void *ud,
                           orgparse_state *state);

<<functions>>=

/* Initialize a caller-provided state for (op, txt, sz, ud) and run the
 * parser over it. Unlike orgparse_run, this does not call orgparse_end.
 */
void orgparse_init_and_run(orgparse *op,
                           const char *txt,
                           size_t sz,
                           void *ud,
                           orgparse_state *state)
{
    orgparse_state_init(state, op, txt, sz, ud);
    orgparse_state_run(state);
}

6.2. Orgparse State

To make orgparse re-entrant, all mutable data is stored in a state.

6.2.1. Orgparse Struct

All mutable orgparse data is contained inside of a struct called orgparse_state, which gets instantiated with every call to orgparse_run. This allows orgparse_run to be re-entrant, which is a needed requirement in order for weewiki's scripting capabilities to really work.

6.2.1.1. Struct Declarations

<<typedefs>>=

typedef struct orgparse_state orgparse_state;

<<structs>>=

<<orgparse_flags_struct>>
/* All mutable data for one parser run. */
struct orgparse_state {
    orgparse *op;    /* parser instance whose callbacks get invoked */
    const char *txt; /* text block being parsed */
    size_t sz;       /* size of txt as given by the caller */
    void *ud;        /* user data passed to every callback */
    size_t pos;      /* current parse position (index into txt) */
    const char *blk; /* start of the code/aux block being collected, or NULL */
    size_t len;      /* running length of the block at blk */
<<state_flags_in_struct>>
};

6.2.1.2. Flags Struct

The flags struct is a component of the orgparse_state. These flags are used to determine the position of the parser state machine.

<<typedefs>>=

typedef struct orgparse_state_flags orgparse_state_flags;

6.2.1.2.1. The Flags

The mode flag is used to determine the large scale parsing mode, and is used to do general org-mode parsing or aux block parsing.

The txtmode flag is a boolean value set every time the parser goes to parse a text block. It is used to help indicate when a new paragraph block should begin.

The newline flag is used to determine whether or not a newline character has been found. This is used to add explicit breaks, as well as start new paragraph blocks.

<<orgparse_flags_struct>>=

/* Position of the parser state machine. */
struct orgparse_state_flags {
    int mode;    /* 0: general org parsing, 1: inside a code block, 2: inside an aux block */
    int txtmode; /* non-zero when the previous pass parsed text */
    int newline; /* non-zero after an empty line has ended a paragraph */
};

6.2.1.2.2. Flags in orgparse state

The orgparse_state struct has two orgparse_state_flags variables: an internal flags value iflags, and a pointer value flags. By default, flags points to the internal value, but this can be overridden to be an external value. The reasoning for having this feature is to allow more seamless org code parsing generation in the Janet scripting engine via the org command.

<<state_flags_in_struct>>=

orgparse_state_flags *flags; /* flags in use; points at iflags unless overridden */
orgparse_state_flags iflags; /* internal flag storage */

To override flags, use the orgparse_state_flags_set function. To get the internal flags, use orgparse_state_flags_get.

<<funcdefs>>=

orgparse_state_flags *orgparse_state_flags_get(orgparse_state *s);
void orgparse_state_flags_set(orgparse_state *s,
                              orgparse_state_flags *f);

<<functions>>=

/* Return the flags pointer currently stored in the state: the internal
 * iflags after orgparse_state_init, or whatever was last installed with
 * orgparse_state_flags_set. */
orgparse_state_flags *orgparse_state_flags_get(orgparse_state *s)
{
    return s->flags;
}
/* Make the state use the flags struct f instead of the one it
 * currently points at. The pointer is stored as-is, not copied. */
void orgparse_state_flags_set(orgparse_state *s,
                              orgparse_state_flags *f)
{
    s->flags = f;
}

6.2.1.3. Struct Init

<<funcdefs>>=

void orgparse_state_init(orgparse_state *state,
                         orgparse *op,
                         const char *txt,
                         size_t sz,
                         void *ud);

<<functions>>=

/* Reset a parser state so it is ready to parse txt from the beginning.
 *
 * state: state to initialize
 * op:    orgparse instance providing the callbacks
 * txt:   text block to parse
 * sz:    size of txt
 * ud:    user data handed to callbacks
 */
void orgparse_state_init(orgparse_state *state,
                         orgparse *op,
                         const char *txt,
                         size_t sz,
                         void *ud)
{
    /* caller-supplied context */
    state->op = op;
    state->ud = ud;
    state->txt = txt;
    state->sz = sz;

    /* cursor at the start, no block being collected */
    state->pos = 0;
    state->blk = NULL;
    state->len = 0;

    /* clear the internal flags and make them the active set */
    state->iflags.mode = 0;
    state->iflags.txtmode = 0;
    state->iflags.newline = 0;
    state->flags = &state->iflags;
}

6.2.1.4. Size

<<funcdefs>>=

size_t orgparse_state_size(void);

<<functions>>=

/* Size in bytes of an orgparse_state, for callers that cannot see the
 * struct definition. */
size_t orgparse_state_size(void)
{
    return sizeof(orgparse_state);
}

6.2.2. State Run

<<funcdefs>>=

void orgparse_state_run(orgparse_state *state);

<<functions>>=

/* Run the parser state machine over state->txt.
 *
 * Each pass of the loop dispatches on flags->mode:
 *   0: general org parsing (comment, code block start, header,
 *      "#+" command, otherwise text)
 *   1: inside a code block
 *   2: inside an aux block
 * Sub-parsers advance state->pos up to the last character they consumed;
 * the loop increment then steps past it.
 *
 * A trailing NUL byte, if the buffer has one, is excluded from the range
 * the loop walks. An empty buffer (sz == 0) is a no-op.
 */
void orgparse_state_run(orgparse_state *state)
{
    int txtmode;
    size_t sz;

    txtmode = 0;

    sz = state->sz;

    /* Only look at the last byte when there is one: with sz == 0 the
     * index sz - 1 would wrap around and read out of bounds. */
    if (sz > 0 && state->txt[sz - 1] == '\0') {
        sz--;
    }

    for (state->pos = 0; state->pos < sz; state->pos++) {
        /* recomputed every pass, stored into the flags at the bottom */
        txtmode = 0;
        if (state->flags->mode == 0) {
            if (parse_comment(state)) {
                continue;
            }
            if (parse_codeblock(state)) {
                continue;
            } else if (parse_header(state)) {
                continue;
            } else if (is_cmd(state)) {
                if (parse_name(state)) {
                    continue;
                } else if (parse_title(state)) {
                    continue;
                } else {
                    parse_ucmd(state);
                    continue;
                }
            } else {
                /* last resort: text. Open a paragraph if the previous
                 * pass was not text, unless we are on the final byte. */
                if (!state->flags->txtmode) {
                    if (state->pos < (state->sz - 1)) {
                        orgparse_pgrph_begin(state->op,
                                            state->ud);
                        state->flags->newline = 0;
                    }
                }
                txtmode = 1;
                parse_text(state);
            }
        } else if (state->flags->mode == 1) {
            parse_codeblock(state);
        } else if (state->flags->mode == 2) {
            /* aux blocks start inside text: keep the text flag alive */
            if (state->flags->txtmode) txtmode = 1;
            if (parse_aux_doiend(state)) {
<<end_the_aux_block>>
            } else {
<<increment_aux_line>>
            }
        }
        state->flags->txtmode = txtmode;
    }
}

6.3. Parse Header

<<static_funcdefs>>=

static int parse_header(orgparse_state *state);

<<functions>>=

/* Try to parse a header ("*", "**", ... followed by a space) at
 * state->pos.
 *
 * On success the header callback (if set) receives the header text and
 * its level (number of stars), an open paragraph is ended first, and
 * state->pos is moved to the last character consumed. Returns 1 when a
 * header marker was recognized, 0 otherwise.
 *
 * Scanner modes: 0 = counting stars, 1 = scanning header text,
 * 2 = finished.
 */
static int parse_header(orgparse_state *state)
{
    const char *header;
    size_t n;
    size_t start;
    int mode;
    int lvl;
    size_t hsz;
    int rc;
    int emitted;

    orgparse *op;
    const char *str;
    size_t sz;
    void *ud;
    size_t *pos;

    op = state->op;
    str = state->txt;
    sz = state->sz;
    ud = state->ud;
    pos = &state->pos;

    if(sz - *pos < 2) return 0;
    if (str[*pos] != '*') return 0;

    start = *pos;
    mode = 0;
    lvl = 0;
    rc = 0;
    header = NULL;
    hsz = 0;
    emitted = 0;
    for (n = start; n < sz; n++) {
        if (mode == 2) break;
        switch (mode) {
            case 0:
                if (str[n] == '*') {
                    lvl++;
                    continue;
                } else if (str[n] == ' ') {
                    /* header text begins right after the space; hsz
                     * starts at 1 because the loop increment skips
                     * over this first character */
                    n++;
                    header = &str[n];
                    hsz = 1;
                    mode = 1;
                    rc = 1;
                    if (n == (sz - 1)) {
                        /* one-character header on the final byte */
                        *pos += n - start;
                        emitted = 1;
                        if (op->header != NULL) {
                            if (state->flags->txtmode) {
                                orgparse_pgrph_end(op, ud);
                                state->flags->txtmode = 0;
                            }
                            op->header(ud, header, hsz, lvl);
                        }
                    }
                } else {
                    /* stars not followed by a space: not a header */
                    mode = 2;
                    rc = 0;
                }
                break;
            case 1:
                if (str[n] == '\n') {
                    mode = 2;
                    *pos += n - start;
                    if (op->header != NULL) {
                        if (state->flags->txtmode) {
                            orgparse_pgrph_end(op, ud);
                            state->flags->txtmode = 0;
                        }
                        op->header(ud, header, hsz, lvl);
                    }
                    rc = 1;
                    break;
                }
                hsz++;
                break;
        }
    }

    /* The buffer ended while header text was still being scanned: the
     * header is the last line and has no line break. Emit it here and
     * consume the rest of the buffer; otherwise the caller would see
     * rc == 1 with no callback and an unchanged position, and the
     * header body would be re-parsed as text. */
    if (mode == 1 && !emitted) {
        size_t hstart;

        hstart = (size_t)(header - str);
        hsz = (hstart < sz) ? (sz - hstart) : 0;
        /* do not hand a trailing NUL terminator to the callback */
        if (hsz > 0 && header[hsz - 1] == '\0') hsz--;
        *pos = sz - 1;
        if (op->header != NULL) {
            if (state->flags->txtmode) {
                orgparse_pgrph_end(op, ud);
                state->flags->txtmode = 0;
            }
            op->header(ud, header, hsz, lvl);
        }
    }

    return rc;
}

6.4. Parse Text

6.4.1. Parse Text Callback

Text is defined by what it isn't. If it isn't a header or code block, it is probably text. For this reason, the text block is a "last resort".

Anything that is not a header or code block is generally considered to be a text block. Text can span multiple lines, and can contain special formatting tags, for things like bold text, italic text, and text as well. In addition, text can also contain links.

When a line is determined to not be anything else (header, codeblock, aux block, etc), it is sent in to be parsed as text.

The text block parser will read lines until it hits stuff that is non-text. Along the way, the text will check for formatting tags and links on a line by line basis.

6.4.1.1. Main Callback

<<static_funcdefs>>=

static int parse_text(orgparse_state *state);

Some clarification details on what mark variable does. mark is used to save the beginning of a chunk of unformatted text. It is called "mark" because it is marking a location. When formatted text is found, it will use this marked position to handle the text preceding the formatted text, before handling the formatted text. The marked position can then be updated to point to text after the formatted text.

The mark variable is also used in line break logic. When a line break is found and the mark and text positions match, it means the line break has been found at the start of a line. For some reason, I decided to use mark instead of start. I think this has to do with the logic found in check_and_parse, but I'm unsure.

<<functions>>=

/* Parse text starting at state->pos, up to the end of the line (or the
 * end of the buffer), emitting plain text and inline formatting through
 * the callbacks in state->op.
 *
 * Returns 1 when a line end was reached, 0 when the position was already
 * at the end of the buffer or an aux block start cut the line short.
 * state->pos is advanced by the number of characters walked.
 */
static int parse_text(orgparse_state *state)
{
    size_t n;
    size_t start;
    size_t mark;
    int rc;

    orgparse *op;
    const char *str;
    size_t sz;
    void *ud;
    size_t *pos;
    /* mode, buf and len are used by the break_and_begin_aux_block chunk */
    int *mode;
    const char **buf;
    size_t *len;

    op = state->op;
    str = state->txt;
    sz = state->sz;
    ud = state->ud;
    pos = &state->pos;
    mode = &state->flags->mode;
    buf = &state->blk;
    len = &state->len;

    rc = 0;
    start = *pos;
    /* mark: start of the plain text not yet handed to op->text */
    mark = start;

    /* check for end of file */
    if (start == sz) return 0;

    for (n = start; n < sz; n++) {
        if (str[n] == '\n' || n == (sz - 1)) {
            /* end of line or final byte of the buffer */
            rc = 1;
            if (n == mark) {
                /* nothing pending before this character */
<<handle_newline_logic>>
            } else {
                if (op->text != NULL) {
                    /* +1 includes line break*/
                    op->text(ud,
                            &str[mark],
                            (n - mark) + 1);
                }
            }
            break;
        } else if (parse_aux_check(str, sz, &n)) {
<<break_and_begin_aux_block>>
        } else if (state->flags->newline) {
            /* first character after an empty line: the chunk opens a
             * paragraph and steps n back so this character is looked at
             * again on the next pass */
<<newline_begin_paragraph>>
        } else if (parse_bold(op, str, ud, sz, &n, &mark)) {
            continue;
        } else if (parse_code(op, str, ud, sz, &n, &mark)) {
            continue;
        } else if (parse_ul(op, str, ud, sz, &n, &mark)) {
            continue;
        } else if (parse_link(op, str, ud, sz, &n, &mark)) {
            continue;
        }
    }
    *pos += (n - start);
    return rc;
}

6.4.1.2. Handling Newline Logic

In org mode, line break characters "\n" can trigger different behaviors. This is managed via the newline flag.

The newline at the end of some text is used to indicate the end of a line. The text parser reads things a line at a time, so at that point it is an indicator to break out of text parser routine.

Newline behavior changes when the character is found without any other characters. The first newline is considered to mark the end of a paragraph block. Subsequent newlines are registered as line breaks.

<<handle_newline_logic>>=

/* Line end with no pending text. The first one ends the paragraph and
 * sets the newline flag; later ones are reported as explicit line
 * breaks. */
if (!state->flags->newline) {
    orgparse_pgrph_end(op, ud);
    state->flags->newline = 1;
} else {
    if (op->newline != NULL) {
        op->newline(ud, NULL, 0);
    }
}

Text that begins after a single empty line break is considered to be a new paragraph. This is determined by checking to see if the newline flag has been set.

A bug was discovered where the parser skips the first character of the paragraph. This bug becomes noticeable when any kind of formatting is used at the beginning of a paragraph (such as bold text). To mitigate this, the character pointer (n) moves back one character. While this may introduce other bugs later (not sure), it does not seem to break any tests in the current test suite.

<<newline_begin_paragraph>>=

/* Text after an empty line: clear the flag and open a new paragraph. */
state->flags->newline = 0;
orgparse_pgrph_begin(op, ud);
/* the enclosing loop increments n, so stepping back makes the current
 * character get examined again now that the flag is cleared */
n--; /* go backwards 1 */

6.4.2. Tag Check

This functionality will generically check a line for matching tags. If a tag is found, the end position is returned.

<<static_funcdefs>>=

static int tag_check(const char *txt,
                     size_t sz,
                     char tag,
                     size_t *len);

A small but important detail: tags in org mode work on a single line. If a line break is found before a matching tag, it breaks out.

Another detail: tags can't have spaces immediately after or before the tags. * this* is not a tag. *this * is also not a tag.

<<functions>>=

/* Check whether txt starts with a tag-delimited span such as *bold*.
 *
 * txt: text starting at the candidate opening tag
 * sz:  number of bytes available in txt
 * tag: delimiter character
 * len: receives the number of characters between the two tags whenever
 *      a closing tag is seen on the line (also when that tag is
 *      rejected because a space precedes it)
 *
 * Returns 1 for a valid span: the opening tag is not followed by a
 * space, a closing tag appears before any line break, and the closing
 * tag is not preceded by a space. Returns 0 otherwise.
 */
static int tag_check(const char *txt,
                     size_t sz,
                     char tag,
                     size_t *len)
{
    size_t i;

    if (sz <= 2 || txt[0] != tag || txt[1] == ' ') {
        return 0;
    }

    for (i = 1; i < sz; i++) {
        const char c = txt[i];

        /* tags never span lines */
        if (c == '\n') {
            return 0;
        }

        if (c == tag) {
            *len = i - 1;
            return txt[i - 1] != ' ';
        }
    }

    return 0;
}

6.4.3. Check and Parse

Checks AND parses particular tag.

<<static_funcdefs>>=

static int check_and_parse(orgparse *op,
                           const char *str,
                           void *ud,
                           size_t sz,
                           size_t *pos,
                           size_t *mark,
                           char tag,
                           orgparse_text f);

Setting the mark and pos variables for mark and position took some trial and error to get right. At some point, I'll need to put in the time and figure this out better.

The gist of it as I understand it now: mark is used to handle text preceding formatted text found with check_and_parse, and pos is pointer to the current character in the parser function parse_text.

When a line break is found, the marker is shifted forward by one. In the next loop iteration in parse_text, the text position will also shift forward to match the marker, which is important, as this equality is needed to trigger a paragraph end if a consecutive line break is found.

<<functions>>=

/* Check for a tag-delimited span at *pos and, if found, emit it.
 *
 * op:   orgparse instance (its text callback receives the plain text
 *       preceding the span)
 * str:  full text buffer
 * ud:   user data for callbacks
 * sz:   size of str
 * pos:  in/out, current character index in parse_text
 * mark: in/out, start of plain text not yet emitted
 * tag:  delimiter character
 * f:    callback for the span contents (may be NULL)
 *
 * On a match, text in [*mark, *pos) is flushed through op->text, the
 * span contents go to f, and both *pos and *mark move just past the
 * closing tag (*mark one further when that character is a line break).
 * Returns 1 on a match, 0 otherwise (positions untouched).
 */
static int check_and_parse(orgparse *op,
                           const char *str,
                           void *ud,
                           size_t sz,
                           size_t *pos,
                           size_t *mark,
                           char tag,
                           orgparse_text f)
{
    size_t n, m;
    int rc;
    size_t len;

    n = *pos;
    m = *mark;
    rc = 0;
    len = 0;

    /* The scan starts at str[n], so the bytes available are sz - n.
     * Using sz - m (the mark) here would let tag_check read up to
     * n - m bytes past the end of the buffer when no closing tag or
     * line break follows. */
    if (tag_check(&str[n],
                  sz - n,
                  tag,
                  &len)) {
        /* flush the plain text preceding the span */
        if (op->text != NULL && n != m) {
            op->text(ud, &str[m], n - m);
        }
        if (f != NULL) {
            f(ud, &str[n + 1], len);
        }
        /* skip contents plus opening and closing tag */
        n += len + 2;
        m = n;

        /* move the marker up if current character is line break */
        if (n < sz && str[n] == '\n') m++;
        *pos = n;
        *mark = m;
        rc = 1;
    }

    return rc;
}

6.4.4. Parse Bold

This will check and parse bold text.

<<static_funcdefs>>=

static int parse_bold(orgparse *op,
                      const char *str,
                      void *ud,
                      size_t sz,
                      size_t *pos,
                      size_t *mark);

<<functions>>=

/* Check for and parse a *bold* span at *pos; contents go to op->bold.
 * Arguments and return value are those of check_and_parse. */
static int parse_bold(orgparse *op,
                      const char *str,
                      void *ud,
                      size_t sz,
                      size_t *pos,
                      size_t *mark)
{
    return check_and_parse(op,
                           str,
                           ud,
                           sz,
                           pos,
                           mark,
                           '*',
                           op->bold);
}

6.4.5. Parse Code.

This will check and parse code text.

<<static_funcdefs>>=

static int parse_code(orgparse *op,
                      const char *str,
                      void *ud,
                      size_t sz,
                      size_t *pos,
                      size_t *mark);

<<functions>>=

/* Check for and parse a =code= span at *pos; contents go to op->code.
 * Arguments and return value are those of check_and_parse. */
static int parse_code(orgparse *op,
                      const char *str,
                      void *ud,
                      size_t sz,
                      size_t *pos,
                      size_t *mark)
{
    return check_and_parse(op,
                           str,
                           ud,
                           sz,
                           pos,
                           mark,
                           '=',
                           op->code);
}

6.4.6. Parse Underline.

This will check and parse underline text.

<<static_funcdefs>>=

static int parse_ul(orgparse *op,
                    const char *str,
                    void *ud,
                    size_t sz,
                    size_t *pos,
                    size_t *mark);

<<functions>>=

/* Check for and parse an _underline_ span at *pos; contents go to
 * op->underline. Arguments and return value are those of
 * check_and_parse. */
static int parse_ul(orgparse *op,
                    const char *str,
                    void *ud,
                    size_t sz,
                    size_t *pos,
                    size_t *mark)
{
    return check_and_parse(op,
                           str,
                           ud,
                           sz,
                           pos,
                           mark,
                           '_',
                           op->underline);
}

6.4.7. Parse Link

6.4.7.1. Top Level Function

<<static_funcdefs>>=

static int parse_link(orgparse *op,
                      const char *str,
                      void *ud,
                      size_t sz,
                      size_t *pos,
                      size_t *mark);

<<functions>>=

/* Check for and parse a link ("[[link]]" or "[[link][name]]") at *pos.
 *
 * When a link is found, plain text in [*mark, *pos) is flushed through
 * op->text, the link and its name go to op->link, and *pos / *mark are
 * moved just past the closing "]]" (*mark one further when that
 * character is a line break). Returns 1 when a link was consumed,
 * 0 otherwise.
 */
static int parse_link(orgparse *op,
                      const char *str,
                      void *ud,
                      size_t sz,
                      size_t *pos,
                      size_t *mark)
{
    const char *target;
    const char *label;
    size_t target_sz = 0;
    size_t label_sz = 0;
    size_t span = 0;
    size_t begin;
    size_t pending;
    size_t next;

    if (!check_link(str, *pos, sz)) return 0;

    begin = *pos;
    pending = *mark;

    extract_link(str, begin, sz,
                 &target, &target_sz,
                 &label, &label_sz,
                 &span);

    /* emit the plain text sitting in front of the link */
    if (op->text != NULL && begin != pending) {
        op->text(ud, &str[pending], begin - pending);
    }

    if (op->link != NULL) {
        op->link(ud, target, target_sz, label, label_sz);
    }

    /* span reaches the first ']' of the closing pair; +2 steps over it */
    next = begin + span + 2;

    *pos = next;
    /* a line break right after the link is skipped by the marker */
    *mark = (next < sz && str[next] == '\n') ? next + 1 : next;

    return 1;
}

6.4.7.2. Check For Link

<<static_funcdefs>>=

static int check_link(const char *str,
                      size_t pos,
                      size_t sz);

<<functions>>=

/* Report whether a complete link starts at str[pos].
 *
 * str: text buffer
 * pos: index of the candidate "[["
 * sz:  size of str
 *
 * Returns 1 when str[pos..] begins with "[[" and a closing "]]" follows
 * on the same line, 0 otherwise. At least 5 bytes ("[[x]]") must remain
 * in the buffer.
 */
static int check_link(const char *str,
                      size_t pos,
                      size_t sz)
{
    size_t n;

    /* Bytes remaining are sz - pos. The operands must not be swapped:
     * pos - sz wraps around for pos < sz, which would disable the guard
     * and let str[pos + 1] be read past the end of the buffer. */
    if (pos >= sz || (sz - pos) < 5) return 0;

    if (str[pos] != '[' || str[pos+1] != '[') return 0;

    sz -= 1; /* for lookahead */
    pos += 2;

    for (n = pos; n < sz; n++) {
        if (str[n] == '\n') return 0;
        if (str[n] == ']' && str[n + 1] == ']') return 1;
    }

    return 0;
}

6.4.7.3. Extract Link

<<static_funcdefs>>=

static void extract_link(const char *str,
                         size_t pos,
                         size_t sz,
                         const char **plink,
                         size_t *link_sz,
                         const char **pname,
                         size_t *name_sz,
                         size_t *len);

<<functions>>=

/* Split the link starting at str[pos] into its parts.
 *
 * str:     text buffer
 * pos:     index of the opening "[["
 * sz:      size of str
 * plink:   receives a pointer to the link text
 * link_sz: receives the link text length
 * pname:   receives a pointer to the display name; for "[[link]]" this
 *          is the link text itself
 * name_sz: receives the display name length
 * len:     receives the offset from pos to the first ']' of the closing
 *          "]]" (or to where the scan stopped if none was found)
 *
 * The caller is expected to have validated the link with check_link.
 */
static void extract_link(const char *str,
                         size_t pos,
                         size_t sz,
                         const char **plink,
                         size_t *link_sz,
                         const char **pname,
                         size_t *name_sz,
                         size_t *len)
{
    size_t n;
    const char *link;
    size_t link_size;
    const char *name;
    size_t name_size;
    size_t run;
    size_t start;

    sz -= 1; /* for lookahead */

    start = pos;
    pos += 2; /* step over "[[" */

    name_size = 0;
    link_size = 0;
    run = 0; /* characters seen since the last separator, inclusive */
    name = NULL;
    link = &str[pos];
    for (n = pos; n < sz; n++) {
        run++;
        if (str[n] != ']') continue;

        if (str[n + 1] == '[') {
            /* "][" separates the link from its name */
            link_size = run - 1;
            run = 0;
            name = &str[n + 2];
            n++;
        } else if (str[n + 1] == ']') {
            name_size = run - 1;
            if (name == NULL) { /* name not set, assume type 2 link */
                link_size = name_size;
                name = link;
            }
            break;
        }
    }
    *name_sz = name_size;
    *pname = name;
    *link_sz = link_size;
    *plink = link;
    *len = n - start;
}

6.5. Parse Comment

Any thing that starts with '# ' (hash + space) is considered to be a comment, and the line will be ignored.

<<static_funcdefs>>=

static int parse_comment(orgparse_state *state);

<<functions>>=

/* Skip a comment line ("# " at state->pos).
 *
 * Returns 0 without touching the state when the text at state->pos is
 * not a comment. Otherwise moves state->pos to the line break ending
 * the comment (or to the end of the buffer) and returns 1.
 */
static int parse_comment(orgparse_state *state)
{
    const char *txt = state->txt;
    const size_t sz = state->sz;
    const size_t start = state->pos;
    size_t eol;

    if ((sz - start) < 2) return 0;
    if (txt[start] != '#' || txt[start + 1] != ' ') return 0;

    /* find the end of the line */
    eol = start;
    while (eol < sz && txt[eol] != '\n') {
        eol++;
    }

    /* TODO: the position may need to land one past the line break
     * instead of on it. Leaving it on the line break is suspected of
     * printing stray characters in ww-server (the "extra period" bug).
     */
    state->pos = eol;

    return 1;
}

6.6. Parse Command

A 'command' in org mode refers to any line that starts with '#+'. After this, a string of alphanumeric non-space characters creates the command name. Following this is an arbitrary number of spaces, followed by the command string.

The parse_cmd function will parse and extract the command, and command string (assuming it is indeed a command).

<<static_funcdefs>>=

static int parse_cmd(const char *str,
                     size_t sz,
                     const char **cmd,
                     size_t *cmd_len,
                     const char **cmdstr,
                     size_t *cmdstr_len,
                     size_t *total_len);

<<functions>>=

/* Parse a "#+COMMAND argument" line at the start of str.
 *
 * str:        text starting at the candidate '#'
 * sz:         bytes available in str
 * cmd:        receives a pointer to the command name (after "#+")
 * cmd_len:    receives the command name length
 * cmdstr:     receives a pointer to the command string, or NULL if none
 * cmdstr_len: receives the command string length
 * total_len:  receives the offset of the character that ended the line
 *             (the line break, or the final byte of the buffer)
 *
 * Returns 1 when str holds a command, 0 otherwise. When the early
 * checks fail, none of the outputs are written.
 *
 * Scanner modes: 0 = command name, 1 = spaces after the name,
 * 2 = command string.
 */
static int parse_cmd(const char *str,
                     size_t sz,
                     const char **cmd,
                     size_t *cmd_len,
                     const char **cmdstr,
                     size_t *cmdstr_len,
                     size_t *total_len)
{
    size_t n;
    const char *pcmdstr;
    const char *pcmd;
    size_t len;
    int mode;
    int rc;

    if (sz < 3) return 0;
    if (str[0] != '#') return 0;
    if (str[1] != '+') return 0;
    if (str[2] == ' ') return 0;


    rc = 0;

    mode = 0;

    pcmd = &str[2];
    pcmdstr = NULL;
    len = 0;

    /* zero out lengths */

    *total_len = 0;
    *cmdstr_len = 0;
    *cmd_len = 0;

    for (n = 2; n < sz; n++) {
        if (str[n] == '\n' || (n == (sz - 1))) {
            /* The line ends here. If this is the final byte of the
             * buffer and it is real content (neither a line break nor a
             * NUL terminator), it still belongs to the command and must
             * be counted; otherwise an unterminated last line loses its
             * final character. */
            if (str[n] != '\n' && str[n] != '\0') {
                if (mode == 2) {
                    len++;
                } else if (str[n] != ' ') {
                    if (mode == 1) {
                        /* one-character command string */
                        pcmdstr = &str[n];
                        mode = 2;
                    }
                    len++;
                }
            }
            *cmdstr_len = len;
            if (mode == 0) {
                /* command name only, no command string */
                *cmd_len = len;
                *cmdstr_len = 0;
                rc = 1;
            }
            break;
        }
        switch (mode) {
            case 0: /* command string */
                if (str[n] == ' ') {
                    mode = 1;
                    *cmd_len = len;
                    len = 0;
                    rc = 1;
                    break;
                }
                len++;
                break;
            case 1:
                if (str[n] != ' ') {
                    mode = 2;
                    len = 1;
                    pcmdstr = &str[n];
                }
                break;
            case 2:
                len++;
                break;
        }
    }

    *cmdstr = pcmdstr;
    *cmd = pcmd;
    *total_len = n * rc;

    return rc;
}

is_cmd is used to check if the line is a command to begin with. This is used in the parser so that general command statements are ignored.

<<static_funcdefs>>=

static int is_cmd(orgparse_state *state);

<<functions>>=

/* Report whether the text at state->pos starts a "#+" command line.
 * Returns 1 when at least three bytes remain and they are "#+" followed
 * by a non-space character, 0 otherwise.
 */
static int is_cmd(orgparse_state *state)
{
    const char *str;
    size_t avail;

    if (state->pos >= state->sz) return 0;

    /* Bytes left from the current position. Checking the total buffer
     * size instead would allow str[1] and str[2] to be read past the
     * end of the buffer near its tail. */
    avail = state->sz - state->pos;
    if (avail < 3) return 0;

    str = &state->txt[state->pos];
    if (str[0] != '#') return 0;
    if (str[1] != '+') return 0;
    if (str[2] == ' ') return 0;
    return 1;
}

6.7. Parse User Command

Any command that isn't handled by Orgparse will get sent here. Eventually, a user-defined callback will optionally be able to parse this.

<<static_funcdefs>>=

static int parse_ucmd(orgparse_state *state);

<<functions>>=

/* Consume a command line that no other command parser handled.
 * The parsed command name and string are currently discarded. Returns
 * the result of parse_cmd; on success state->pos is advanced by the
 * length parse_cmd reports.
 */
static int parse_ucmd(orgparse_state *state)
{
    const char *name = NULL;
    const char *arg = NULL;
    size_t name_len = 0;
    size_t arg_len = 0;
    size_t consumed = 0;
    const size_t begin = state->pos;
    int found;

    found = parse_cmd(state->txt + begin,
                      state->sz - begin,
                      &name,
                      &name_len,
                      &arg,
                      &arg_len,
                      &consumed);

    if (found) {
        /* TODO: add user-defined callback */
        state->pos += consumed;
    }

    return found;
}

6.8. Parse Codeblock

A codeblock is a set of lines smooshed between '#+BEGIN_SRC' and '#+END_SRC' tags.

<<static_funcdefs>>=

static int parse_codeblock(orgparse_state *state);

<<functions>>=

/* Handle code blocks delimited by "#+BEGIN_SRC" and "#+END_SRC".
 *
 * In mode 0 this checks whether the line at state->pos opens a code
 * block; if so the parser switches to mode 1 and the block start is
 * remembered. In mode 1 each call either closes the block (emitting it
 * through op->codeblock) or absorbs one more line into it.
 *
 * Returns 1 when the line was consumed as part of a code block,
 * 0 otherwise.
 */
static int parse_codeblock(orgparse_state *state)
{
    int rc;
    size_t start;
    size_t cmdlen;
    size_t cmdstrlen;
    const char *cmd;
    const char *cmdstr;
    size_t totallen;
    size_t n;
    int new_block;
    const size_t beginlen = sizeof("BEGIN_SRC") - 1;
    const size_t endlen = sizeof("END_SRC") - 1;

    orgparse *op;
    const char *txt;
    size_t sz;
    void *ud;
    size_t *pos;
    int *mode;
    const char **blk;
    size_t *blklen;

    op = state->op;
    txt = state->txt;
    sz = state->sz;
    ud = state->ud;
    pos = &state->pos;
    mode = &state->flags->mode;
    blk = &state->blk;
    blklen = &state->len;

    start = *pos;
    cmdlen = 0;
    cmdstrlen = 0;
    totallen = 0;
    rc = parse_cmd(&txt[start],
                   sz - start,
                   &cmd,
                   &cmdlen,
                   &cmdstr,
                   &cmdstrlen,
                   &totallen);
    new_block = 0;

    /* The command name must equal the keyword. Comparing only cmdlen
     * characters is a prefix test: "#+B" would open a block and an empty
     * name ("#+" followed by a line break) would match every keyword. */
    if (rc) {
        if (*mode  == 0) {
            if (cmdlen == beginlen &&
                !strncmp(cmd, "BEGIN_SRC", beginlen)) {
                *pos += totallen;
                /* block content starts on the line after the tag */
                *blk = &txt[*pos + 1];
                *blklen = 0;
                *mode = 1;
                new_block = 1;
            } else {
                /* another command, not a codeblock */
                rc = 0;
            }
        } else if (*mode == 1) {
            /* The second length test accepts "#+END_SRC" sitting on the
             * very last bytes of the buffer, where the reported name
             * length can be one short; exactly endlen bytes follow "#+"
             * in that case, so the comparison stays in bounds. */
            if ((cmdlen == endlen ||
                 (cmdlen == endlen - 1 && (sz - start) == endlen + 2)) &&
                !strncmp(cmd, "END_SRC", endlen)) {
                *pos += totallen;
                *mode = 0;
                if (op->codeblock != NULL) {
                    /* drop the final line break; an empty block has no
                     * bytes, so avoid wrapping 0 - 1 around */
                    op->codeblock(ud,
                                  *blk,
                                  (*blklen > 0) ? (*blklen - 1) : 0);
                }
                *blk = NULL;
                *blklen = 0;
            }
        }
    }

    /* Read a line */
    if (*mode == 1 && !new_block) {
        rc = 1;
        for (n = start; n < sz; n++) {
            if (txt[n] == '\n') break;
        }
        *pos += (n - start);
        /* +1 accounts for the line break */
        *blklen += (n - start) + 1;
    }

    return rc;
}

6.9. Parse Name

Parses a "#+NAME" command.

<<static_funcdefs>>=

static int parse_name(orgparse_state *state);

<<functions>>=

/* Parse a "#+NAME: value" command at state->pos.
 *
 * On a match the value goes to op->name (if set) and state->pos is
 * advanced by the length parse_cmd reports. Returns 1 when the line is
 * a NAME command, 0 otherwise.
 */
static int parse_name(orgparse_state *state)
{
    int rc;
    size_t start;
    size_t cmdlen;
    size_t cmdstrlen;
    const char *cmd;
    const char *cmdstr;
    size_t totallen;
    const size_t kwlen = sizeof("NAME:") - 1;

    orgparse *op;
    const char *txt;
    size_t sz;
    void *ud;
    size_t *pos;

    op = state->op;
    txt = state->txt;
    sz = state->sz;
    ud = state->ud;
    pos = &state->pos;

    start = *pos;
    cmdlen = 0;
    cmdstrlen = 0;
    totallen = 0;
    rc = parse_cmd(&txt[start],
                   sz - start,
                   &cmd,
                   &cmdlen,
                   &cmdstr,
                   &cmdstrlen,
                   &totallen);
    if (rc) {
        /* Require the whole keyword. Comparing only cmdlen characters
         * is a prefix test, so "#+N" or an empty command name would be
         * taken for NAME. */
        if (cmdlen == kwlen && !strncmp(cmd, "NAME:", kwlen)) {
            if (op->name != NULL) {
                op->name(ud, cmdstr, cmdstrlen);
            }
            *pos += totallen;
        } else rc = 0;
    }

    return rc;
}

6.10. Parsing an Aux Block

An aux block is anything inside of a @! and !@. For WeeWiki, the idea here is to execute janet code for dynamic page content. Aux blocks can be both in-line and multiline. They start out inside of a text block, but have their own mode for multi-line processing (similar to code blocks).

6.10.1. Beginning a Block

6.10.1.1. Checking for an Aux Block

An aux block can be started at anytime inside of a text block. The parsing process here must check for any @! roaming around. This is wrapped inside of a function called parse_aux_check. It is called inside of parse_text.

<<static_funcdefs>>=

static int parse_aux_check(const char *str,
                           size_t sz,
                           size_t *pos);

At one point, this once also processed the aux block code, but the function ended up taking too many arguments! Now it only checks.

<<functions>>=

/* Report whether an aux block opener "@!" sits at str[*pos].
 * At least four bytes must remain in the buffer. *pos is only read.
 * Returns 1 on a match, 0 otherwise.
 */
static int parse_aux_check(const char *str,
                           size_t sz,
                           size_t *pos)
{
    const size_t at = *pos;

    if ((sz - at) < 4) {
        return 0;
    }

    if (str[at] == '@' && str[at + 1] == '!') {
        return 1;
    }

    return 0;
}

6.10.1.2. Breaking Out of The Loop

If indeed an aux block has been found, the parsing routine will immediately break out of the loop and set itself up to be in aux block mode (mode 2). Any text up to this point is processed as well.

Return values aren't really being used right now, but the return value is being set to be 0 (false) to indicate that the text has been short-circuited by an aux block.

The code below is done inside of the parse_text function.

<<break_and_begin_aux_block>>=

/* flush the plain text found before the "@!" */
if (op->text != NULL && n != mark) {
    op->text(ud, &str[mark], n - mark);
}

/* an aux block right after an empty line still opens a paragraph */
if (state->flags->newline) {
    state->flags->newline = 0;
    orgparse_pgrph_begin(op, ud);
}

/* switch to aux block mode; the block content starts after "@!" */
*mode = 2;
*len = 0;
*buf = &str[n + 2];
rc = 0;
break;

6.10.2. Ending it

Once the parse state machine is set to be in aux block mode, it can only be ended by finding a matching '!@' tag.

6.10.2.1. Do I end?

The only way an aux block ends is with the magic tag '!@'. This is checked with the function parse_aux_doiend.

<<static_funcdefs>>=

static int parse_aux_doiend(orgparse_state *state);

<<functions>>=

/* Report whether the aux block terminator "!@" sits at state->pos.
 * Returns 1 on a match, 0 otherwise (including when fewer than two
 * bytes remain). */
static int parse_aux_doiend(orgparse_state *state)
{
    const size_t at = state->pos;

    if ((state->sz - at) < 2) return 0;
    if (state->txt[at] != '!') return 0;

    return state->txt[at + 1] == '@';
}

6.10.2.2. Ending The Aux Block

<<end_the_aux_block>>=

/* "!@" found: return to general parsing and step over the '!' (the
 * loop increment in orgparse_state_run then steps over the '@'). */
state->flags->mode = 0;
state->pos++;
if (state->op->aux != NULL) {
    /* The length counter also counted the '!' of the opening "@!",
     * hence the - 1. When the block is closed before anything was
     * counted ("@!@"), report an empty block instead of letting 0 - 1
     * wrap around. */
    state->op->aux(state->ud,
                   state->blk,
                   (state->len > 0) ? (state->len - 1) : 0);
}
<<handle_auxblock_endofline>>

An edge case to handle: what happens when an aux block is at the end of a line? When this happens, the parser absorbs the newline character. If this doesn't happen, the text parser adds an extra newline.

<<handle_auxblock_endofline>>=

/* absorb a line break directly following the closing tag so the text
 * parser does not add an extra newline */
if ((state->pos + 1) < state->sz &&
    state->txt[state->pos + 1] == '\n') {
    state->pos++;
}

6.10.2.3. Adding to aux line

While in aux block mode, the character block length is increased one character at a time.

<<increment_aux_line>>=

/* one more character belongs to the aux block */
state->len++;

6.11. Parse Title

Parses the TITLE command.

<<static_funcdefs>>=

static int parse_title(orgparse_state *state);

<<functions>>=

/* Parse a "#+TITLE: value" command at state->pos.
 *
 * On a match the value goes to op->title (if set), state->pos is
 * advanced by the length parse_cmd reports, and one directly following
 * empty line is absorbed. Returns 1 when the line is a TITLE command,
 * 0 otherwise.
 */
static int parse_title(orgparse_state *state)
{
    int rc;
    size_t start;
    size_t cmdlen;
    size_t cmdstrlen;
    const char *cmd;
    const char *cmdstr;
    size_t totallen;
    const size_t kwlen = sizeof("TITLE:") - 1;


    orgparse *op;
    const char *txt;
    size_t sz;
    void *ud;
    size_t *pos;

    op = state->op;
    txt = state->txt;
    sz = state->sz;
    ud = state->ud;
    pos = &state->pos;

    start = *pos;
    cmdlen = 0;
    cmdstrlen = 0;
    totallen = 0;
    rc = parse_cmd(&txt[start],
                   sz - start,
                   &cmd,
                   &cmdlen,
                   &cmdstr,
                   &cmdstrlen,
                   &totallen);
    if (rc) {
        /* Require the whole keyword. Comparing only cmdlen characters
         * is a prefix test, so "#+T" or an empty command name would be
         * taken for TITLE. */
        if (cmdlen == kwlen && !strncmp(cmd, "TITLE:", kwlen)) {
            if (op->title != NULL) {
                op->title(ud, cmdstr, cmdstrlen);
            }
            *pos += totallen;

            /* swallow the line break of an empty line after the title */
            if ((*pos + 1) < sz && txt[*pos + 1] == '\n') {
                (*pos) = (*pos) + 1;
            }
            return 1;
        }
    }

    return 0;
}

prev | home | next