4. Parsing

4.1. Parsing Top

The top-level parsing function is twofold: it is designed to take in org markup, and then produce equivalent HTML output.

Each character of the string is fed through a state machine. The mode of the state machine determines what kind of formatting to add. HTML formatting is tag based. A change to a state indicates the start of a particular tag. A change from a particular state indicates the end of that particular tag.

Text is written a block at a time using fwrite. Blocks are written any time a state changes. Blocks are also written at the end.

Parsing behavior will change based on the state of the flag mktoc. If enabled, the parser will be set up to generate a table of contents. Otherwise, it is just the regular parser.

4.1.1. The Parse Function

<<function_declarations>>=
static int parse(sorg_d *sorg, FILE *out, char *buf, size_t size);
<<parsing_top>>=
/*
 * Top-level parse entry point for one buffer of org markup.
 *
 * Resets the scan position, block offset and block size held in sorg, then
 * runs either the table-of-contents chunk (when the mktoc flag is set) or
 * the normal parsing chunk. HTML is written to out.
 *
 * Returns 1 when control reaches the end of the function; the included
 * chunks contain their own return statements and may return earlier.
 */
static int parse(sorg_d *sorg, FILE *out, char *buf, size_t size)
{
    /* Every call starts scanning at the beginning of buf with no pending block. */
    sorg->pos = 0;
    sorg->off = 0;
    sorg->blksize = 0;
    if(mktoc) {
<<toc_parse_mode>>
    } else {
<<normal_parse_mode>>
    }
    return 1;
}

4.1.2. Normal Parsing Mode

This is the regular parsing mode, which converts all org syntax into HTML.

<<normal_parse_mode>>=
/*
 * Normal parsing mode: convert the line held in buf (size bytes) to HTML.
 *
 * sorg->mode is the state used for the current line and sorg->pmode is the
 * state the previous line ended in; pmode decides when a paragraph has to
 * be opened or closed. Text is not copied: sorg->off and sorg->blksize
 * describe a pending run of buf that is flushed with fwrite whenever
 * something else has to be printed first.
 */

/* A blank line closes any open paragraph.
 * NOTE(review): this test runs before the mode is examined, so an empty
 * line inside a code block is skipped as well -- confirm that is intended. */
if(buf[0] == '\n') {
    if(sorg->pmode == MODE_TEXT) {
        fprintf(out,"</p>\n");
        sorg->pmode = MODE_NONE;
    }
    return 1;
}

while(sorg->pos < size) {
    switch(sorg->mode) {
        case MODE_NONE:
            /* Start of a line: classify it. Whole-line constructs are
             * printed immediately and end the call; anything else falls
             * through to text mode. */
            if(is_comment(sorg, buf, size)) {
                sorg->mode = MODE_NONE;
                return 1;
            } else if(is_header(sorg, buf, size)) {
                if(sorg->pmode == MODE_TEXT) fprintf(out, "</p>\n");
                print_header(sorg, buf, size, out);
                sorg->mode = MODE_NONE;
                sorg->pmode = sorg->mode;
                return 1;
            } else if(is_name(sorg, buf, size)) {
                if(sorg->pmode == MODE_TEXT) fprintf(out, "</p>\n");
                print_name(sorg, buf, size, out);
                /* the next line is required to open the code block */
                sorg->mode = MODE_BEGIN_CODE;
                sorg->pmode = sorg->mode;
                return 1;
            } else if(is_title(sorg, buf, size)) {
                if(sorg->pmode == MODE_TEXT) fprintf(out, "</p>\n");
                print_title(sorg, buf, size, out);
                sorg->mode = MODE_NONE;
                sorg->pmode = sorg->mode;
                return 1;
            } else if(is_ulistitem(sorg, buf, size)) {
                if(sorg->pmode == MODE_TEXT) fprintf(out, "</p>\n");
                print_ulist_begin(sorg, out);
                print_ulist_item(sorg, buf, size, out);
                sorg->pmode = sorg->mode;
                sorg->mode = MODE_ULIST;
                return 1;
            } else {
                /* Plain text. Text blocks carry a size one larger than
                 * their content; mid-line flushes write blksize - 1. */
                sorg->mode = MODE_TEXT;
                sorg->off = sorg->pos;
                sorg->blksize = 1;
                if(sorg->pmode != MODE_TEXT) fprintf(out, "<p>");
            }
            break;
        case MODE_TEXT:
<<parse_formatted>>
            if(is_link(sorg, &buf[sorg->pos], size - sorg->pos)) {
                print_text(sorg, buf, out);
                sorg->pos += print_link(sorg,
                                        &buf[sorg->pos],
                                        size - sorg->pos,
                                        out);
                sorg->blksize = 1;
                sorg->off = sorg->pos;
            } else if(is_block_ref(sorg, &buf[sorg->pos], size - sorg->pos)) {
                print_text(sorg, buf, out);
                sorg->pos += print_block_ref(sorg,
                                        &buf[sorg->pos],
                                        size - sorg->pos,
                                        out);
                sorg->blksize = 1;
                sorg->off = sorg->pos;
            } else {
                /* ordinary character: extend the pending block */
                if(buf[sorg->pos] != '\n') sorg->blksize++;
                sorg->pos++;
            }
            break;
        case MODE_BEGIN_CODE:
            /* A name line must be followed directly by the begin tag;
             * anything else is reported to the caller with 0. */
            if(!is_begin(sorg, buf, size)) return 0;
            else {
                fprintf(out, "\n<p><pre><code>");
                sorg->mode = MODE_CODE;
                return 1;
            }
            break;
        case MODE_CODE: {
            const char *esc;

            if(is_end(sorg, buf, size)) {
                sorg->mode = MODE_NONE;
                fprintf(out, "</code></pre></p>\n");
                return 1;
            }
            sorg->blksize++;

            /* Characters that cannot be copied verbatim into HTML are
             * replaced by entities; newline and space also force a flush
             * but are written unchanged. */
            esc = NULL;
            switch(buf[sorg->pos]) {
                case '<': esc = "&lt;"; break;
                case '>': esc = "&gt;"; break;
                case '&': esc = "&amp;"; break;
                case '\n': esc = "\n"; break;
                case ' ': esc = " "; break;
                default: break;
            }
            if(esc != NULL) {
                /* blksize already counts the current character, so
                 * blksize - 1 is the run that precedes it */
                fwrite(buf + sorg->off, 1, sorg->blksize - 1, out);
                sorg->off = sorg->pos + 1;
                sorg->blksize = 0;
                fputs(esc, out);
            }
            sorg->pos++;
            break;
        }
        case MODE_ULIST:
            if(is_ulistitem(sorg, buf, size)) {
                print_ulist_item(sorg, buf, size, out);
                return 1;
            } else {
                /* The list is over: close it and parse this same line
                 * again from the neutral state. */
                sorg->pmode = sorg->mode;
                sorg->mode = MODE_NONE;
                print_ulist_end(sorg, out);
                parse(sorg, out, buf, size);
                return 1;
            }
            break;
    }
}

/* Flush whatever is still pending at the end of the line. */
fwrite(buf + sorg->off, 1, sorg->blksize, out);

/* Code mode spans several lines; every other mode is re-decided per line. */
sorg->pmode = sorg->mode;
if(sorg->mode != MODE_CODE) {
    sorg->mode = MODE_NONE;
}

4.1.3. Table of Contents Parsing Mode

This mode is turned on with a switch from the command line flags. When enabled, the parser is only interested in parsing and printing headers. The rest, it skips.

<<toc_parse_mode>>=
/* Blank lines carry no heading, so there is nothing to print. */
if(buf[0] == '\n') return 1;

/* Only the title and the headers are emitted in this mode; every other
 * kind of line is ignored. */
if(is_title(sorg, buf, size)) {
    print_title(sorg, buf, size, out);
} else if(is_header(sorg, buf, size)) {
    print_header(sorg, buf, size, out);
}

4.2. Type Enum

<<enums>>=
/* Parser state identifiers. The MODE_* constants contributed by the types
 * chunk are placed first; MODE_NONE is always the last enumerator. */
enum {
<<types>>
    MODE_NONE
};

4.3. Headings

4.3.1. Check for Header

Headers in org-mode always start at the beginning of the line, and must have a number of stars, followed by a space. This is checked for with the function is_header.

<<function_declarations>>=
static int is_header(sorg_d *sorg, char *buf, size_t size);
<<functions>>=
/*
 * Returns 1 when buf starts with one or more '*' characters that are
 * followed by a space, 0 otherwise. At most size bytes are examined.
 */
static int is_header(sorg_d *sorg, char *buf, size_t size)
{
    size_t i;

    if(size == 0 || buf[0] != '*') return 0;

    /* skip the remaining stars of the run */
    i = 1;
    while(i < size && buf[i] == '*') i++;

    /* the run has to be terminated by a space inside the buffer */
    return (i < size && buf[i] == ' ');
}

4.3.2. Print Header

If the line is indeed a header, the parser will scan the line again, this time counting the number of stars before the space indicating the header level. (Note: Since the title is reserved for header 1, the largest heading size starts at level 2.)

The header will then treat the rest of the line as the header text. This action is done with the function print_header.

4.3.2.1. Print Header Function

<<function_declarations>>=
static void print_header(sorg_d *sorg, char *buf, size_t size, FILE *out);
<<functions>>=
/*
 * Print an org header line (stars, spaces, text) as HTML.
 *
 * The number of leading stars gives the level, the text after the stars
 * and spaces is the header text, and a trailing newline is dropped. The
 * section counters are updated here as well, and the output is produced
 * by the TOC or the normal printing chunk depending on mktoc.
 *
 * The line is expected to have passed is_header.
 */
static void print_header(sorg_d *sorg, char *buf, size_t size, FILE *out)
{
    size_t s; /* loop counter used by the TOC printing chunk */
    size_t off;
    int level;

    /* One star is level 2: level 1 is reserved for the title. */
    level = 1;
    off = 0;
    while(off < size && buf[off] == '*') {
        level++;
        off++;
    }

    /* Skip every space between the stars and the text, so that extra
     * spaces do not end up in the header text or in its anchor id. */
    while(off < size && buf[off] == ' ') off++;

    if(size > 0 && buf[size - 1] == '\n') size--; /* chomp */
    if(off > size) off = size; /* keeps size - off from wrapping around */

<<update_section_depth>>
    if(mktoc) {
<<print_toc_header>>
    } else {
<<print_normal_header>>
    }
}

It is inside the function print_header that the section counter is updated. This overloads the functionality of print_header, but it is a quick solution to the problem.

Level in this parser starts at header level 2, so the value needs to be subtracted by 2 in order to work with the internal section counters.


<<update_section_depth>>=
/* print_header counts a one-star header as level 2, so 2 is subtracted
 * before the value is handed to the section counters. */
update_section_depth(sorg, level - 2);
4.3.2.2. Normal Header Printing

<<print_normal_header>>=
/* The anchor id is the header text with its spaces replaced by underscores. */
fprintf(out, "<a id=\"");
spaces_to_underscores(buf + off, size - off, out);
fprintf(out, "\">\n");

/* Levels below 6 use a heading tag; deeper levels are rendered as
 * underlined italics. */
if(level >= 6) {
    fprintf(out, "<u><i>");
} else {
    fprintf(out, "<h%d>", level);
}

if(use_secno) print_section(sorg, out);
fwrite(buf + off, 1, size - off, out);

if(level >= 6) {
    fprintf(out, "</i></u>\n");
} else {
    fprintf(out, "</h%d>\n", level);
}
fprintf(out, "</a>\n");
4.3.2.3. TOC Header Printing

A TOC section heading in HTML is a list item in an unordered list. HTML supports nested lists with automatic indentation, which makes presenting the hierarchy a little more elegant (no need to compute indentations manually!). However, the program needs to be responsible when to start and end unordered lists. To do this, we need to keep track of the previous section depth as well as the current one. When the depth changes, one has to update the unordered lists.

A new unordered list will only begin when there is a new subsection created, so this only occurs when the current depth is greater than the previous one.

When the depth goes up a level, this indicates that the currently populated list needs to be closed with an end tag. However, sometimes the depth will go up more than one level, which means that each of those lists will need to be closed. For this reason, a variable number of end tags need to be printed. The number of necessary closed tags needed is obtained from getting the difference between the current depth and the previous depth. (Note: I missed this edge case on the first go).

<<print_toc_header>>=
/* Keep the nesting of the unordered lists in step with the section depth. */
if(sorg->depth < sorg->pdepth) {
    /* moving back out: close one list per level that was left */
    for(s = 0; s < (sorg->pdepth - sorg->depth); s++) {
        fprintf(out, "</ul>");
    }
} else if(sorg->depth > sorg->pdepth) {
    /* a new subsection opens a nested list */
    fprintf(out, "<ul>");
}

/* The entry links to the anchor of the header inside the index file. */
fprintf(out, "<li><a href=\"%s#", indxfile);
spaces_to_underscores(buf + off, size - off, out);
fprintf(out, "\">");
if(use_secno) print_section(sorg, out);
fwrite(buf + off, 1, size - off, out);
fprintf(out, "</a></li>\n");

4.4. Titles

Titles are declared using the TITLE tag in org-mode. They should only be used once per document, and it is expected that they are declared at the top of the file.

<<function_declarations>>=
static int is_title(sorg_d *sorg, const char *buf, size_t size);
<<functions>>=
/*
 * Returns 1 when the first bytes of buf are the title tag, 0 otherwise.
 * Buffers shorter than the tag never match.
 */
static int is_title(sorg_d *sorg, const char *buf, size_t size)
{
    static const char tag[] = "#+TITLE:";
    const size_t tag_len = sizeof(tag) - 1;

    if(size < tag_len) return 0;

    return strncmp(buf, tag, tag_len) == 0;
}
<<function_declarations>>=
static void print_title(sorg_d *sorg, const char *buf, size_t size, FILE *out);
<<functions>>=
/*
 * Print the document title found on a title line as an h1 element.
 *
 * The title starts at the first non-space character after the 8 character
 * tag and runs up to (not including) the newline or the end of the buffer.
 * Nothing is printed when that range is empty; this includes a line that
 * has only a newline after the tag, which previously produced an h1
 * element holding just that newline.
 */
static void print_title(sorg_d *sorg, const char *buf, size_t size, FILE *out)
{
    size_t start;
    size_t end;

    start = sizeof("#+TITLE:") - 1;
    while(start < size && buf[start] == ' ') start++;

    end = start;
    while(end < size && buf[end] != '\n') end++;

    if(end == start) return; /* empty title, or buffer shorter than the tag */

    fprintf(out, "<h1>");
    fwrite(buf + start, 1, end - start, out);
    fprintf(out, "</h1>\n");
}

4.5. Plain Text

Plain text can be roughly defined as stuff in between headers that isn't a code block. In this mode, the parser looks for Formatted Text.

<<types>>=
MODE_TEXT,

4.5.1. Writing a block of text

A block of text is written using the function |print_text|.

<<function_declarations>>=
void print_text(sorg_d *sorg, char *buf, FILE *out);

This code is a wrapper around |fwrite|.

<<functions>>=
/*
 * Flush the pending text block of sorg to out.
 * The block starts at buf + sorg->off and blksize - 1 bytes are written;
 * a block size below 1 writes nothing.
 */
void print_text(sorg_d *sorg, char *buf, FILE *out)
{
    if(sorg->blksize < 1) return;

    fwrite(buf + sorg->off, 1, sorg->blksize - 1, out);
}

4.6. Code Blocks

All code blocks are assumed to begin with the NAME tag with the code block immediately following it on the next line.

A NAME tag is found while the state machine is in NONE mode. It is done using the function is_name.

<<function_declarations>>=
static int is_name(sorg_d *sorg, char *buf, size_t size);
<<functions>>=
/*
 * Returns 1 when the first bytes of buf are the NAME tag, 0 otherwise.
 * Buffers shorter than the tag never match.
 */
static int is_name(sorg_d *sorg, char *buf, size_t size)
{
    static const char tag[] = "#+NAME:";
    const size_t tag_len = sizeof(tag) - 1;

    if(size < tag_len) return 0;

    return strncmp(buf, tag, tag_len) == 0;
}

If a NAME tag is found, this changes the state machine to only look for the beginning of a code block.

<<types>>=
MODE_BEGIN_CODE,

The NAME tag will also be printed to screen. This is a distinct difference between the regular emacs org-mode exporter and this one (and an important one!) Emacs will not export the name of the code block, which leads to more confusing readability!

In addition to printing the name itself, a link target will be printed as well. To distinguish them from section targets, code sections will be prepended with an underscore '_'.

<<function_declarations>>=
static void print_name(sorg_d *sorg, char *buf, size_t size, FILE *out);
<<functions>>=
/*
 * Print the label of a named code block together with its link target.
 *
 * buf holds a line that starts with the 7 character NAME tag. The name is
 * the first run of characters after the tag and any spaces, ending at the
 * next space or newline. The anchor id is the name prefixed with '_'.
 *
 * The angle brackets that surround the visible name are written as HTML
 * entities so that they are not read as markup, and the bold and italic
 * tags are closed in the reverse of the order they were opened.
 */
static void print_name(sorg_d *sorg, char *buf, size_t size, FILE *out)
{
    const size_t tag_size = 7; /* length of the NAME tag */
    size_t off;
    size_t name_size;

    if(size < tag_size) return; /* size - tag_size would wrap around */
    buf += tag_size;
    size -= tag_size;

    /* spaces between the tag and the name */
    off = 0;
    while(off < size && buf[off] == ' ') off++;

    /* the name runs up to the next space or line break */
    name_size = 0;
    while(off + name_size < size &&
          buf[off + name_size] != '\n' &&
          buf[off + name_size] != ' ') {
        name_size++;
    }

    fprintf(out, "<a id=\"_");
    spaces_to_underscores(buf + off, name_size, out);
    fprintf(out, "\">\n");
    fprintf(out, "<div><b><i>&lt;&lt;");
    fwrite(buf + off, 1, name_size, out);
    fprintf(out, "</i></b>&gt;&gt;=</div>\n");
    fprintf(out, "</a>");
}

Code blocks begin and end with BEGIN_SRC and END_SRC tags. When a new code block declaration begins, the mode is set to be in code mode.

<<types>>=
MODE_CODE,

The BEGIN_SRC tag is checked with the function is_begin.

<<function_declarations>>=
static int is_begin(sorg_d *sorg, char *buf, size_t size);
<<functions>>=
/*
 * Returns 1 when the first bytes of buf are the BEGIN_SRC tag, 0 otherwise.
 * Buffers shorter than the tag never match.
 */
static int is_begin(sorg_d *sorg, char *buf, size_t size)
{
    static const char tag[] = "#+BEGIN_SRC";
    const size_t tag_len = sizeof(tag) - 1;

    if(size < tag_len) return 0;

    return strncmp(buf, tag, tag_len) == 0;
}

Lines will remain in code mode until the END_SRC tag is found.

The END_SRC tag is found using the function is_end.

<<function_declarations>>=
static int is_end(sorg_d *sorg, char *buf, size_t size);
<<functions>>=
/*
 * Returns 1 when the first bytes of buf are the END_SRC tag, 0 otherwise.
 * Buffers shorter than the tag never match.
 */
static int is_end(sorg_d *sorg, char *buf, size_t size)
{
    static const char tag[] = "#+END_SRC";
    const size_t tag_len = sizeof(tag) - 1;

    if(size < tag_len) return 0;

    return strncmp(buf, tag, tag_len) == 0;
}

4.7. Code Block References

Code block references are hyperlink references to named code blocks in text. This formatting is identical to how it appears in the codeblock code.

A code block reference can be found via is_block_ref.

<<function_declarations>>=
static int is_block_ref(sorg_d *sorg, char *buf, size_t size);

The code for this has been heavily copied from the function is_link.


<<functions>>=
/*
 * Returns 1 when buf starts with two '<' characters and a pair of '>'
 * characters follows somewhere within size bytes, 0 otherwise.
 * Buffers shorter than 5 bytes never match.
 */
static int is_block_ref(sorg_d *sorg, char *buf, size_t size)
{
    size_t i;
    size_t last;

    if(size < 5 || buf[0] != '<' || buf[1] != '<') return 0;

    /* stop one short of the end: each step also looks at buf[i + 1] */
    last = size - 1;
    i = 2;
    while(i < last) {
        if(buf[i] == '>' && buf[i + 1] == '>') return 1;
        i++;
    }

    return 0;
}

A code block reference is printed with the function print_block_ref. This will print a hyperlink to the targeted codeblock.

<<function_declarations>>=
static size_t print_block_ref(sorg_d *sorg, char *buf, size_t size, FILE *out);

Similar to is_block_ref, print_block_ref is copied from print_link. As mentioned in the code block section, a block reference will always be prepended with an underscore "_".

<<functions>>=
/*
 * Print a code block reference as a hyperlink to the named code block.
 *
 * buf starts at the opening pair of '<' characters. The name is the text
 * up to the closing pair of '>' characters, and the link target is that
 * name prefixed with '_'. The angle brackets shown around the name are
 * written as HTML entities so that they are not read as markup.
 *
 * Returns the number of bytes of buf that were consumed, including both
 * pairs of brackets, so the caller can skip over them.
 */
static size_t print_block_ref(sorg_d *sorg, char *buf, size_t size, FILE *out)
{
    size_t off;
    size_t n;
    char *name;
    size_t name_size;

    off = 2;
    name_size = 0;
    name = &buf[2];

    /* n + 1 < size keeps the lookahead at buf[n + 1] inside the buffer and
     * cannot wrap around for a size of 0 */
    for(n = 2; n + 1 < size; n++) {
        if(buf[n] == '>' && buf[n + 1] == '>') {
            name_size = n - 2;
            off = n + 2;
            break;
        }
        off = n + 1;
    }

    fprintf(out, "<a href=\"#_");
    spaces_to_underscores(name, name_size, out);
    fprintf(out, "\">&lt;&lt;");
    fwrite(name, 1, name_size, out);
    fprintf(out, "&gt;&gt;</a>");
    return off;
}

4.8. Formatted Text

All formatted text can be generally described as encapsulated text. Each formatting is surrounded by a special character (or characters). Because they are so similar, this kind of parsing can be generalized.

4.8.1. The core mechanic

First, the parser checks to see if an item is encapsulated by a particular delimiter delim of size dsize. If there are matching delimiters on the same line, then the text between them is considered to be encapsulated.

NOTE: for now, dsize only works with a size of 1.

<<function_declarations>>=
static int is_encapsulated(sorg_d *sorg,
                    const char *delim,
                    int dsize,
                    char *buf,
                    size_t size);
<<functions>>=
/*
 * Returns 1 when buf starts with the delimiter character and the same
 * character occurs again before the last byte of the buffer, 0 otherwise.
 * Only the first character of delim is used; dsize is not consulted.
 */
static int is_encapsulated(sorg_d *sorg,
                    const char *delim,
                    int dsize,
                    char *buf,
                    size_t size)
{
    const char mark = delim[0];
    size_t i;

    if(buf[0] != mark) return 0;

    /* the final byte of the buffer is left out of the search */
    size -= 1;
    i = 1;
    while(i < size) {
        if(buf[i] == mark) return 1;
        i++;
    }
    return 0;
}

Once encapsulated text is found, the encapsulated text is printed to screen. In HTML, this means they are between HTML tags of a certain label. This function returns how many characters it parsed, so the parser knows to skip over them.

<<function_declarations>>=
static size_t print_encapsulated(sorg_d *sorg,
                          const char *delim,
                          int dsize,
                          char *buf,
                          size_t size,
                          const char *tag,
                          FILE *out);

This function extracts the text in between the two delimiters and then prints it between the specified HTML tags. The trick with printing the text is factoring in offsets for both sets of delimiters. To avoid the leftmost delimiter, offset by the size of the delimiter. To avoid the rightmost delimiter, negate the size by two times the delimiter to compensate for both the rightmost and leftmost delimiter.

<<functions>>=
/*
 * Print the text between two delimiter characters inside the given tag.
 *
 * buf starts at the opening delimiter. The closing delimiter is searched
 * for from the second byte up to, but not including, the last byte.
 * Returns the number of bytes covered, both delimiters included, so the
 * caller can skip over them.
 */
static size_t print_encapsulated(sorg_d *sorg,
                          const char *delim,
                          int dsize,
                          char *buf,
                          size_t size,
                          const char *tag,
                          FILE *out)
{
    const size_t limit = size - 1;
    size_t span;

    /* span ends up one past the closing delimiter */
    span = 1;
    while(span < limit) {
        if(buf[span++] == delim[0]) break;
    }

    fprintf(out, "<%s>", tag);
    /* drop the delimiter on either side of the text */
    fwrite(buf + dsize, 1, span - 2*dsize, out);
    fprintf(out, "</%s>", tag);
    return span;
}

In the parse loop, both is_encapsulated and print_encapsulated can be combined together in a function called chk_encapsulated. The chk is short for check, and the abbreviation is only used to make sure the function call fits on the same line.

On success, the function will return "true", causing the function to break out of the chain of if statements.

<<function_declarations>>=
static int chk_encapsulated(sorg_d *sorg,
                          const char *delim,
                          int dsize,
                          char *buf,
                          size_t size,
                          const char *tag,
                          FILE *out);
<<functions>>=
/*
 * Check for encapsulated text at the current parse position and print it.
 *
 * When the text at buf + sorg->pos is enclosed by the delimiter, the
 * pending text block is flushed, the enclosed text is printed inside the
 * given tag, and the position and block bookkeeping of sorg are moved past
 * it. Returns 1 in that case and 0 when nothing was printed.
 *
 * The pending block is flushed through print_text, which skips the write
 * when the block size is 0. Subtracting 1 from an empty block size here
 * would otherwise hand a bogus length to fwrite.
 */
static int chk_encapsulated(sorg_d *sorg,
                          const char *delim,
                          int dsize,
                          char *buf,
                          size_t size,
                          const char *tag,
                          FILE *out)
{
    char *here;
    size_t left;

    here = &buf[sorg->pos];
    left = size - sorg->pos;

    if(!is_encapsulated(sorg, delim, dsize, here, left)) return 0;

    print_text(sorg, buf, out);
    sorg->pos += print_encapsulated(sorg,
                                    delim,
                                    dsize,
                                    here,
                                    left,
                                    tag,
                                    out);
    /* start a fresh text block right after the closing delimiter */
    sorg->blksize = 1;
    sorg->off = sorg->pos;
    return 1;
}

4.8.2. Format Styles

4.8.2.1. Bold

<<parse_formatted>>=
/* Bold: text enclosed in '*' characters is printed inside a b tag. */
if(chk_encapsulated(sorg, "*", 1, buf, size, "b", out)) break;
4.8.2.2. Italics

<<parse_formatted>>=
/* Italics: text enclosed in '/' characters is printed inside an i tag. */
if(chk_encapsulated(sorg, "/", 1, buf, size, "i", out)) break;
4.8.2.3. Preformatted

<<parse_formatted>>=
/* Preformatted: text enclosed in '=' characters is printed inside a code tag. */
if(chk_encapsulated(sorg, "=", 1, buf, size, "code", out)) break;
4.8.2.4. Underline

<<parse_formatted>>=
/* Underline: text enclosed in '_' characters is printed inside a u tag. */
if(chk_encapsulated(sorg, "_", 1, buf, size, "u", out)) break;
4.8.2.5. TeX math mode

This is not yet implemented.

4.9. Ordered Lists

This is not yet implemented.

4.10. Unordered Lists

An unordered list is a list whose entries are noted by dashes "-". In Org Mode, unordered lists can have hierarchy via indentation. This will be implemented later.

4.10.1. Unordered List mode

<<types>>=
MODE_ULIST,

When a list is first found out of MODE_NONE mode, it is set to be MODE_ULIST, or unordered list mode. In this mode, list items will be added until there are no more items. When this happens, it sets the mode back to MODE_NONE and re-reads the line.

4.10.2. Checking for list

A line that begins with the characters "- " (dash space) is considered to be the start of a list. If the line has a dash with no spaces, it is considered to be an ordered list.

This check is done using the function is_ulistitem.

<<function_declarations>>=
static int is_ulistitem(sorg_d *sorg, char *buf, size_t size);
<<functions>>=
/*
 * Returns 1 when the line in buf is an unordered list item: either a dash
 * followed by a space, or a two byte line that starts with a dash.
 * Returns 0 otherwise.
 *
 * Lines shorter than two bytes are rejected before buf[1] is looked at,
 * so the check never reads past the end of the buffer.
 */
static int is_ulistitem(sorg_d *sorg, char *buf, size_t size)
{
    if(size < 2 || buf[0] != '-') return 0;
    if(size == 2) return 1;
    return buf[1] == ' ';
}

4.10.3. Printing the list item

There are different components of a list that need to be printed: the beginning of a list, the end of a list, and a list item.

The beginning of a list prints the HTML tag "ul" via print_ulist_begin.

<<function_declarations>>=
static void print_ulist_begin(sorg_d *sorg, FILE *out);
<<functions>>=
/* Write the opening tag of an unordered list to out. */
static void print_ulist_begin(sorg_d *sorg, FILE *out)
{
    fputs("<ul>\n", out);
}

The end of the list will print the corresponding end tag to "ul" via print_ulist_end.

<<function_declarations>>=
static void print_ulist_end(sorg_d *sorg, FILE *out);
<<functions>>=
/* Write the closing tag of an unordered list to out. */
static void print_ulist_end(sorg_d *sorg, FILE *out)
{
    fputs("</ul>\n", out);
}

An item of an unordered list is printed via print_ulist_item.

<<function_declarations>>=
static void print_ulist_item(sorg_d *sorg, char *buf, size_t size, FILE *out);

print_ulist_item will begin the item information after the initial dash and any immediate whitespace following it. It will end right before the line break.

This is an interesting function because it recursively calls the top-level parse function in order to format the item string. This has caused some initial bugs to surface. Inside of MODE_TEXT, previous blocks of text get written before any formattings happen. This parser function gets called directly into MODE_TEXT, instead of being called from MODE_NONE. We do this because we only want to limit the parsing scope to things inside of text. This is a break from previous assumptions. Up to this point, it has always been assumed that there is always a block size of at least one. Now there are cases where there are empty block sizes, and nothing to handle for that. For some reason, blocks in text mode are printed N-1. When N is 0, you get negative values which results in undefined behavior. To combat this, the function print_text does a zero check.


<<functions>>=
/*
 * Print one unordered list item as an li element.
 *
 * The item text starts after the leading dash and the spaces that follow
 * it. The text is formatted by calling parse again with the mode forced
 * to MODE_TEXT. parse resets sorg->pos, sorg->off and sorg->blksize, so
 * those three fields are saved before the call and restored afterwards.
 * On return both mode and pmode are set to MODE_ULIST.
 */
static void print_ulist_item(sorg_d *sorg, char *buf, size_t size, FILE *out)
{
    size_t s;
    int mode;
    size_t off;
    size_t len;
    size_t s_blksize;
    size_t s_off;
    size_t s_pos;

    mode = 1;
    off = 1; /* assume first character is '-' */
    len = 0;

    /* Scan the line: off becomes the index of the first character of the
     * item text. len counts the characters after that first one, up to and
     * including the line break, which is why len + 1 is passed below. */
    for(s = 1; s < size; s++) {
        if(mode == 0) break;
        switch(mode) {
            case 1: /* whitespaces after initial dash */
                if(buf[s] != ' ') mode = 2;
                else off++;
                break;
            case 2: /* look for linebreak */
                if(buf[s] == '\n') {
                    mode = 0;
                }
                len++;
                break;
        }
    }

    fprintf(out, "<li><p>");
    /* Enter the parser in text mode with no previous mode set. */
    sorg->pmode = MODE_NONE;
    sorg->mode = MODE_TEXT;
    /* Save the caller's scan state across the recursive call. */
    s_off = sorg->off;
    s_blksize = sorg->blksize;
    s_pos = sorg->pos;
    parse(sorg, out, buf + off, len + 1);
    sorg->off = s_off;
    sorg->blksize = s_blksize;
    sorg->pos = s_pos;

    sorg->mode = MODE_ULIST;
    sorg->pmode = MODE_ULIST;
    /* fwrite(buf + off, len, 1, out); */
    fprintf(out, "</p></li>\n");
}

4.11. TODO "TODO" headings

This is not yet implemented.

4.12. Links

A link can be found in Sorg using the function is_link.

<<function_declarations>>=
static int is_link(sorg_d *sorg, char *buf, size_t size);
<<functions>>=
/*
 * Returns 1 when buf starts with two '[' characters and a pair of ']'
 * characters follows somewhere within size bytes, 0 otherwise.
 * Buffers shorter than 5 bytes never match.
 */
static int is_link(sorg_d *sorg, char *buf, size_t size)
{
    const char *p;
    const char *stop;

    if(size < 5) return 0;
    if(!(buf[0] == '[' && buf[1] == '[')) return 0;

    /* stop one short of the end: each step also looks at p[1] */
    stop = buf + (size - 1);
    for(p = buf + 2; p < stop; p++) {
        if(p[0] == ']' && p[1] == ']') return 1;
    }

    return 0;
}
<<function_declarations>>=
static size_t print_link(sorg_d *sorg, char *buf, size_t size, FILE *out);

Links come in two forms. The first form contains both the link location and named description. The second form only contains the link location which is also the visible named description. If the "][" combination was not found in the buffer, it is assumed to be the second, more terse type. When this happens, the name and name_size are set to be identical to link and link_size.

<<functions>>=
/*
 * Print an org link as an HTML anchor.
 *
 * buf starts at the opening pair of '[' characters. A link either has a
 * target and a separate label divided by "][", or a target only, in which
 * case the target is also used as the label. Targets that begin with
 * "https://" or "http://" are written as they are; any other target is
 * written as a fragment with its spaces replaced by underscores.
 *
 * Returns the number of bytes of buf that were consumed, so the caller
 * can skip over the link.
 */
static size_t print_link(sorg_d *sorg, char *buf, size_t size, FILE *out)
{
    char *target;
    char *label;
    size_t target_len;
    size_t label_len;
    size_t run;      /* characters seen since the last separator */
    size_t consumed;
    size_t i;

    size -= 1; /* for lookahead */

    target = &buf[2];
    label = NULL;
    target_len = 0;
    label_len = 0;
    run = 0;
    consumed = 2;

    for(i = 2; i < size; i++) {
        consumed++;
        run++;

        if(buf[i] != ']') continue;

        if(buf[i + 1] == '[') {
            /* end of the target; the label starts after the separator */
            target_len = run - 1;
            run = 0;
            label = &buf[i + 2];
            i++;
            consumed++;
        } else if(buf[i + 1] == ']') {
            label_len = run - 1;
            if(label == NULL) { /* no separator seen: target doubles as label */
                target_len = label_len;
                label = target;
            }
            consumed++;
            break;
        }
    }

    if(!strncmp(target, "https://", 8) || !strncmp(target, "http://", 7)) {
        fprintf(out, "<a href=\"");
        fwrite(target, 1, target_len, out);
    } else {
        fprintf(out, "<a href=\"#");
        spaces_to_underscores(target, target_len, out);
    }
    fprintf(out, "\">");
    fwrite(label, 1, label_len, out);
    fprintf(out, "</a>");
    return consumed;
}

4.13. Comments

A comment in org-mode is denoted by "# " (space included).

<<function_declarations>>=
static int is_comment(sorg_d *sorg, char *line, size_t size);

In addition to checking for the pattern "# ", the is_comment function will also check if the line contains only a "#". This will be interpreted as a comment as well.

<<functions>>=
/*
 * Returns 1 when the line is a comment: either a '#' followed by a space,
 * or a two byte line that starts with '#'. Returns 0 otherwise.
 *
 * Lines shorter than two bytes are rejected before line[1] is looked at,
 * so the check never reads past the end of the buffer.
 */
static int is_comment(sorg_d *sorg, char *line, size_t size)
{
    if(size < 2 || line[0] != '#') return 0;
    if(size == 2) return 1;
    return line[1] == ' ';
}



prev | home | next