6. Parsing
6.1. Top Level Parse (orgparse_run)
Parsing is done using the function orgparse_run
.
All mutable variables are declared locally, so it should
be feasible to run nested calls to this function.
orgparse_run
expects a properly initialized + set instance
of orgparse
, a text block, the text block size, and any
user data.
void orgparse_run(orgparse *op, const char *txt, size_t sz, void *ud);

/*
 * Parse the text block `txt` of `sz` bytes from start to finish.
 *
 * A fresh parser state lives on this call's stack; it is initialised and
 * run through orgparse_init_and_run(), then handed to orgparse_end().
 * `ud` is forwarded untouched to both calls.
 */
void orgparse_run(orgparse *op, const char *txt, size_t sz, void *ud)
{
    orgparse_state st;

    orgparse_init_and_run(op, txt, sz, ud, &st);
    orgparse_end(op, ud, &st);
}
void orgparse_init_and_run(orgparse *op, const char *txt, size_t sz,
                           void *ud, orgparse_state *state);

/*
 * Initialise the caller-provided `state` for the given parser, text block
 * and user data, then run the parse loop over it. Unlike orgparse_run(),
 * this does not call orgparse_end(); the state remains available to the
 * caller afterwards.
 */
void orgparse_init_and_run(orgparse *op, const char *txt, size_t sz,
                           void *ud, orgparse_state *state)
{
    orgparse_state_init(state, op, txt, sz, ud);
    orgparse_state_run(state);
}
6.2. Orgparse State
To make orgparse re-entrant, all mutable data is stored in a state.
6.2.1. Orgparse Struct
All mutable orgparse data is contained inside of a struct
called orgparse_state
, which gets instantiated with every
call to orgparse_run
. This allows orgparse_run
to be
re-entrant, which is a needed requirement in order for
weewiki's scripting capabilities to really work.
6.2.1.1. Struct Declarations
typedef struct orgparse_state orgparse_state;
<<orgparse_flags_struct>>
/* All mutable data for one parse run; filled in by orgparse_state_init(). */
struct orgparse_state {
/* parser instance whose callbacks are invoked */
orgparse *op;
/* text block being parsed */
const char *txt;
/* size of txt in bytes, as supplied by the caller */
size_t sz;
/* user data passed through to every callback */
void *ud;
/* current index into txt; advanced by the parse functions */
size_t pos;
/* start of the code block or aux block being collected (NULL when none) */
const char *blk;
/* running length of the block that blk points to */
size_t len;
<<state_flags_in_struct>>
};
6.2.1.2. Flags Struct
The flags struct is a component of the orgparse_state
.
These flags are used to determine the position of the parser
state machine.
/* Parser state-machine flags: mode, txtmode and newline. */
typedef struct orgparse_state_flags orgparse_state_flags;
6.2.1.2.1. The Flags
The mode
flag is used to determine the large scale parsing
mode, and is used to do general org-mode parsing or aux
block parsing.
The txtmode
flag is a boolean value set every time the
parser goes to parse a text block. It is used to help
indicate when a new paragraph block should begin.
The newline
flag is used to determine whether or not
a newline
character has been found. This is used to
add explicit breaks, as well as start new paragraph blocks.
struct orgparse_state_flags {
/* large-scale mode: 0 = general org parsing, 1 = inside a code block,
 * 2 = inside an aux block (see the dispatch in orgparse_state_run) */
int mode;
/* non-zero when the previous pass of the parse loop handled text */
int txtmode;
/* set to 1 once an empty line has ended a paragraph; cleared when the
 * next paragraph begins */
int newline;
};
6.2.1.2.2. Flags in orgparse state
The orgparse_state
struct has two orgparse_state_flags
variables: an internal flags value iflags
, and a pointer
value flags
. By default, flags
points to the internal
value, but this can be overridden to be an external value.
The reasoning for having this feature is to allow more
seamless org code parsing generation in the Janet scripting
engine via the org
command.
/* flags in use; points at iflags unless overridden with
 * orgparse_state_flags_set() */
orgparse_state_flags *flags;
/* internal flag storage, the default target of `flags` */
orgparse_state_flags iflags;
To override flags, use the orgparse_state_flags_set
function. To get the flags currently in use (the internal ones unless overridden), use
orgparse_state_flags_get
.
/* Returns the flags pointer currently stored in the state. */
orgparse_state_flags *orgparse_state_flags_get(orgparse_state *s);
/* Replaces the flags pointer stored in the state with f. */
void orgparse_state_flags_set(orgparse_state *s,
orgparse_state_flags *f);
/*
 * Return the flags the state is currently using. This is whatever pointer
 * is stored in s->flags, so it reflects any override that was installed.
 */
orgparse_state_flags *orgparse_state_flags_get(orgparse_state *s)
{
    orgparse_state_flags *active = s->flags;

    return active;
}
/*
 * Make the state use the flags at `f` from now on. The pointer is stored
 * as-is; nothing is copied.
 */
void orgparse_state_flags_set(orgparse_state *s, orgparse_state_flags *f)
{
    s->flags = f;
}
6.2.1.3. Struct Init
void orgparse_state_init(orgparse_state *state, orgparse *op,
                         const char *txt, size_t sz, void *ud);

/*
 * Prepare `state` for a new parse of `txt` (`sz` bytes) using parser `op`
 * and user data `ud`. The cursor starts at 0, no block is being collected,
 * and the flags pointer is aimed at the state's own internal flags, all of
 * which are cleared.
 */
void orgparse_state_init(orgparse_state *state, orgparse *op,
                         const char *txt, size_t sz, void *ud)
{
    /* caller-supplied context */
    state->op = op;
    state->ud = ud;
    state->txt = txt;
    state->sz = sz;

    /* cursor and pending block */
    state->pos = 0;
    state->blk = NULL;
    state->len = 0;

    /* flags default to the internal storage, fully zeroed */
    state->iflags.mode = 0;
    state->iflags.txtmode = 0;
    state->iflags.newline = 0;
    state->flags = &state->iflags;
}
6.2.1.4. Size
size_t orgparse_state_size(void);

/*
 * Report how many bytes an orgparse_state occupies, so code that only
 * sees the typedef can allocate one.
 */
size_t orgparse_state_size(void)
{
    const size_t nbytes = sizeof(orgparse_state);

    return nbytes;
}
6.2.2. State Run
void orgparse_state_run(orgparse_state *state);

/*
 * Main parse loop. Walks state->pos over the text and dispatches on
 * state->flags->mode:
 *   0 - general org parsing (comment, code block start, header, command,
 *       otherwise text),
 *   1 - inside a code block,
 *   2 - inside an aux block.
 * Each parse function advances state->pos to the last byte it consumed;
 * the loop's own increment then steps past it.
 *
 * A trailing '\0' counted in state->sz is excluded from the loop bound.
 * An empty buffer (sz == 0) is a no-op; previously the terminator check
 * indexed txt[sz - 1] with sz == 0 and read out of bounds.
 */
void orgparse_state_run(orgparse_state *state)
{
    int txtmode;
    size_t sz;

    txtmode = 0;
    sz = state->sz;

    /* guard sz > 0 so that sz - 1 cannot wrap around */
    if (sz > 0 && state->txt[sz - 1] == '\0') {
        sz--;
    }

    for (state->pos = 0; state->pos < sz; state->pos++) {
        txtmode = 0;
        if (state->flags->mode == 0) {
            /* the `continue`s below deliberately skip the txtmode
             * write-back at the bottom of the loop */
            if (parse_comment(state)) {
                continue;
            }
            if (parse_codeblock(state)) {
                continue;
            } else if (parse_header(state)) {
                continue;
            } else if (is_cmd(state)) {
                if (parse_name(state)) {
                    continue;
                } else if (parse_title(state)) {
                    continue;
                } else {
                    parse_ucmd(state);
                    continue;
                }
            } else {
                /* text after non-text opens a new paragraph, unless we
                 * are sitting on the final byte of the buffer */
                if (!state->flags->txtmode) {
                    if (state->pos < (state->sz - 1)) {
                        orgparse_pgrph_begin(state->op,
                                             state->ud);
                        state->flags->newline = 0;
                    }
                }
                txtmode = 1;
                parse_text(state);
            }
        } else if (state->flags->mode == 1) {
            parse_codeblock(state);
        } else if (state->flags->mode == 2) {
            /* an aux block does not interrupt the surrounding text */
            if (state->flags->txtmode) txtmode = 1;
            if (parse_aux_doiend(state)) {
                <<end_the_aux_block>>
            } else {
                <<increment_aux_line>>
            }
        }
        state->flags->txtmode = txtmode;
    }
}
6.3. Parse Header
static int parse_header(orgparse_state *state);
/*
 * Try to parse a header at state->pos: one or more '*' characters, a
 * space, then the title text.
 *
 * Returns 1 once the "stars + space" prefix has been recognised, 0
 * otherwise. When the title is terminated (by '\n', or when the first
 * title byte is also the last byte of the buffer) state->pos is advanced
 * to that byte and op->header is called with the title pointer, its
 * length, and the star count as the level.
 *
 * NOTE(review): if the buffer ends before a '\n' and the title is longer
 * than one byte, the loop simply runs out: rc is 1 but neither the
 * callback nor the position update happens. Confirm whether callers
 * always supply a trailing newline.
 */
static int parse_header(orgparse_state *state)
{
const char *header;
size_t n;
size_t start;
/* local scan mode: 0 = counting stars, 1 = reading title, 2 = done */
int mode;
int lvl;
size_t hsz;
int rc;
orgparse *op;
const char *str;
size_t sz;
void *ud;
size_t *pos;
op = state->op;
str = state->txt;
sz = state->sz;
ud = state->ud;
pos = &state->pos;
/* need at least two bytes left, the first being a star */
if(sz - *pos < 2) return 0;
if (str[*pos] != '*') return 0;
start = *pos;
mode = 0;
lvl = 0;
rc = 0;
for (n = start; n < sz; n++) {
if (mode == 2) break;
switch (mode) {
case 0:
if (str[n] == '*') {
lvl++;
continue;
} else if (str[n] == ' ') {
/* step over the space; header points at the first title byte.
 * hsz starts at 1 because the loop's n++ moves past that byte
 * before case 1 gets a chance to count it. */
n++;
header = &str[n];
hsz = 1;
mode = 1;
rc = 1;
/* title's first byte is the last byte of the buffer: emit now */
if (n == (sz - 1)) {
*pos += n - start;
/* the open paragraph is only closed when a header callback exists */
if (op->header != NULL) {
if (state->flags->txtmode) {
orgparse_pgrph_end(op, ud);
state->flags->txtmode = 0;
}
op->header(ud, header, hsz, lvl);
}
}
} else {
/* stars not followed by a space: not a header */
mode = 2;
rc = 0;
}
break;
case 1:
if (str[n] == '\n') {
/* end of the header line: leave pos on the newline and emit */
mode = 2;
*pos += n - start;
if (op->header != NULL) {
if (state->flags->txtmode) {
orgparse_pgrph_end(op, ud);
state->flags->txtmode = 0;
}
op->header(ud, header, hsz, lvl);
}
rc = 1;
break;
}
hsz++;
break;
}
}
return rc;
}
6.4. Parse Text
6.4.1. Parse Text Callback
Text is defined by what it isn't. If it isn't a header or code block, it is probably text. For this reason, the text block is a "last resort".
Anything that is not a header or code block is generally considered to be a text block. Text can span multiple lines, and can contain special formatting tags, for things like bold text, italic text, and text as well. In addition, text can also contain links.
When a line is determined to not be anything else (header, codeblock, aux block, etc), it is sent in to be parsed as text.
The text block parser will read lines until it hits stuff that is non-text. Along the way, the text will check for formatting tags and links on a line by line basis.
6.4.1.1. Main Callback
/* Forward declaration: parses one line of text starting at state->pos. */
static int parse_text(orgparse_state *state);
Some clarification details on what the mark
variable does.
mark
is used to save the beginning of a chunk
of unformatted text. It is called "mark" because it
is marking a location. When formatted text is found, it
will use this marked position to handle the text preceding
the formatted text, before handling the formatted text. The
marked position can then be updated to point to text after
the formatted text.
The mark
variable is also used in line break logic. When
a line break is found and the mark and text positions match,
it means that the line break has been found at the start of a
line. For some reason, I decided to use mark
instead of
start
. I think this has to do with the logic found in
check_and_parse
, but I'm unsure.
/*
 * Parse text starting at state->pos, one line at a time.
 *
 * Scans forward until a newline or the last byte of the buffer, handing
 * plain text to op->text and letting the formatting helpers (bold, code,
 * underline, link) emit their own pieces. `mark` is the start of the
 * not-yet-emitted plain text; the helpers move it past what they consume.
 * On exit state->pos is advanced to the byte the scan stopped on.
 *
 * Returns 1 when a line end was reached, 0 when there was nothing to
 * read or when the spliced aux-block chunk broke out of the loop.
 */
static int parse_text(orgparse_state *state)
{
size_t n;
size_t start;
size_t mark;
int rc;
orgparse *op;
const char *str;
size_t sz;
void *ud;
size_t *pos;
/* mode, buf and len are used by the spliced aux-block chunk */
int *mode;
const char **buf;
size_t *len;
op = state->op;
str = state->txt;
sz = state->sz;
ud = state->ud;
pos = &state->pos;
mode = &state->flags->mode;
buf = &state->blk;
len = &state->len;
rc = 0;
start = *pos;
mark = start;
/* check for end of file */
if (start == sz) return 0;
for (n = start; n < sz; n++) {
/* end of line, or final byte of the buffer */
if (str[n] == '\n' || n == (sz - 1)) {
rc = 1;
/* nothing pending before the line end: treat it as an empty line */
if (n == mark) {
<<handle_newline_logic>>
} else {
if (op->text != NULL) {
/* +1 includes line break*/
op->text(ud,
&str[mark],
(n - mark) + 1);
}
}
break;
} else if (parse_aux_check(str, sz, &n)) {
<<break_and_begin_aux_block>>
} else if (state->flags->newline) {
<<newline_begin_paragraph>>
/* each helper returns non-zero after emitting its span and updating
 * n and mark; the loop's n++ then continues past the new n */
} else if (parse_bold(op, str, ud, sz, &n, &mark)) {
continue;
} else if (parse_code(op, str, ud, sz, &n, &mark)) {
continue;
} else if (parse_ul(op, str, ud, sz, &n, &mark)) {
continue;
} else if (parse_link(op, str, ud, sz, &n, &mark)) {
continue;
}
}
/* leave pos on the byte where scanning stopped */
*pos += (n - start);
return rc;
}
6.4.1.2. Handling Newline Logic
In org mode, line break characters "\n" can trigger
different behaviors. This is managed via the newline
flag.
The newline at the end of some text is used to indicate the end of a line. The text parser reads things a line at a time, so at that point it is an indicator to break out of text parser routine.
Newline behavior changes when the character is found without any other characters. The first newline is considered to mark the end of a paragraph block. Subsequent newlines are registered as line breaks.
/* Empty line: the first one closes the current paragraph and sets the
 * newline flag; while the flag is set, further empty lines are reported
 * through op->newline (when that callback exists). */
if (!state->flags->newline) {
orgparse_pgrph_end(op, ud);
state->flags->newline = 1;
} else {
if (op->newline != NULL) {
op->newline(ud, NULL, 0);
}
}
Text that begins after a single empty line break is considered to be a new paragraph. This is determined by checking to see if the newline flag has been set.
A bug was discovered where the parser skips the first character of the paragraph. This bug becomes noticeable when any kind of formatting is used at the beginning of a paragraph (such as bold text). To mitigate this, the character pointer (n) moves back one character. While this may introduce other bugs later (not sure), it does not seem to break any tests in the current test suite.
/* Text found while the newline flag is set: clear it and open a new
 * paragraph. */
state->flags->newline = 0;
orgparse_pgrph_begin(op, ud);
n--; /* go backwards 1 so the enclosing loop's n++ revisits this character */
6.4.2. Tag Check
This functionality will generically check a line for matching tags. If a tag is found, the end position is returned.
/* Forward declaration: checks whether txt starts with a span delimited by
 * `tag` on a single line; on a match *len receives the inner length. */
static int tag_check(const char *txt,
size_t sz,
char tag,
size_t *len);
A small but important detail: tags in org mode work on a single line. If a line break is found before a matching tag, it breaks out.
Another detail: tags can't have spaces immediately after or before the tags. * this* is not a tag. *this * is also not a tag.
/*
 * Test whether `txt` (at most `sz` bytes) opens with a `tag`-delimited
 * span that closes on the same line.
 *
 * The opening tag must not be followed by a space and the closing tag
 * must not be preceded by one. Whenever a closing tag is seen, *len is
 * set to the number of bytes between the two tags, even if the span is
 * then rejected for the space rule. Returns 1 for a valid span, else 0.
 */
static int tag_check(const char *txt,
                     size_t sz,
                     char tag,
                     size_t *len)
{
    size_t i;

    /* need room for the opening tag, one more byte, and a closing tag */
    if (sz < 3 || txt[0] != tag || txt[1] == ' ') {
        return 0;
    }

    /* scan for the closing tag, giving up at the end of the line */
    for (i = 1; i < sz && txt[i] != '\n'; i++) {
        if (txt[i] != tag) {
            continue;
        }
        *len = i - 1;
        return txt[i - 1] != ' ';
    }

    return 0;
}
6.4.3. Check and Parse
Checks AND parses a particular tag.
/* Forward declaration: checks for a `tag`-delimited span at *pos and, if
 * found, emits it through f and updates *pos and *mark. */
static int check_and_parse(orgparse *op,
const char *str,
void *ud,
size_t sz,
size_t *pos,
size_t *mark,
char tag,
orgparse_text f);
Setting the mark
and pos
variables for mark and position
took some trial and error to get right. At some point,
I'll need to put in the time and figure this out better.
The gist of it as I understand it now: mark
is
used to handle text preceding formatted text found
with check_and_parse
, and pos
is a pointer to
the current character in the parser function
parse_text
.
When a line break is found, the marker is shifted forward by
one. In the next loop iteration in parse_text
, the text
position will also shift forward to match the marker, which
is important, as this equality is needed to trigger a
paragraph end if a consecutive line break is found.
/*
 * Check for a `tag`-delimited span starting at str[*pos] and, if there is
 * one, emit it.
 *
 * On a match:
 *   - plain text between *mark and *pos (if any) goes to op->text,
 *   - the span's inner text goes to `f` (when non-NULL),
 *   - *pos moves just past the closing tag,
 *   - *mark is set to the new *pos, or one further if that byte is a
 *     line break.
 * Returns 1 on a match and 0 otherwise, leaving *pos and *mark untouched.
 *
 * The length given to tag_check() is the number of bytes remaining from
 * *pos. It used to be computed from *mark, which lies at or before *pos,
 * so the scan could run past the end of the buffer.
 */
static int check_and_parse(orgparse *op,
                           const char *str,
                           void *ud,
                           size_t sz,
                           size_t *pos,
                           size_t *mark,
                           char tag,
                           orgparse_text f)
{
    size_t n;
    size_t m;
    size_t len;

    n = *pos;
    m = *mark;
    len = 0;

    /* nothing left to inspect; also keeps sz - n from wrapping */
    if (n >= sz) return 0;

    if (!tag_check(&str[n], sz - n, tag, &len)) {
        return 0;
    }

    /* flush the plain text that precedes the formatted span */
    if (op->text != NULL && n != m) {
        op->text(ud, &str[m], n - m);
    }

    if (f != NULL) {
        f(ud, &str[n + 1], len);
    }

    /* skip opening tag, contents and closing tag */
    n += len + 2;
    m = n;

    /* move the marker up if current character is line break */
    if (n < sz && str[n] == '\n') m++;

    *pos = n;
    *mark = m;
    return 1;
}
6.4.4. Parse Bold
This will check and parse bold text.
static int parse_bold(orgparse *op, const char *str, void *ud,
                      size_t sz, size_t *pos, size_t *mark);

/*
 * Bold text: a span delimited by '*'. Delegates to check_and_parse() with
 * the parser's bold callback; the return value and the updates to *pos
 * and *mark are those of check_and_parse().
 */
static int parse_bold(orgparse *op, const char *str, void *ud,
                      size_t sz, size_t *pos, size_t *mark)
{
    return check_and_parse(op, str, ud, sz, pos, mark, '*', op->bold);
}
6.4.5. Parse Code.
This will check and parse code
text.
static int parse_code(orgparse *op, const char *str, void *ud,
                      size_t sz, size_t *pos, size_t *mark);

/*
 * Inline code: a span delimited by '='. Delegates to check_and_parse()
 * with the parser's code callback; the return value and the updates to
 * *pos and *mark are those of check_and_parse().
 */
static int parse_code(orgparse *op, const char *str, void *ud,
                      size_t sz, size_t *pos, size_t *mark)
{
    return check_and_parse(op, str, ud, sz, pos, mark, '=', op->code);
}
6.4.6. Parse Underline.
This will check and parse underline
text.
static int parse_ul(orgparse *op, const char *str, void *ud,
                    size_t sz, size_t *pos, size_t *mark);

/*
 * Underlined text: a span delimited by '_'. Delegates to
 * check_and_parse() with the parser's underline callback; the return
 * value and the updates to *pos and *mark are those of check_and_parse().
 */
static int parse_ul(orgparse *op, const char *str, void *ud,
                    size_t sz, size_t *pos, size_t *mark)
{
    return check_and_parse(op, str, ud, sz, pos, mark, '_', op->underline);
}
6.4.7. Parse Link
6.4.7.1. Top Level Function
static int parse_link(orgparse *op, const char *str, void *ud,
                      size_t sz, size_t *pos, size_t *mark);

/*
 * Check for a "[[...]]" link at str[*pos] and, if present, emit it.
 *
 * On a match the plain text between *mark and *pos (if any) is sent to
 * op->text, the link target and display name are sent to op->link, *pos
 * moves just past the closing "]]", and *mark is set to the new *pos (or
 * one further when that byte is a line break). Returns 1 on a match and
 * 0 otherwise, leaving *pos and *mark untouched.
 */
static int parse_link(orgparse *op, const char *str, void *ud,
                      size_t sz, size_t *pos, size_t *mark)
{
    const char *target;
    const char *label;
    size_t target_sz = 0;
    size_t label_sz = 0;
    size_t span = 0;
    size_t cur;
    size_t pending;

    if (!check_link(str, *pos, sz)) {
        return 0;
    }

    cur = *pos;
    pending = *mark;

    extract_link(str, cur, sz,
                 &target, &target_sz,
                 &label, &label_sz,
                 &span);

    /* flush the plain text that precedes the link */
    if (op->text != NULL && cur != pending) {
        op->text(ud, &str[pending], cur - pending);
    }

    if (op->link != NULL) {
        op->link(ud, target, target_sz, label, label_sz);
    }

    /* span reaches the first ']' of the closing pair; +2 steps over it */
    cur += span + 2;
    pending = cur;

    /* move the marker up if current character is line break */
    if (cur < sz && str[cur] == '\n') {
        pending++;
    }

    *pos = cur;
    *mark = pending;
    return 1;
}
6.4.7.2. Check For Link
static int check_link(const char *str,
                      size_t pos,
                      size_t sz);

/*
 * Report whether a complete "[[...]]" link starts at str[pos].
 *
 * `sz` is the total size of `str`. The link must open with "[[" at `pos`
 * and close with "]]" before any line break and before the end of the
 * buffer. Returns 1 if so, 0 otherwise.
 *
 * The remaining-length guard used to be written as (pos - sz), which
 * wraps around in unsigned arithmetic and never triggered, so a '[' on
 * the last byte caused a read one past the buffer. Four bytes ("[[" plus
 * "]]") is the smallest span the scan below can accept, so that is the
 * minimum required here; an empty "[[]]" is still accepted as before.
 */
static int check_link(const char *str,
                      size_t pos,
                      size_t sz)
{
    size_t n;

    if (pos >= sz || (sz - pos) < 4) return 0;
    if (str[pos] != '[' || str[pos + 1] != '[') return 0;

    sz -= 1; /* for lookahead */
    pos += 2;

    for (n = pos; n < sz; n++) {
        if (str[n] == '\n') return 0;
        if (str[n] == ']' && str[n + 1] == ']') return 1;
    }

    return 0;
}
6.4.7.3. Extract Link
static void extract_link(const char *str,
                         size_t pos,
                         size_t sz,
                         const char **plink,
                         size_t *link_sz,
                         const char **pname,
                         size_t *name_sz,
                         size_t *len);

/*
 * Pull the target and display name out of a link starting at str[pos].
 * The caller is expected to have verified the link with check_link().
 *
 * Two forms are handled:
 *   [[link][name]] - *plink/*link_sz describe "link",
 *                    *pname/*name_sz describe "name";
 *   [[link]]       - name and link both describe "link".
 * *len receives the distance from `pos` to the first ']' of the closing
 * "]]" pair. If no closing pair is found, *pname may be NULL and both
 * sizes 0.
 *
 * A counter that was incremented but never read has been removed, and an
 * empty buffer (sz == 0) no longer wraps the loop bound.
 */
static void extract_link(const char *str,
                         size_t pos,
                         size_t sz,
                         const char **plink,
                         size_t *link_sz,
                         const char **pname,
                         size_t *name_sz,
                         size_t *len)
{
    const char *link;
    const char *name;
    size_t link_size;
    size_t name_size;
    size_t run;   /* bytes seen since the current field began */
    size_t start;
    size_t last;  /* loop bound, one short of sz for the lookahead */
    size_t n;

    start = pos;
    pos += 2;     /* step over the opening "[[" */
    last = (sz > 0) ? sz - 1 : 0;

    link = &str[pos];
    name = NULL;
    link_size = 0;
    name_size = 0;
    run = 0;

    for (n = pos; n < last; n++) {
        run++;
        if (str[n] != ']') continue;

        if (str[n + 1] == '[') {
            /* "][" separates the target from the display name */
            link_size = run - 1;
            run = 0;
            name = &str[n + 2];
            n++; /* skip the '[' as well */
        } else if (str[n + 1] == ']') {
            name_size = run - 1;
            if (name == NULL) { /* name not set, assume type 2 link */
                link_size = name_size;
                name = link;
            }
            break;
        }
    }

    *name_sz = name_size;
    *pname = name;
    *link_sz = link_size;
    *plink = link;
    *len = n - start;
}
6.5. Parse Comment
Anything that starts with '# ' (hash + space) is considered to be a comment, and the line will be ignored.
static int parse_comment(orgparse_state *state);

/*
 * Skip a comment line, i.e. one that starts with "# " at state->pos.
 *
 * Returns 0 (position untouched) when the text at state->pos is not a
 * comment. Otherwise advances state->pos to the line's terminating
 * newline, or to the end of the buffer if there is none, and returns 1.
 */
static int parse_comment(orgparse_state *state)
{
    const char *txt = state->txt;
    size_t sz = state->sz;
    size_t begin = state->pos;
    size_t eol;

    if ((sz - begin) < 2) {
        return 0;
    }
    if (txt[begin] != '#' || txt[begin + 1] != ' ') {
        return 0;
    }

    /* find the end of the line */
    eol = begin;
    while (eol < sz && txt[eol] != '\n') {
        eol++;
    }

    /* TODO: this might need to be (eol - begin) + 1 so that the offset
     * includes the line break. Otherwise weird characters are sometimes
     * printed in ww-server (AKA the "extra period" bug).
     */
    state->pos += (eol - begin);
    return 1;
}
6.6. Parse Command
A 'command' in org mode refers to any line that starts with '#+'. After this, a string of alphanumeric non-space characters creates the command name. Following this is an arbitrary number of spaces, followed by the command string.
The parse_cmd
function will parse and extract the
command, and command string (assuming it is indeed
a command).
static int parse_cmd(const char *str,
size_t sz,
const char **cmd,
size_t *cmd_len,
const char **cmdstr,
size_t *cmdstr_len,
size_t *total_len);
/*
 * Split a "#+NAME args" line into its command name and command string.
 *
 * str/sz    - text starting at the '#', and the number of bytes available
 * cmd       - receives a pointer to the name (the byte after "#+")
 * cmd_len   - receives the name length
 * cmdstr    - receives a pointer to the first non-space byte after the
 *             name, or NULL when there is none
 * cmdstr_len- receives the command string length
 * total_len - receives the offset of the byte that ended the scan (the
 *             newline or the last byte of the buffer), or 0 on failure
 *
 * Returns 0 if str does not start with "#+" followed by a non-space byte
 * (in which case none of the outputs are written), otherwise 1.
 *
 * NOTE(review): when the scan stops on the buffer's last byte instead of
 * a newline, that byte is not counted in the length being accumulated.
 * Callers compare names with strncmp(..., cmd_len), so a name cut short
 * this way still matches as a prefix - confirm before changing either.
 */
static int parse_cmd(const char *str,
size_t sz,
const char **cmd,
size_t *cmd_len,
const char **cmdstr,
size_t *cmdstr_len,
size_t *total_len)
{
size_t n;
const char *pcmdstr;
const char *pcmd;
size_t len;
/* 0 = reading the name, 1 = skipping spaces, 2 = reading the string */
int mode;
int rc;
if (sz < 3) return 0;
if (str[0] != '#') return 0;
if (str[1] != '+') return 0;
if (str[2] == ' ') return 0;
rc = 0;
mode = 0;
pcmd = &str[2];
pcmdstr = NULL;
len = 0;
/* zero out lengths */
*total_len = 0;
*cmdstr_len = 0;
*cmd_len = 0;
for (n = 2; n < sz; n++) {
/* end of line, or final byte of the buffer */
if (str[n] == '\n' || (n == (sz - 1))) {
*cmdstr_len = len;
/* still inside the name: the line has no command string */
if (mode == 0) {
*cmd_len = len;
*cmdstr_len = 0;
rc = 1;
}
break;
}
switch (mode) {
case 0: /* reading the command name */
if (str[n] == ' ') {
/* first space ends the name */
mode = 1;
*cmd_len = len;
len = 0;
rc = 1;
break;
}
len++;
break;
case 1: /* skipping the spaces between name and string */
if (str[n] != ' ') {
mode = 2;
len = 1;
pcmdstr = &str[n];
}
break;
case 2: /* reading the command string */
len++;
break;
}
}
*cmdstr = pcmdstr;
*cmd = pcmd;
/* n is where the scan stopped; multiplied by rc so failure yields 0 */
*total_len = n * rc;
return rc;
}
is_cmd
is used to check if the line is a command to begin
with. This is used in the parser so that general command
statements are ignored.
static int is_cmd(orgparse_state *state);

/*
 * Report whether the text at state->pos looks like a command line:
 * "#+" immediately followed by a non-space byte. Returns 1 if so, else 0.
 * Nothing in the state is modified.
 *
 * The length check uses the bytes remaining from state->pos. It used to
 * test the size of the whole buffer, so near the end of the text str[1]
 * and str[2] could be read past the buffer.
 */
static int is_cmd(orgparse_state *state)
{
    const char *str;
    size_t remaining;

    if (state->pos >= state->sz) return 0;

    remaining = state->sz - state->pos;
    if (remaining < 3) return 0;

    str = &state->txt[state->pos];
    if (str[0] != '#') return 0;
    if (str[1] != '+') return 0;
    if (str[2] == ' ') return 0;
    return 1;
}
6.7. Parse User Command
Any command that isn't handled by Orgparse will get sent here. Eventually, a user-defined callback will optionally be able to parse this.
static int parse_ucmd(orgparse_state *state);

/*
 * Consume a command line that no other handler claimed.
 *
 * Runs parse_cmd() on the text at state->pos; if it succeeds, state->pos
 * is advanced by the reported total length. The parsed name and string
 * are currently discarded. Returns parse_cmd()'s result.
 */
static int parse_ucmd(orgparse_state *state)
{
    const char *name;
    const char *arg;
    size_t name_len = 0;
    size_t arg_len = 0;
    size_t consumed = 0;
    size_t begin = state->pos;
    int found;

    found = parse_cmd(&state->txt[begin],
                      state->sz - begin,
                      &name, &name_len,
                      &arg, &arg_len,
                      &consumed);

    if (!found) {
        return found;
    }

    /* TODO: add user-defined callback */
    state->pos += consumed;
    return found;
}
6.8. Parse Codeblock
A codeblock is a set of lines smooshed between '#+BEGIN_SRC' and '#+END_SRC' tags.
static int parse_codeblock(orgparse_state *state);

/*
 * Handle the start, body and end of a source block.
 *
 * In mode 0: a "#+BEGIN_SRC" line switches to mode 1, records where the
 * block's contents start (the byte after the line break) and returns 1;
 * anything else returns 0.
 * In mode 1: a "#+END_SRC" line switches back to mode 0 and hands the
 * collected block to op->codeblock; any other line is added to the block
 * and 1 is returned.
 * state->pos is advanced to the end of whatever was consumed.
 *
 * Fixes relative to the previous version:
 *  - an empty block (END_SRC directly after BEGIN_SRC) now reports length
 *    0; `*blklen - 1` used to wrap around to SIZE_MAX;
 *  - a command with an empty name (a bare "#+" line) no longer matches:
 *    strncmp() with a length of 0 compares equal to anything.
 *
 * NOTE(review): names are still matched as prefixes of length cmdlen, so
 * e.g. "#+BEGIN" is accepted. parse_cmd() can report a name one byte
 * short at the very end of the buffer, which this prefix match tolerates,
 * so it is left as is.
 */
static int parse_codeblock(orgparse_state *state)
{
    int rc;
    size_t start;
    size_t cmdlen;
    size_t cmdstrlen;
    const char *cmd;
    const char *cmdstr;
    size_t totallen;
    size_t n;
    int new_block;
    orgparse *op;
    const char *txt;
    size_t sz;
    void *ud;
    size_t *pos;
    int *mode;
    const char **blk;
    size_t *blklen;

    op = state->op;
    txt = state->txt;
    sz = state->sz;
    ud = state->ud;
    pos = &state->pos;
    mode = &state->flags->mode;
    blk = &state->blk;
    blklen = &state->len;

    start = *pos;
    cmdlen = 0;
    cmdstrlen = 0;
    totallen = 0;

    rc = parse_cmd(&txt[start],
                   sz - start,
                   &cmd,
                   &cmdlen,
                   &cmdstr,
                   &cmdstrlen,
                   &totallen);

    new_block = 0;

    if (rc) {
        if (*mode == 0) {
            if (cmdlen > 0 && !strncmp(cmd, "BEGIN_SRC", cmdlen)) {
                *pos += totallen;
                /* contents begin after the line break ending this line */
                *blk = &txt[*pos + 1];
                *blklen = 0;
                *mode = 1;
                new_block = 1;
            } else {
                /* another command, not a codeblock */
                rc = 0;
            }
        } else if (*mode == 1) {
            if (cmdlen > 0 && !strncmp(cmd, "END_SRC", cmdlen)) {
                *pos += totallen;
                *mode = 0;
                if (op->codeblock != NULL) {
                    /* drop the final line break; empty block stays 0 */
                    op->codeblock(ud, *blk,
                                  *blklen > 0 ? *blklen - 1 : 0);
                }
                *blk = NULL;
                *blklen = 0;
            }
        }
    }

    /* Read a line */
    if (*mode == 1 && !new_block) {
        rc = 1;
        for (n = start; n < sz; n++) {
            if (txt[n] == '\n') break;
        }
        *pos += (n - start);
        /* +1 accounts for the line break */
        *blklen += (n - start) + 1;
    }

    return rc;
}
6.9. Parse Name
static int parse_name(orgparse_state *state);

/*
 * Handle a "#+NAME: value" line at state->pos.
 *
 * When the command name matches "NAME:", the value is passed to op->name
 * (if set), state->pos is advanced by the length parse_cmd() reported,
 * and 1 is returned. Otherwise returns 0 and leaves the position alone.
 *
 * A command with an empty name (a bare "#+" line) is now rejected:
 * strncmp() with a length of 0 compares equal to anything, so such a line
 * used to be reported as a NAME with a NULL value.
 *
 * NOTE(review): the name is still matched as a prefix of length cmdlen
 * (e.g. "#+N x" matches); left as is because parse_cmd() can report a
 * name one byte short at the very end of the buffer.
 */
static int parse_name(orgparse_state *state)
{
    int rc;
    size_t start;
    size_t cmdlen;
    size_t cmdstrlen;
    const char *cmd;
    const char *cmdstr;
    size_t totallen;
    orgparse *op;
    const char *txt;
    size_t sz;
    void *ud;
    size_t *pos;

    op = state->op;
    txt = state->txt;
    sz = state->sz;
    ud = state->ud;
    pos = &state->pos;

    start = *pos;
    cmdlen = 0;
    cmdstrlen = 0;
    totallen = 0;

    rc = parse_cmd(&txt[start],
                   sz - start,
                   &cmd,
                   &cmdlen,
                   &cmdstr,
                   &cmdstrlen,
                   &totallen);

    if (!rc) return 0;

    if (cmdlen == 0 || strncmp(cmd, "NAME:", cmdlen) != 0) {
        return 0;
    }

    if (op->name != NULL) {
        op->name(ud, cmdstr, cmdstrlen);
    }
    *pos += totallen;

    return rc;
}
6.10. Parsing an Aux Block
An aux block is anything inside of a @!
and !@
.
For WeeWiki, the idea here is to execute janet code for
dynamic page content. Aux blocks can be both in-line and
multiline. They start out inside of a text block, but have
their own mode for multi-line processing (similar to code
blocks).
6.10.1. Beginning a Block
6.10.1.1. Checking for an Aux Block
An aux block can be started at anytime inside of a text
block. The parsing process here must check for any @!
roaming around. This is wrapped inside of a function called
parse_aux_check
. It is called inside of parse_text
.
/* Forward declaration: reports whether an aux block opener "@!" sits at
 * str[*pos]. */
static int parse_aux_check(const char *str,
size_t sz,
size_t *pos);
At one point, this once also processed the aux block code, but the function ended up taking too many arguments! Now it only checks.
/*
 * Report whether an aux block opens at str[*pos].
 *
 * True (1) when at least four bytes remain from *pos - room for "@!" and
 * a closing "!@" - and the first two of them are "@!". *pos is only
 * read, never changed.
 */
static int parse_aux_check(const char *str,
                           size_t sz,
                           size_t *pos)
{
    const size_t at = *pos;

    return (sz - at) >= 4 && str[at] == '@' && str[at + 1] == '!';
}
6.10.1.2. Breaking Out of The Loop
If indeed an aux block has been found, the parsing routine will immediately break out of the loop and set itself up to be in aux block mode (mode 2). Any text up to this point is processed as well.
Return values aren't really being used right now, but the return value is being set to be 0 (false) to indicate that the text has been short-circuited by an aux block.
The code below is done inside of the parse_text
function.
/* flush any plain text collected before the "@!" */
if (op->text != NULL && n != mark) {
op->text(ud, &str[mark], n - mark);
}
/* an aux block that follows an empty line still opens a paragraph */
if (state->flags->newline) {
state->flags->newline = 0;
orgparse_pgrph_begin(op, ud);
}
/* switch to aux block mode; the block's contents start after "@!" */
*mode = 2;
*len = 0;
*buf = &str[n + 2];
/* 0 signals that text parsing was cut short by an aux block */
rc = 0;
break;
6.10.2. Ending it
Once the parse state machine is set to be in aux block mode, it can only be ended by finding a matching '!@' tag.
6.10.2.1. Do I end?
The only way an aux block ends is with the magic tag '!@'.
This is checked with the function parse_aux_doiend
.
static int parse_aux_doiend(orgparse_state *state);

/*
 * Report whether the aux block terminator "!@" sits at state->pos.
 * Returns 1 if it does, 0 if not or if fewer than two bytes remain.
 * The state is not modified.
 */
static int parse_aux_doiend(orgparse_state *state)
{
    const char *cur;

    if ((state->sz - state->pos) < 2) {
        return 0;
    }

    cur = &state->txt[state->pos];
    return (cur[0] == '!' && cur[1] == '@');
}
6.10.2.2. Ending The Aux Block
/* Leave aux block mode and step onto the '@' of the closing "!@". */
state->flags->mode = 0;
state->pos++;
if (state->op->aux != NULL) {
    /* len includes the '!' of the opening "@!", hence the -1. When the
     * terminator directly follows the '@' of the opener ("@!@") len is
     * still 0, so guard against wrapping around to SIZE_MAX. */
    state->op->aux(state->ud, state->blk,
                   state->len > 0 ? state->len - 1 : 0);
}
<<handle_auxblock_endofline>>
An edge case to handle: what happens when an aux block is at the end of a line? When this happens, the parser absorbs the newline character. If this doesn't happen, the text parser adds an extra newline.
/* swallow a line break that directly follows the closing "!@" */
if ((state->pos + 1) < state->sz &&
state->txt[state->pos + 1] == '\n') {
state->pos++;
}
6.10.2.3. Adding to aux line
While in aux block mode, the character block length is increased one character at a time.
/* one more byte belongs to the aux block */
state->len++;
6.11. Parse Title
static int parse_title(orgparse_state *state);

/*
 * Handle a "#+TITLE: value" line at state->pos.
 *
 * When the command name matches "TITLE:", the value is passed to
 * op->title (if set) and state->pos is advanced by the length parse_cmd()
 * reported; if the byte after the new position is a line break, the
 * position moves one further to absorb it. Returns 1 in that case, 0
 * otherwise (position untouched).
 *
 * A command with an empty name (a bare "#+" line) is now rejected:
 * strncmp() with a length of 0 compares equal to anything, so such a line
 * used to be reported as a TITLE with a NULL value.
 *
 * NOTE(review): the name is still matched as a prefix of length cmdlen
 * (e.g. "#+T x" matches); left as is because parse_cmd() can report a
 * name one byte short at the very end of the buffer.
 */
static int parse_title(orgparse_state *state)
{
    int rc;
    size_t start;
    size_t cmdlen;
    size_t cmdstrlen;
    const char *cmd;
    const char *cmdstr;
    size_t totallen;
    orgparse *op;
    const char *txt;
    size_t sz;
    void *ud;
    size_t *pos;

    op = state->op;
    txt = state->txt;
    sz = state->sz;
    ud = state->ud;
    pos = &state->pos;

    start = *pos;
    cmdlen = 0;
    cmdstrlen = 0;
    totallen = 0;

    rc = parse_cmd(&txt[start],
                   sz - start,
                   &cmd,
                   &cmdlen,
                   &cmdstr,
                   &cmdstrlen,
                   &totallen);

    if (!rc) return 0;

    if (cmdlen == 0 || strncmp(cmd, "TITLE:", cmdlen) != 0) {
        return 0;
    }

    if (op->title != NULL) {
        op->title(ud, cmdstr, cmdstrlen);
    }

    *pos += totallen;

    /* absorb an empty line that directly follows the title */
    if ((*pos + 1) < sz && txt[*pos + 1] == '\n') {
        (*pos) = (*pos) + 1;
    }

    return 1;
}
prev | home | next