4. Dictionary

The dictionary in textvm is used to store and retrieve functions that interact with the stack. The implementation of this is based off of a hashmap, but instead of using string values as keys, integer values (and therefore no hash values) will be used.

A function is wrapped inside what will be referred to as an entry.

<<typedefs>>=
typedef struct txtvm_dict_entry txtvm_dict_entry;
<<entry_struct>>=
struct txtvm_dict_entry {
<<entry>>
};

It is initialized with txtvm_dict_entry_init.

<<static_funcdefs>>=
static void entry_init(txtvm_dict_entry *e);
<<funcs>>=
static void entry_init(txtvm_dict_entry *e)
{
<<entry_init>>
}

Every entry will be identified with an integer value, which in this case will be a 16-bit value.

<<entry>>=
short id;

A negative value is considered to be an uninitialized id.

<<entry_init>>=
e->id = -1;

The main function callback for an entry takes in two arguments: a txtvm instance, and a void pointer for userdata.

<<entry>>=
int (*fun)(txtvm *, void *);

When NULL, the function is disabled.

<<entry_init>>=
e->fun = NULL;

User data can be optionally bound to the entry. A cleanup callback is also provided.

<<entry>>=
void *ud;
void (*clean)(void *);
<<entry_init>>=
e->clean = NULL;

Entries exist in a doubly linked list, so the usual prev/next pointers are included.

<<entry>>=
txtvm_dict_entry *prev;
txtvm_dict_entry *next;
<<entry_init>>=
e->prev = NULL;
e->next = NULL;

Entries are wrapped up inside of an entry list, which is a core component in the dictionary. It contains a head, tail, and size.

<<typedefs>>=
typedef struct txtvm_dict_entrylist txtvm_dict_entrylist;
<<entrylist_struct>>=
<<entry_struct>>
struct txtvm_dict_entrylist {
    txtvm_dict_entry *head;
    txtvm_dict_entry *tail;
    short size;
};
<<static_funcdefs>>=
static void entrylist_init(txtvm_dict_entrylist *el);
<<funcs>>=
static void entrylist_init(txtvm_dict_entrylist *el)
{
    el->head = NULL;
    el->tail = NULL;
    el->size = 0;
}

When an entry is appeneded to the doubly-linked list, things need to be considered in two directions. The next entry of the current tail is set to point to the new entry like a single linked list. In the other direction, the prev entry of the new entry is set to be the tail of the current entry.

After linking, the tail is then updated.

The edge case to handle is when the entry list is empty. In this case, the head and the tail will be set to be the current entry, and then things will carry on normally. This will set the initial head of the list to be circular, but this should be an okay side effect.

<<static_funcdefs>>=
static void entry_append(txtvm_dict_entrylist *el,
                         txtvm_dict_entry *e);
<<funcs>>=
static void entry_append(txtvm_dict_entrylist *el,
                         txtvm_dict_entry *e)
{
    if (el->head == NULL) {
        el->size = 0;
        el->head = e;
        el->tail = e;
    }

    el->tail->next = e;
    e->prev = el->tail;
    el->tail = e;
    el->size++;
}

The key advantage to a doubly-linked list is that items can be removed, or unlinked, from a list. To do this, only the entry in question needs to be present.

<<static_funcdefs>>=
static void entry_unlink(txtvm_dict_entry *ent);

The unlinking process is a matter of fusing the entries prev and next values together. prev->next needs to be updated to point to next. next->prev needs to be updated to point to prev. This operation should check for NULL values before attempting to read anything. After unlinking, the entry prev/next pointers will be cleared.

<<funcs>>=
static void entry_unlink(txtvm_dict_entry *ent)
{
    txtvm_dict_entry *prev, *next;

    prev = ent->prev;
    next = ent->next;

    if (prev != NULL) {
        prev->next = next;
    }

    if (next != NULL) {
        next->prev = prev;
    }

    ent->prev = NULL;
    ent->next = NULL;
}

A dictionary implementation is an array of entry lists, along with a word count. It is called txtvm_dict.

<<typedefs>>=
typedef struct txtvm_dict txtvm_dict;
<<structs>>=
<<entrylist_struct>>
#define TXTVM_DICT_SIZE 32
struct txtvm_dict {
    txtvm_dict_entrylist lists[TXTVM_DICT_SIZE];
    short nwords;
};
<<funcdefs>>=
void txtvm_dict_init(txtvm_dict *dict);
<<funcs>>=
void txtvm_dict_init(txtvm_dict *dict)
{
    int i;

    dict->nwords = 0;

    for (i = 0; i < TXTVM_DICT_SIZE; i++) {
        entrylist_init(&dict->lists[i]);
    }
}
<<funcdefs>>=
void txtvm_dict_free(txtvm_dict *dict);
<<funcs>>=
void txtvm_dict_free(txtvm_dict *dict)
{
    int i;

    for (i = 0; i < TXTVM_DICT_SIZE; i++) {
        entrylist_free(&dict->lists[i]);
    }
}
<<static_funcdefs>>=
static void entrylist_free(txtvm_dict_entrylist *el);
<<funcs>>=
static void entrylist_free(txtvm_dict_entrylist *el)
{
    int i;
    txtvm_dict_entry *ent, *next;

    ent = el->head;
    for (i = 0; i < el->size; i++) {
        next = ent->next;
        if (ent->clean != NULL) ent->clean(ent->ud);
        free(ent);
        ent = next;
    }
}

A new entry is appended to the dictionary by supplying it's ID. It will first attempt to find a previously allocated entry with that ID and re-initialize it. Short of that, a new entry is allocated, initialized, and appended to the dictionary. This entry is then returned, where it can be further populated with a function and optional user data.

<<funcdefs>>=
txtvm_dict_entry* txtvm_dict_new_entry(txtvm_dict *d, short id);
<<funcs>>=
txtvm_dict_entry* txtvm_dict_new_entry(txtvm_dict *d, short id)
{
    txtvm_dict_entrylist *l;

    l = &d->lists[id % TXTVM_DICT_SIZE];

    return entrylist_new(l, id);
}
<<static_funcdefs>>=
static txtvm_dict_entry* entrylist_new(txtvm_dict_entrylist *l, short id);
<<funcs>>=
static txtvm_dict_entry* entrylist_new(txtvm_dict_entrylist *l, short id)
{
    txtvm_dict_entry *e;
    e = NULL;
    e = malloc(sizeof(txtvm_dict_entry));

    entry_init(e);
    e->id = id;
    entry_append(l, e);
    return e;
}

Looking up an entry in a dictionary by id is a matter of first finding which list the entry would belong to (a modulo operation), then traversing through that list until an entry is found or it reaches the end. If no entry is found, an error code is returned. Otherwise, the entry is stored in the supplied pointer.

<<funcdefs>>=
int txtvm_dict_lookup(txtvm_dict *d,
                      short id,
                      txtvm_dict_entry **e);
<<funcs>>=
int txtvm_dict_lookup(txtvm_dict *d,
                      short id,
                      txtvm_dict_entry **e)
{
    int i;
    txtvm_dict_entrylist *l;
    txtvm_dict_entry *ent;

    l = &d->lists[id % TXTVM_DICT_SIZE];

    ent = l->head;
    for (i = 0; i < l->size; i++) {
        if (ent->id == id) {
            *e = ent;
            return TXTVM_OK;
        }
    }

    return TXTVM_NOTFOUND;
}
<<errorlist>>=
ERRORCODE(TXTVM_NOTFOUND, "entry not found.")

An entry is removed by looking up it's id in the dictionary and unlinking it. If no entry is found, an error code is returned.



prev | home | next