summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile4
-rw-r--r--wv_dom.c7
-rw-r--r--wv_dom.h12
-rw-r--r--wv_mem.c12
-rw-r--r--wv_mem.h11
-rw-r--r--wv_parse.c207
-rw-r--r--wv_parse.h54
-rw-r--r--wv_vec.c46
-rw-r--r--wv_vec.h21
9 files changed, 360 insertions, 14 deletions
diff --git a/Makefile b/Makefile
index f10ad4c..c396173 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@ CC = cc
CFLAGS = -std=c11 -Wall -Wextra -Wpedantic -g -O0
LDFLAGS =
-HDRS = wv_mem.h wv_err.h wv_dom.h
-SRCS = wv_mem.c wv_dom.c wv_main.c
+HDRS = wv_mem.h wv_err.h wv_vec.h wv_dom.h wv_parse.h
+SRCS = wv_mem.c wv_vec.c wv_dom.c wv_parse.c wv_main.c
OBJS = $(SRCS:.c=.o)
TARGET = webview
diff --git a/wv_dom.c b/wv_dom.c
index ee24e98..a17c1bc 100644
--- a/wv_dom.c
+++ b/wv_dom.c
@@ -8,7 +8,7 @@ wv_ref wv_node_new(struct wv_arena *arena, wv_node_type type)
wv_ref ref;
struct wv_node *node;
- ref = wv_alloc(arena, sizeof(struct wv_node));
+ ref = wv_arena_alloc(arena, sizeof(struct wv_node));
node = (struct wv_node *)WV_ADDR(arena, ref);
memset(node, 0, sizeof(struct wv_node));
node->type = type;
@@ -50,9 +50,10 @@ void wv_attr_set(struct wv_arena *arena, wv_ref node_ref,
struct wv_attr *a;
n = (struct wv_node *)WV_ADDR(arena, node_ref);
- if (n->type != WV_NODE_ELEMENT) return;
+ if (n->type != WV_NODE_ELEMENT)
+ return;
- attr_ref = wv_alloc(arena, sizeof(struct wv_attr));
+ attr_ref = wv_arena_alloc(arena, sizeof(struct wv_attr));
a = (struct wv_attr *)WV_ADDR(arena, attr_ref);
a->key = key_str;
diff --git a/wv_dom.h b/wv_dom.h
index 9a2c281..b5bf576 100644
--- a/wv_dom.h
+++ b/wv_dom.h
@@ -13,6 +13,7 @@ typedef enum {
WV_TAG_LINK,
WV_TAG_BODY,
WV_TAG_HEADER,
+ WV_TAG_DIV,
WV_TAG_H1,
WV_TAG_A,
WV_TAG_ARTICLE,
@@ -58,12 +59,15 @@ struct wv_node {
};
/* DOM operations */
-wv_ref wv_node_new(struct wv_arena *arena, wv_node_type type);
-void wv_node_append(struct wv_arena *arena, wv_ref parent, wv_ref child);
+wv_ref wv_node_new(struct wv_arena *arena, wv_node_type type);
+void wv_node_append(struct wv_arena *arena, wv_ref parent,
+ wv_ref child);
/* Attribute operations */
-wv_ref wv_attr_get(struct wv_arena *arena, wv_ref node_ref, const char *key_name);
-void wv_attr_set(struct wv_arena *arena, wv_ref node_ref, wv_ref key_str, wv_ref val_str);
+wv_ref wv_attr_get(struct wv_arena *arena, wv_ref node_ref,
+ const char *key_name);
+void wv_attr_set(struct wv_arena *arena, wv_ref node_ref,
+ wv_ref key_str, wv_ref val_str);
#endif /* WV_DOM_H */
diff --git a/wv_mem.c b/wv_mem.c
index 7bf54d6..c1d810b 100644
--- a/wv_mem.c
+++ b/wv_mem.c
@@ -33,7 +33,7 @@ struct wv_arena *wv_arena_create(size_t n)
return arena;
}
-wv_ref wv_alloc(struct wv_arena *arena, size_t n)
+wv_ref wv_arena_alloc(struct wv_arena *arena, size_t n)
{
wv_ref ref;
unsigned char *new_buf;
@@ -89,3 +89,13 @@ void wv_arena_destroy(struct wv_arena *arena)
free(arena);
}
+/*
+ * Copy `len` bytes from `src` into a new arena allocation of len + 1
+ * bytes and terminate the copy with '\0'.
+ *
+ * Returns the ref of the new allocation. `src` does not need to be
+ * NUL-terminated; exactly `len` bytes are read from it.
+ */
+wv_ref wv_arena_push_string(struct wv_arena *arena, const char *src,
+			    size_t len)
+{
+	wv_ref str_ref;
+	char *out;
+
+	/* Reserve room for the bytes plus the terminator. */
+	str_ref = wv_arena_alloc(arena, len + 1);
+	out = (char *)WV_ADDR(arena, str_ref);
+
+	memcpy(out, src, len);
+	out[len] = '\0';
+
+	return str_ref;
+}
+
diff --git a/wv_mem.h b/wv_mem.h
index dc95c7c..4b914ef 100644
--- a/wv_mem.h
+++ b/wv_mem.h
@@ -16,9 +16,12 @@ struct wv_arena {
unsigned char *buf;
};
-struct wv_arena* wv_arena_create(size_t n);
-wv_ref wv_alloc(struct wv_arena *arena, size_t n);
-void wv_arena_reset(struct wv_arena *arena);
-void wv_arena_destroy(struct wv_arena *arena);
+struct wv_arena *wv_arena_create(size_t n);
+wv_ref wv_arena_alloc(struct wv_arena *arena, size_t n);
+void wv_arena_reset(struct wv_arena *arena);
+void wv_arena_destroy(struct wv_arena *arena);
+
+wv_ref wv_arena_push_string(struct wv_arena *arena, const char *src,
+ size_t len);
#endif /* WV_MEM_H */
diff --git a/wv_parse.c b/wv_parse.c
new file mode 100644
index 0000000..a94131e
--- /dev/null
+++ b/wv_parse.c
@@ -0,0 +1,207 @@
+#include <ctype.h>
+#include <string.h>
+
+#include "wv_parse.h"
+#include "wv_vec.h"
+
+/*
+ * Advance t->pos past a run of whitespace characters, never moving
+ * beyond t->len.
+ */
+static void skip_whitespace(struct wv_tokenizer *t)
+{
+	/*
+	 * <ctype.h> classifiers require a value representable as unsigned
+	 * char (or EOF). Plain char may be signed, so bytes >= 0x80 must be
+	 * converted before the call to avoid undefined behaviour.
+	 */
+	while (t->pos < t->len && isspace((unsigned char)t->src[t->pos]))
+		t->pos++;
+}
+
+/*
+ * Return non-zero if `c` ends a tag name or attribute key: whitespace,
+ * '=', '>' or '/'.
+ */
+static int is_delim(char c)
+{
+	/* Convert to unsigned char first: isspace() is undefined for
+	 * negative arguments other than EOF. */
+	return isspace((unsigned char)c) || c == '=' || c == '>' || c == '/';
+}
+
+/*
+ * Translate a tag name of `len` bytes (not necessarily NUL-terminated)
+ * into its wv_tag_id. Matching is exact and case-sensitive; any name
+ * that is not in the table maps to WV_TAG_UNKNOWN.
+ */
+static wv_tag_id map_tag_name(const char *name, size_t len)
+{
+	static const struct {
+		const char *text;
+		size_t text_len;
+		wv_tag_id id;
+	} known[] = {
+		{ "div", 3, WV_TAG_DIV },
+		{ "a",   1, WV_TAG_A   },
+		{ "li",  2, WV_TAG_LI  },
+		{ "ul",  2, WV_TAG_UL  },
+		{ "h1",  2, WV_TAG_H1  },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		/* Lengths must agree before the bytes are compared. */
+		if (len != known[i].text_len)
+			continue;
+		if (strncmp(name, known[i].text, known[i].text_len) == 0)
+			return known[i].id;
+	}
+
+	return WV_TAG_UNKNOWN;
+}
+
+/*
+ * Prepare `t` to tokenize the first `len` bytes of `src`, starting at
+ * offset 0 in the WV_STATE_DATA state with no quote character recorded.
+ * The tokenizer keeps the `src` pointer; it does not copy the input.
+ */
+void wv_tokenizer_init(struct wv_tokenizer *t, const char *src,
+		       size_t len)
+{
+	*t = (struct wv_tokenizer){
+		.src = src,
+		.pos = 0,
+		.len = len,
+		.state = WV_STATE_DATA,
+		.quote_char = '\0',
+	};
+}
+
+/*
+ * Return the next token from the input and advance the tokenizer.
+ *
+ * Token text is reported as a (start, len) view into t->src; nothing is
+ * copied. The token type depends on the current state:
+ *   WV_STATE_DATA      - WV_TOK_TEXT up to the next '<', or after a '<'
+ *                        WV_TOK_TAG_OPEN / WV_TOK_TAG_CLOSE carrying the
+ *                        tag name.
+ *   WV_STATE_TAG       - WV_TOK_ATTR_KEY, or on '>' / "/>" the tag is
+ *                        left and the next data-state token is returned.
+ *   WV_STATE_ATTR_VAL  - WV_TOK_ATTR_VAL, quoted (quotes excluded) or
+ *                        unquoted.
+ * WV_TOK_EOF (start == NULL, len == 0) is returned once the input is
+ * exhausted.
+ */
+struct wv_token wv_tokenizer_next(struct wv_tokenizer *t)
+{
+	struct wv_token tok = {WV_TOK_EOF, NULL, 0};
+
+	if (t->pos >= t->len)
+		return tok;
+
+	switch (t->state) {
+	case WV_STATE_DATA:
+		if (t->src[t->pos] == '<') {
+			t->pos++;
+			skip_whitespace(t);
+
+			if (t->pos < t->len && t->src[t->pos] == '/') {
+				t->pos++;
+				tok.type = WV_TOK_TAG_CLOSE;
+			} else {
+				tok.type = WV_TOK_TAG_OPEN;
+			}
+
+			/* The tag name runs up to the first delimiter. */
+			tok.start = &t->src[t->pos];
+			while (t->pos < t->len && !is_delim(t->src[t->pos]))
+				t->pos++;
+			tok.len = &t->src[t->pos] - tok.start;
+
+			t->state = WV_STATE_TAG;
+			return tok;
+		}
+
+		/* Extract text until the next tag starts */
+		tok.type = WV_TOK_TEXT;
+		tok.start = &t->src[t->pos];
+		while (t->pos < t->len && t->src[t->pos] != '<')
+			t->pos++;
+		tok.len = &t->src[t->pos] - tok.start;
+		return tok;
+
+	case WV_STATE_TAG:
+		/*
+		 * Skip whitespace and any stray '/' that is not part of
+		 * "/>". '/' is a delimiter, so without this the key scan
+		 * below would consume nothing and the tokenizer would
+		 * return the same empty key forever (e.g. on "<a /b>").
+		 */
+		for (;;) {
+			skip_whitespace(t);
+			if (t->pos >= t->len)
+				return tok;
+			if (t->src[t->pos] != '/')
+				break;
+			if (t->pos + 1 < t->len && t->src[t->pos + 1] == '>')
+				break;
+			t->pos++;
+		}
+
+		/* End of tag or self-closing tag; a '/' seen here is
+		 * always followed by '>' because of the loop above. */
+		if (t->src[t->pos] == '>' || t->src[t->pos] == '/') {
+			if (t->src[t->pos] == '/')
+				t->pos++;
+			t->pos++;
+			t->state = WV_STATE_DATA;
+			/* The data state always yields a token or EOF
+			 * without recursing again. */
+			return wv_tokenizer_next(t);
+		}
+
+		/* Attribute Key */
+		tok.type = WV_TOK_ATTR_KEY;
+		tok.start = &t->src[t->pos];
+		while (t->pos < t->len && !is_delim(t->src[t->pos]))
+			t->pos++;
+		tok.len = &t->src[t->pos] - tok.start;
+
+		/* Only a following '=' switches to value mode; otherwise
+		 * the key has no value and the state stays WV_STATE_TAG. */
+		skip_whitespace(t);
+		if (t->pos < t->len && t->src[t->pos] == '=') {
+			t->pos++;
+			t->state = WV_STATE_ATTR_VAL;
+		}
+		return tok;
+
+	case WV_STATE_ATTR_VAL:
+		skip_whitespace(t);
+
+		if (t->pos < t->len && (t->src[t->pos] == '"' ||
+					t->src[t->pos] == '\'')) {
+			/* Quoted value: everything up to the matching
+			 * quote, or to end of input if it never closes. */
+			t->quote_char = t->src[t->pos++];
+			tok.start = &t->src[t->pos];
+			while (t->pos < t->len &&
+			       t->src[t->pos] != t->quote_char)
+				t->pos++;
+			tok.len = &t->src[t->pos] - tok.start;
+			if (t->pos < t->len)
+				t->pos++;	/* consume closing quote */
+		} else {
+			/* Unquoted value: up to whitespace or '>'. The
+			 * cast keeps isspace() defined for bytes >= 0x80. */
+			tok.start = &t->src[t->pos];
+			while (t->pos < t->len &&
+			       !isspace((unsigned char)t->src[t->pos]) &&
+			       t->src[t->pos] != '>')
+				t->pos++;
+			tok.len = &t->src[t->pos] - tok.start;
+		}
+
+		tok.type = WV_TOK_ATTR_VAL;
+		t->state = WV_STATE_TAG;
+		return tok;
+	}
+
+	return tok;
+}
+
+/*
+ * Parse the NUL-terminated markup in `src` into a node tree allocated
+ * from `arena`.
+ *
+ * Open tags create element nodes and are pushed on a stack of open
+ * elements; close tags pop that stack (the close tag's name is not
+ * checked against the element being closed). Text is attached to the
+ * innermost open element and dropped when no element is open.
+ * Attributes with a value are stored on the most recently opened
+ * element; attributes without a value are ignored.
+ *
+ * Returns the ref of the first top-level element, or 0 when `src` is
+ * NULL or contains no element.
+ *
+ * NOTE(review): the tokenizer does not report "/>" separately, so a
+ * self-closing tag stays on the open-element stack until some later
+ * close tag pops it.
+ */
+wv_ref wv_parse_document(struct wv_arena *arena, const char *src)
+{
+	struct wv_parser p;
+	struct wv_token tok;
+	wv_ref root = 0;
+	wv_ref last_el = 0;
+
+	if (src == NULL)
+		return 0;
+
+	wv_tokenizer_init(&p.tokenizer, src, strlen(src));
+	p.arena = arena;
+	wv_vec_init(&p.stack, sizeof(wv_ref));
+
+	while ((tok = wv_tokenizer_next(&p.tokenizer)).type != WV_TOK_EOF) {
+		switch (tok.type) {
+		case WV_TOK_TAG_OPEN: {
+			wv_ref new_node = wv_node_new(p.arena, WV_NODE_ELEMENT);
+			struct wv_node *n =
+				(struct wv_node *)WV_ADDR(p.arena, new_node);
+			n->u.element.tag_id = map_tag_name(tok.start, tok.len);
+
+			/* Determine parent: either top of stack or this is the root */
+			wv_ref *parent = (wv_ref *)wv_vec_last(&p.stack);
+			if (parent)
+				wv_node_append(p.arena, *parent, new_node);
+			else if (root == 0)
+				root = new_node;
+
+			wv_vec_push(&p.stack, &new_node);
+			last_el = new_node; /* Store for potential attribute keys */
+			break;
+		}
+
+		case WV_TOK_TAG_CLOSE:
+			wv_vec_pop(&p.stack);
+			break;
+
+		case WV_TOK_ATTR_KEY: {
+			struct wv_token val_tok;
+			wv_ref k;
+			wv_ref v;
+
+			/*
+			 * The tokenizer enters WV_STATE_ATTR_VAL only when
+			 * the key was followed by '='. For a key without a
+			 * value the next token is unrelated (another key,
+			 * text, a tag) and must not be consumed here.
+			 */
+			if (p.tokenizer.state != WV_STATE_ATTR_VAL)
+				break;
+
+			val_tok = wv_tokenizer_next(&p.tokenizer);
+			if (val_tok.type != WV_TOK_ATTR_VAL || last_el == 0)
+				break;
+
+			k = wv_arena_push_string(p.arena, tok.start, tok.len);
+			v = wv_arena_push_string(p.arena, val_tok.start,
+						 val_tok.len);
+			wv_attr_set(p.arena, last_el, k, v);
+			break;
+		}
+
+		case WV_TOK_TEXT: {
+			wv_ref *parent = (wv_ref *)wv_vec_last(&p.stack);
+			wv_ref txt_node;
+			wv_ref str;
+			struct wv_node *n;
+
+			if (!parent)
+				break;
+
+			txt_node = wv_node_new(p.arena, WV_NODE_TEXT);
+			str = wv_arena_push_string(p.arena, tok.start, tok.len);
+
+			/*
+			 * Resolve the node address only after every arena
+			 * allocation for this token is done. NOTE(review):
+			 * the allocator is not visible here; if it can move
+			 * the arena buffer when growing, an address taken
+			 * before wv_arena_push_string() would dangle.
+			 */
+			n = (struct wv_node *)WV_ADDR(p.arena, txt_node);
+			n->u.text.str = str;
+			n->u.text.len = tok.len;
+
+			wv_node_append(p.arena, *parent, txt_node);
+			break;
+		}
+
+		default:
+			break;
+		}
+	}
+
+	wv_vec_free(&p.stack);
+
+	return root;
+}
+
diff --git a/wv_parse.h b/wv_parse.h
new file mode 100644
index 0000000..ca3f8ac
--- /dev/null
+++ b/wv_parse.h
@@ -0,0 +1,54 @@
+#ifndef WV_PARSE_H
+#define WV_PARSE_H
+
+#include <stddef.h>
+
+#include "wv_mem.h"
+#include "wv_dom.h"
+#include "wv_vec.h"
+
+typedef enum { /* Kinds of token returned by wv_tokenizer_next() */
+ WV_TOK_EOF = 0, /* Returned once the input is exhausted */
+ WV_TOK_TAG_OPEN, /* Tag name after '<', e.g. "div" from <div */
+ WV_TOK_TAG_CLOSE, /* Tag name after '</', e.g. "div" from </div>; a bare '>' emits no token */
+ WV_TOK_ATTR_KEY, /* Attribute name inside a tag */
+ WV_TOK_ATTR_VAL, /* Attribute value after '=', surrounding quotes excluded */
+ WV_TOK_TEXT, /* Character data up to the next '<' */
+ WV_TOK_ERROR /* NOTE(review): not produced by wv_tokenizer_next() as written */
+} wv_token_type;
+
+struct wv_token { /* One token, described as a view into the tokenizer's input */
+ wv_token_type type;
+ const char *start; /* First byte of the token text inside the input; NULL for WV_TOK_EOF */
+ size_t len; /* Number of bytes of token text; the text is not NUL-terminated at len */
+};
+
+typedef enum { /* Tokenizer states; wv_tokenizer_init() starts in WV_STATE_DATA */
+ WV_STATE_DATA, /* Outside of any tag: reads text up to '<', or the tag name right after it */
+ WV_STATE_TAG, /* Inside <...> after the tag name: looking for attr keys or the closing '>' */
+ WV_STATE_ATTR_VAL /* Found a '=', reading a quoted or unquoted value */
+} wv_state;
+
+struct wv_tokenizer { /* Cursor over an input buffer plus the current lexing state */
+ const char *src; /* Input being tokenized; referenced, not copied */
+ size_t pos; /* Index of the next unread byte in src */
+ size_t len; /* Number of bytes of src to tokenize */
+ wv_state state;
+ char quote_char; /* Opening quote (" or ') of the most recent quoted value; '\0' after init */
+};
+
+struct wv_parser { /* Working state used by wv_parse_document() */
+ struct wv_tokenizer tokenizer;
+ struct wv_arena *arena; /* Arena that nodes and strings are allocated from */
+ struct wv_vec stack; /* Stack of wv_ref values: the currently open elements */
+ wv_ref doc_ref; /* NOTE(review): neither set nor read by wv_parse_document() */
+ wv_ref current_node; /* NOTE(review): neither set nor read by wv_parse_document() */
+};
+
+void wv_tokenizer_init(struct wv_tokenizer *t, const char *src,
+ size_t len); /* Begin tokenizing the first len bytes of src */
+struct wv_token wv_tokenizer_next(struct wv_tokenizer *t); /* Next token; WV_TOK_EOF at end of input */
+wv_ref wv_parse_document(struct wv_arena *arena, const char *src); /* Parse NUL-terminated src; returns the first top-level element, or 0 if none */
+
+#endif /* WV_PARSE_H */
+
diff --git a/wv_vec.c b/wv_vec.c
new file mode 100644
index 0000000..1d1eaad
--- /dev/null
+++ b/wv_vec.c
@@ -0,0 +1,46 @@
+#include <string.h>
+
+#include "wv_vec.h"
+
+/*
+ * Set `v` up as an empty vector of items that are `unit_size` bytes
+ * each. Nothing is allocated until the first push.
+ */
+void wv_vec_init(struct wv_vec *v, size_t unit_size)
+{
+	*v = (struct wv_vec){
+		.data = NULL,
+		.len = 0,
+		.cap = 0,
+		.unit_size = unit_size,
+	};
+}
+
+/*
+ * Append a copy of the item at `item` (v->unit_size bytes) to the end
+ * of the vector.
+ *
+ * When the buffer is full its capacity becomes 8 items, or doubles.
+ * The function cannot report failure through its void return, so an
+ * allocation failure or a size overflow terminates the process with
+ * abort() instead of writing through a NULL or undersized buffer.
+ */
+void wv_vec_push(struct wv_vec *v, const void *item)
+{
+	if (v->len >= v->cap) {
+		size_t new_cap = (v->cap == 0) ? 8 : v->cap * 2;
+		size_t bytes;
+		void *new_data;
+
+		/* Reject wrap-around of the item count or the byte count. */
+		if (new_cap <= v->cap)
+			abort();
+		if (v->unit_size != 0 && new_cap > (size_t)-1 / v->unit_size)
+			abort();
+		bytes = new_cap * v->unit_size;
+
+		/*
+		 * Keep the old pointer until realloc succeeds: on failure
+		 * realloc returns NULL and leaves the old block allocated.
+		 */
+		new_data = realloc(v->data, bytes);
+		if (new_data == NULL && bytes != 0)
+			abort();
+
+		v->data = new_data;
+		v->cap = new_cap;
+	}
+
+	void *target = (char *)v->data + (v->len * v->unit_size);
+	memcpy(target, item, v->unit_size);
+	v->len++;
+}
+
+/*
+ * Remove the last item and return a pointer to it, or NULL when the
+ * vector is empty.
+ *
+ * The pointer refers to storage still owned by the vector: the slot is
+ * reused by the next push and released by wv_vec_free().
+ */
+void *wv_vec_pop(struct wv_vec *v)
+{
+	char *base;
+
+	if (v->len > 0) {
+		base = v->data;
+		v->len -= 1;
+		return base + v->len * v->unit_size;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return a pointer to the last item without removing it, or NULL when
+ * the vector is empty. The pointer refers to the vector's own storage.
+ */
+void *wv_vec_last(struct wv_vec *v)
+{
+	size_t last_index;
+	char *base;
+
+	if (v->len > 0) {
+		last_index = v->len - 1;
+		base = v->data;
+		return base + last_index * v->unit_size;
+	}
+
+	return NULL;
+}
+
+/*
+ * Release the vector's buffer and return it to the empty state.
+ * unit_size is left untouched.
+ */
+void wv_vec_free(struct wv_vec *v)
+{
+	free(v->data);
+
+	v->cap = 0;
+	v->len = 0;
+	v->data = NULL;
+}
+
diff --git a/wv_vec.h b/wv_vec.h
new file mode 100644
index 0000000..721856b
--- /dev/null
+++ b/wv_vec.h
@@ -0,0 +1,21 @@
+#ifndef WV_VEC_H
+#define WV_VEC_H
+
+#include <stddef.h>
+#include <stdlib.h>
+
+struct wv_vec { /* Growable array of fixed-size items */
+ void *data; /* Buffer holding the items; NULL while nothing is allocated */
+ size_t len; /* Number of items currently stored */
+ size_t cap; /* Number of items the buffer has room for */
+ size_t unit_size; /* Size of one item in bytes */
+};
+
+void wv_vec_init(struct wv_vec *v, size_t unit_size); /* Empty vector of unit_size-byte items */
+void wv_vec_push(struct wv_vec *v, const void *item); /* Append a copy of *item, growing the buffer when full */
+void *wv_vec_pop(struct wv_vec *v); /* Remove the last item and return a pointer to it; NULL if empty */
+void *wv_vec_last(struct wv_vec *v); /* Pointer to the last item without removing it; NULL if empty */
+void wv_vec_free(struct wv_vec *v); /* Free the buffer and reset len and cap to 0 */
+
+#endif /* WV_VEC_H */
+