diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2026-05-06 17:46:49 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2026-05-06 17:46:49 +0800 |
| commit | 8f0c3d4697742fb64cb1af8ba28fa2bb6f99de5a (patch) | |
| tree | 1822d135ec879620361e1d80cb54a63d2d8d3602 | |
| parent | fd2d93f4a97ab5a3bc18764c353b971b4035ac6a (diff) | |
| download | web-view-8f0c3d4697742fb64cb1af8ba28fa2bb6f99de5a.tar.gz | |
| -rw-r--r-- | Makefile | 4 | ||||
| -rw-r--r-- | wv_dom.c | 7 | ||||
| -rw-r--r-- | wv_dom.h | 12 | ||||
| -rw-r--r-- | wv_mem.c | 12 | ||||
| -rw-r--r-- | wv_mem.h | 11 | ||||
| -rw-r--r-- | wv_parse.c | 207 | ||||
| -rw-r--r-- | wv_parse.h | 54 | ||||
| -rw-r--r-- | wv_vec.c | 46 | ||||
| -rw-r--r-- | wv_vec.h | 21 |
9 files changed, 360 insertions, 14 deletions
@@ -2,8 +2,8 @@ CC = cc CFLAGS = -std=c11 -Wall -Wextra -Wpedantic -g -O0 LDFLAGS = -HDRS = wv_mem.h wv_err.h wv_dom.h -SRCS = wv_mem.c wv_dom.c wv_main.c +HDRS = wv_mem.h wv_err.h wv_vec.h wv_dom.h wv_parse.h +SRCS = wv_mem.c wv_vec.c wv_dom.c wv_parse.c wv_main.c OBJS = $(SRCS:.c=.o) TARGET = webview @@ -8,7 +8,7 @@ wv_ref wv_node_new(struct wv_arena *arena, wv_node_type type) wv_ref ref; struct wv_node *node; - ref = wv_alloc(arena, sizeof(struct wv_node)); + ref = wv_arena_alloc(arena, sizeof(struct wv_node)); node = (struct wv_node *)WV_ADDR(arena, ref); memset(node, 0, sizeof(struct wv_node)); node->type = type; @@ -50,9 +50,10 @@ void wv_attr_set(struct wv_arena *arena, wv_ref node_ref, struct wv_attr *a; n = (struct wv_node *)WV_ADDR(arena, node_ref); - if (n->type != WV_NODE_ELEMENT) return; + if (n->type != WV_NODE_ELEMENT) + return; - attr_ref = wv_alloc(arena, sizeof(struct wv_attr)); + attr_ref = wv_arena_alloc(arena, sizeof(struct wv_attr)); a = (struct wv_attr *)WV_ADDR(arena, attr_ref); a->key = key_str; @@ -13,6 +13,7 @@ typedef enum { WV_TAG_LINK, WV_TAG_BODY, WV_TAG_HEADER, + WV_TAG_DIV, WV_TAG_H1, WV_TAG_A, WV_TAG_ARTICLE, @@ -58,12 +59,15 @@ struct wv_node { }; /* DOM operations */ -wv_ref wv_node_new(struct wv_arena *arena, wv_node_type type); -void wv_node_append(struct wv_arena *arena, wv_ref parent, wv_ref child); +wv_ref wv_node_new(struct wv_arena *arena, wv_node_type type); +void wv_node_append(struct wv_arena *arena, wv_ref parent, + wv_ref child); /* Attribute operations */ -wv_ref wv_attr_get(struct wv_arena *arena, wv_ref node_ref, const char *key_name); -void wv_attr_set(struct wv_arena *arena, wv_ref node_ref, wv_ref key_str, wv_ref val_str); +wv_ref wv_attr_get(struct wv_arena *arena, wv_ref node_ref, + const char *key_name); +void wv_attr_set(struct wv_arena *arena, wv_ref node_ref, + wv_ref key_str, wv_ref val_str); #endif /* WV_DOM_H */ @@ -33,7 +33,7 @@ struct wv_arena *wv_arena_create(size_t n) return arena; } -wv_ref wv_alloc(struct wv_arena *arena, size_t n) +wv_ref wv_arena_alloc(struct wv_arena *arena, size_t n) { wv_ref ref; unsigned char *new_buf; @@ -89,3 +89,13 @@ void wv_arena_destroy(struct wv_arena *arena) free(arena); } +wv_ref wv_arena_push_string(struct wv_arena *arena, const char *src, + size_t len) +{ + wv_ref ref = wv_arena_alloc(arena, len + 1); + char *dst = (char *)WV_ADDR(arena, ref); + memcpy(dst, src, len); + dst[len] = '\0'; + return ref; +} + @@ -16,9 +16,12 @@ struct wv_arena { unsigned char *buf; }; -struct wv_arena* wv_arena_create(size_t n); -wv_ref wv_alloc(struct wv_arena *arena, size_t n); -void wv_arena_reset(struct wv_arena *arena); -void wv_arena_destroy(struct wv_arena *arena); +struct wv_arena *wv_arena_create(size_t n); +wv_ref wv_arena_alloc(struct wv_arena *arena, size_t n); +void wv_arena_reset(struct wv_arena *arena); +void wv_arena_destroy(struct wv_arena *arena); + +wv_ref wv_arena_push_string(struct wv_arena *arena, const char *src, + size_t len); #endif /* WV_MEM_H */ diff --git a/wv_parse.c b/wv_parse.c new file mode 100644 index 0000000..a94131e --- /dev/null +++ b/wv_parse.c @@ -0,0 +1,207 @@ +#include <ctype.h> +#include <string.h> + +#include "wv_parse.h" +#include "wv_vec.h" + +static void skip_whitespace(struct wv_tokenizer *t) +{ + while (t->pos < t->len && isspace(t->src[t->pos])) + t->pos++; +} + +static int is_delim(char c) +{ + return isspace(c) || c == '=' || c == '>' || c == '/'; +} + +static wv_tag_id map_tag_name(const char *name, size_t len) +{ + if (len == 3 && strncmp(name, "div", 3) == 0) + return WV_TAG_DIV; + if (len == 1 && strncmp(name, "a", 1) == 0) + return WV_TAG_A; + if (len == 2 && strncmp(name, "li", 2) == 0) + return WV_TAG_LI; + if (len == 2 && strncmp(name, "ul", 2) == 0) + return WV_TAG_UL; + if (len == 2 && strncmp(name, "h1", 2) == 0) + return WV_TAG_H1; + + return WV_TAG_UNKNOWN; +} + +void wv_tokenizer_init(struct wv_tokenizer *t, const char *src, + size_t len) +{ + t->src = src; + t->len = len; + t->pos = 0; + t->state = WV_STATE_DATA; + t->quote_char = '\0'; +} + +struct wv_token wv_tokenizer_next(struct wv_tokenizer *t) +{ + struct wv_token tok = {WV_TOK_EOF, NULL, 0}; + + if (t->pos >= t->len) + return tok; + + switch (t->state) { + case WV_STATE_DATA: + if (t->src[t->pos] == '<') { + t->pos++; + skip_whitespace(t); + + if (t->pos < t->len && t->src[t->pos] == '/') { + t->pos++; + tok.type = WV_TOK_TAG_CLOSE; + } else { + tok.type = WV_TOK_TAG_OPEN; + } + + tok.start = &t->src[t->pos]; + while (t->pos < t->len && !is_delim(t->src[t->pos])) + t->pos++; + tok.len = &t->src[t->pos] - tok.start; + + t->state = WV_STATE_TAG; + return tok; + } + + /* Extract text until the next tag starts */ + tok.type = WV_TOK_TEXT; + tok.start = &t->src[t->pos]; + while (t->pos < t->len && t->src[t->pos] != '<') + t->pos++; + tok.len = &t->src[t->pos] - tok.start; + return tok; + + case WV_STATE_TAG: + skip_whitespace(t); + + if (t->pos >= t->len) + return tok; + + /* End of tag or self-closing tag */ + if (t->src[t->pos] == '>' || (t->src[t->pos] == '/' && + t->pos + 1 < t->len && t->src[t->pos+1] == '>')) { + + if (t->src[t->pos] == '/') + t->pos++; + t->pos++; + t->state = WV_STATE_DATA; + return wv_tokenizer_next(t); + } + + /* Attribute Key */ + tok.type = WV_TOK_ATTR_KEY; + tok.start = &t->src[t->pos]; + while (t->pos < t->len && !is_delim(t->src[t->pos])) + t->pos++; + tok.len = &t->src[t->pos] - tok.start; + + skip_whitespace(t); + if (t->pos < t->len && t->src[t->pos] == '=') { + t->pos++; + t->state = WV_STATE_ATTR_VAL; + } + return tok; + + case WV_STATE_ATTR_VAL: + skip_whitespace(t); + + if (t->pos < t->len && (t->src[t->pos] == '"' || + t->src[t->pos] == '\'')) { + t->quote_char = t->src[t->pos++]; + tok.start = &t->src[t->pos]; + while (t->pos < t->len && t->src[t->pos] != t->quote_char) + t->pos++; + tok.len = &t->src[t->pos] - tok.start; + if (t->pos < t->len) t->pos++; + } else { + tok.start = &t->src[t->pos]; + while (t->pos < t->len && !isspace(t->src[t->pos]) && + t->src[t->pos] != '>') + t->pos++; + tok.len = &t->src[t->pos] - tok.start; + } + + tok.type = WV_TOK_ATTR_VAL; + t->state = WV_STATE_TAG; + return tok; + } + + return tok; +} + +wv_ref wv_parse_document(struct wv_arena *arena, const char *src) +{ + struct wv_parser p; + struct wv_token tok; + wv_ref root = 0; + wv_ref last_el = 0; + + wv_tokenizer_init(&p.tokenizer, src, strlen(src)); + p.arena = arena; + wv_vec_init(&p.stack, sizeof(wv_ref)); + + while ((tok = wv_tokenizer_next(&p.tokenizer)).type != WV_TOK_EOF) { + switch (tok.type) { + case WV_TOK_TAG_OPEN: { + wv_ref new_node = wv_node_new(p.arena, WV_NODE_ELEMENT); + struct wv_node *n = (struct wv_node *)WV_ADDR(p.arena, new_node); + n->u.element.tag_id = map_tag_name(tok.start, tok.len); + + /* Determine parent: either top of stack or this is the root */ + wv_ref *parent = (wv_ref *)wv_vec_last(&p.stack); + if (parent) + wv_node_append(p.arena, *parent, new_node); + else if (root == 0) + root = new_node; + + wv_vec_push(&p.stack, &new_node); + last_el = new_node; /* Store for potential attribute keys */ + break; + } + + case WV_TOK_TAG_CLOSE: + wv_vec_pop(&p.stack); + break; + + case WV_TOK_ATTR_KEY: { + /* Next token MUST be ATTR_VAL if state changed */ + struct wv_token val_tok = wv_tokenizer_next(&p.tokenizer); + if (val_tok.type == WV_TOK_ATTR_VAL && last_el != 0) { + wv_ref k = wv_arena_push_string(p.arena, tok.start, tok.len); + wv_ref v = wv_arena_push_string(p.arena, val_tok.start, val_tok.len); + wv_attr_set(p.arena, last_el, k, v); + } + break; + } + + case WV_TOK_TEXT: { + wv_ref *parent = (wv_ref *)wv_vec_last(&p.stack); + if (!parent) + break; + + wv_ref txt_node = wv_node_new(p.arena, WV_NODE_TEXT); + struct wv_node *n = (struct wv_node *)WV_ADDR(p.arena, txt_node); + + n->u.text.str = wv_arena_push_string(p.arena, tok.start, tok.len); + n->u.text.len = tok.len; + + wv_node_append(p.arena, *parent, txt_node); + break; + } + + default: break; + } + } + + wv_vec_free(&p.stack); + + return root; +} + diff --git a/wv_parse.h b/wv_parse.h new file mode 100644 index 0000000..ca3f8ac --- /dev/null +++ b/wv_parse.h @@ -0,0 +1,54 @@ +#ifndef WV_PARSE_H +#define WV_PARSE_H + +#include <stddef.h> + +#include "wv_mem.h" +#include "wv_dom.h" +#include "wv_vec.h" + +typedef enum { + WV_TOK_EOF = 0, + WV_TOK_TAG_OPEN, /* E.g., "div" from <div */ + WV_TOK_TAG_CLOSE, /* E.g., "div" from </div> or just > */ + WV_TOK_ATTR_KEY, + WV_TOK_ATTR_VAL, + WV_TOK_TEXT, + WV_TOK_ERROR +} wv_token_type; + +struct wv_token { + wv_token_type type; + const char *start; + size_t len; +}; + +typedef enum { + WV_STATE_DATA, /* Outside of any tags, looking for < */ + WV_STATE_TAG, /* In <...>, looking for tag name or attr keys */ + WV_STATE_ATTR_VAL /* Found a '=', looking for quoted value */ +} wv_state; + +struct wv_tokenizer { + const char *src; + size_t pos; + size_t len; + wv_state state; + char quote_char; /* Keeps track of " vs ' for current value */ +}; + +struct wv_parser { + struct wv_tokenizer tokenizer; + struct wv_arena *arena; + struct wv_vec stack; + wv_ref doc_ref; + wv_ref current_node; +}; + +void wv_tokenizer_init(struct wv_tokenizer *t, const char *src, + size_t len); +struct wv_token wv_tokenizer_next(struct wv_tokenizer *t); +wv_ref wv_parse_document(struct wv_arena *arena, const char *src); + +#endif /* WV_PARSE_H */ + diff --git a/wv_vec.c b/wv_vec.c new file mode 100644 index 0000000..1d1eaad --- /dev/null +++ b/wv_vec.c @@ -0,0 +1,46 @@ +#include <string.h> + +#include "wv_vec.h" + +void wv_vec_init(struct wv_vec *v, size_t unit_size) +{ + v->data = NULL; + v->len = 0; + v->cap = 0; + v->unit_size = unit_size; +} + +void wv_vec_push(struct wv_vec *v, const void *item) +{ + if (v->len >= v->cap) { + v->cap = (v->cap == 0) ? 8 : v->cap * 2; + v->data = realloc(v->data, v->cap * v->unit_size); + } + void *target = (char *)v->data + (v->len * v->unit_size); + memcpy(target, item, v->unit_size); + v->len++; +} + +void *wv_vec_pop(struct wv_vec *v) +{ + if (v->len == 0) + return NULL; + + v->len--; + return (char *)v->data + (v->len * v->unit_size); +} + +void *wv_vec_last(struct wv_vec *v) +{ + if (v->len == 0) + return NULL; + return (char *)v->data + ((v->len - 1) * v->unit_size); +} + +void wv_vec_free(struct wv_vec *v) +{ + free(v->data); + v->data = NULL; + v->len = v->cap = 0; +} + diff --git a/wv_vec.h b/wv_vec.h new file mode 100644 index 0000000..721856b --- /dev/null +++ b/wv_vec.h @@ -0,0 +1,21 @@ +#ifndef WV_VEC_H +#define WV_VEC_H + +#include <stddef.h> +#include <stdlib.h> + +struct wv_vec { + void *data; + size_t len; + size_t cap; + size_t unit_size; +}; + +void wv_vec_init(struct wv_vec *v, size_t unit_size); +void wv_vec_push(struct wv_vec *v, const void *item); +void *wv_vec_pop(struct wv_vec *v); +void *wv_vec_last(struct wv_vec *v); +void wv_vec_free(struct wv_vec *v); + +#endif /* WV_VEC_H */ + |
