summary | refs | log | tree | commit | diff | stats
path: root/wv_parse.h
diff options
context:
space:
mode:
Diffstat (limited to 'wv_parse.h')
-rw-r--r-- wv_parse.h 54
1 files changed, 54 insertions, 0 deletions
diff --git a/wv_parse.h b/wv_parse.h
new file mode 100644
index 0000000..ca3f8ac
--- /dev/null
+++ b/wv_parse.h
@@ -0,0 +1,54 @@
+#ifndef WV_PARSE_H
+#define WV_PARSE_H
+
+#include <stddef.h>
+
+#include "wv_mem.h"
+#include "wv_dom.h"
+#include "wv_vec.h"
+
+typedef enum { /* Kinds of token; stored in wv_token.type */
+ WV_TOK_EOF = 0, /* Explicitly zero; by its name, marks end of input */
+ WV_TOK_TAG_OPEN, /* E.g., "div" from <div */
+ WV_TOK_TAG_CLOSE, /* E.g., "div" from </div> or just > */
+ WV_TOK_ATTR_KEY, /* NOTE(review): presumably an attribute name inside a tag -- confirm */
+ WV_TOK_ATTR_VAL, /* NOTE(review): presumably an attribute value following '=' -- confirm */
+ WV_TOK_TEXT, /* NOTE(review): presumably text found outside of tags -- confirm */
+ WV_TOK_ERROR /* NOTE(review): presumably reported on malformed input -- confirm */
+} wv_token_type;
+
+struct wv_token { /* One token; wv_tokenizer_next() returns this struct by value */
+ wv_token_type type; /* Which kind of token this is */
+ const char *start; /* NOTE(review): presumably points into the source text, so not NUL-terminated -- confirm */
+ size_t len; /* NOTE(review): presumably the number of bytes at start -- confirm */
+};
+
+typedef enum { /* Tokenizer scanning modes; stored in wv_tokenizer.state */
+ WV_STATE_DATA, /* Outside of any tags, looking for < (first enumerator, so value 0) */
+ WV_STATE_TAG, /* In <...>, looking for tag name or attr keys */
+ WV_STATE_ATTR_VAL /* Found a '=', looking for quoted value */
+} wv_state;
+
+struct wv_tokenizer { /* Tokenizer state; set up with wv_tokenizer_init() */
+ const char *src; /* Input text; const, so never written through this pointer */
+ size_t pos; /* NOTE(review): presumably the current read offset into src -- confirm */
+ size_t len; /* NOTE(review): presumably the total byte length of src -- confirm */
+ wv_state state; /* Current scanning mode (see wv_state) */
+ char quote_char; /* Keeps track of " vs ' for current value */
+};
+
+struct wv_parser { /* Parser state; embeds its tokenizer by value */
+ struct wv_tokenizer tokenizer; /* Token source for this parse */
+ struct wv_arena *arena; /* NOTE(review): presumably the allocator for parsed nodes; ownership not visible here -- confirm */
+ struct wv_vec stack; /* NOTE(review): presumably the stack of currently open elements -- confirm */
+ wv_ref doc_ref; /* NOTE(review): presumably refers to the document root -- confirm */
+ wv_ref current_node; /* NOTE(review): presumably the node new children attach to -- confirm */
+};
+
+void wv_tokenizer_init(struct wv_tokenizer *t, const char *src,
+ size_t len); /* Sets up t over src; NOTE(review): len is presumably the byte length of src -- confirm */
+struct wv_token wv_tokenizer_next(struct wv_tokenizer *t); /* Returns the next token by value; t is non-const, so presumably advanced -- confirm */
+wv_ref wv_parse_document(struct wv_arena *arena, const char *src); /* NOTE(review): takes no length, unlike wv_tokenizer_init, so src is presumably NUL-terminated -- confirm */
+
+#endif /* WV_PARSE_H */
+