/*
 * wv_parse.h
 *
 * Source listing metadata:
 *   path: root/wv_parse.h
 *   blob: ca3f8acc337fb5883bb6138318c8983237369fd7
 */
#ifndef WV_PARSE_H
#define WV_PARSE_H

#include <stddef.h>

#include "wv_mem.h"
#include "wv_dom.h"
#include "wv_vec.h"

/*
 * Token kinds stored in the `type` field of struct wv_token.
 *
 * The numeric values are written out explicitly; they are the same values
 * the enumerators would receive implicitly (0 for EOF, then sequential).
 */
typedef enum {
	/* End of input. */
	WV_TOK_EOF       = 0,

	/* Opening tag name, e.g. "div" taken from `<div`. */
	WV_TOK_TAG_OPEN  = 1,

	/* Closing of a tag, e.g. "div" taken from `</div>`, or a bare `>`. */
	WV_TOK_TAG_CLOSE = 2,

	/* Attribute key. */
	WV_TOK_ATTR_KEY  = 3,

	/* Attribute value. */
	WV_TOK_ATTR_VAL  = 4,

	/* Text content. */
	WV_TOK_TEXT      = 5,

	/*
	 * Error token. The conditions that produce it are decided by the
	 * tokenizer implementation, which is not part of this header.
	 */
	WV_TOK_ERROR     = 6
} wv_token_type;

/*
 * One token: a kind plus a (pointer, length) span of characters.
 * This is the type returned by value from wv_tokenizer_next().
 */
struct wv_token {
	wv_token_type type; /* Which kind of token this is */
	const char *start;  /* First character of the token's text.
	                     * NOTE(review): presumably points into the
	                     * tokenizer's source buffer rather than a copy,
	                     * so it would not be NUL-terminated -- confirm. */
	size_t len;         /* Length of the span beginning at `start` */
};

/*
 * Scanning modes of the tokenizer (see the `state` field of
 * struct wv_tokenizer).
 *
 * Values are spelled out explicitly and equal the implicit numbering.
 */
typedef enum {
	/* Not inside any tag; scanning for a '<'. */
	WV_STATE_DATA     = 0,

	/* Inside <...>; scanning for the tag name or attribute keys. */
	WV_STATE_TAG      = 1,

	/* An '=' has been seen; scanning for the quoted value. */
	WV_STATE_ATTR_VAL = 2
} wv_state;

/*
 * Tokenizer state over an input character buffer.
 * Set up with wv_tokenizer_init() and consumed via wv_tokenizer_next().
 */
struct wv_tokenizer {
	const char *src;    /* Input text being tokenized (not modified
	                     * through this pointer) */
	size_t pos;         /* NOTE(review): presumably the current offset
	                     * into src -- confirm against the implementation */
	size_t len;         /* NOTE(review): presumably the total length of
	                     * src, as passed to wv_tokenizer_init -- confirm */
	wv_state state;     /* Current scanning mode */
	char quote_char;    /* Keeps track of " vs ' for current value */
};

/*
 * Parser state: an embedded tokenizer plus the bookkeeping used while
 * building a document. struct wv_arena, struct wv_vec and wv_ref are
 * declared in the project headers included above.
 */
struct wv_parser {
	struct wv_tokenizer tokenizer; /* Tokenizer, held by value */
	struct wv_arena *arena;        /* Arena used by the parser.
	                                * NOTE(review): presumably where nodes
	                                * are allocated; ownership is not
	                                * visible here -- confirm. */
	struct wv_vec stack;           /* NOTE(review): presumably the stack of
	                                * currently open elements -- confirm */
	wv_ref doc_ref;                /* NOTE(review): presumably a reference
	                                * to the document root -- confirm */
	wv_ref current_node;           /* NOTE(review): presumably the node
	                                * currently being populated -- confirm */
};

/*
 * Initialize a tokenizer.
 *
 *   t    tokenizer to initialize
 *   src  input text
 *   len  length associated with src
 *
 * The implementation is not part of this header.
 * NOTE(review): presumably *t is set up to scan `len` characters of `src`
 * from the beginning, and `src` must outlive the tokenizer -- confirm
 * against the .c file.
 */
void wv_tokenizer_init(struct wv_tokenizer *t, const char *src, 
	size_t len);
/*
 * Return a token from the tokenizer `t`, by value.
 *
 * NOTE(review): presumably returns the next token and advances `t`, yielding
 * WV_TOK_EOF at end of input and WV_TOK_ERROR on malformed input -- the
 * implementation is not visible here; confirm.
 */
struct wv_token wv_tokenizer_next(struct wv_tokenizer *t);
/*
 * Parse the text `src` using `arena` and return a wv_ref.
 *
 * NOTE(review): unlike wv_tokenizer_init(), no length is passed, so `src`
 * is presumably expected to be NUL-terminated -- confirm.
 * NOTE(review): the returned wv_ref presumably refers to the document root
 * allocated from `arena`; the failure value is not visible here -- confirm.
 */
wv_ref wv_parse_document(struct wv_arena *arena, const char *src);

#endif /* WV_PARSE_H */