/* Declarations for the WV tokenizer and parser: token/state enums, tokenizer and parser structs, and their entry points. */
#ifndef WV_PARSE_H
#define WV_PARSE_H
#include <stddef.h>
#include "wv_mem.h"
#include "wv_dom.h"
#include "wv_vec.h"
/**
 * Kinds of token that a struct wv_token can carry.
 *
 * WV_TOK_EOF is pinned to 0; the remaining enumerators take the
 * consecutive values that follow it.
 */
typedef enum {
    /** End-of-input marker (explicitly zero). */
    WV_TOK_EOF = 0,

    /** Opening tag name, e.g. "div" from <div */
    WV_TOK_TAG_OPEN,

    /** Tag close, e.g. "div" from </div>, or just a bare > */
    WV_TOK_TAG_CLOSE,

    /** Attribute key inside a tag. */
    WV_TOK_ATTR_KEY,

    /** Attribute value. */
    WV_TOK_ATTR_VAL,

    /**
     * Text token; presumably character data found outside of tags --
     * TODO confirm against the tokenizer implementation.
     */
    WV_TOK_TEXT,

    /**
     * Error token; the conditions that produce it are decided by the
     * tokenizer implementation, which is not visible from this header.
     */
    WV_TOK_ERROR
} wv_token_type;
/**
 * A single token: its kind plus a (pointer, length) pair describing
 * the token text.
 */
struct wv_token {
    /** Which kind of token this is. */
    wv_token_type type;

    /**
     * Start of the token text. Presumably points into the source buffer
     * being tokenized rather than owning a copy -- TODO confirm.
     */
    const char *start;

    /**
     * Length that goes with start. NOTE(review): since a length is
     * carried, the text is presumably not NUL-terminated -- confirm.
     */
    size_t len;
};
/**
 * Tokenizer states, stored in the state field of struct wv_tokenizer.
 */
typedef enum {
    /** Outside of any tags, looking for < */
    WV_STATE_DATA,

    /** In <...>, looking for tag name or attr keys */
    WV_STATE_TAG,

    /** Found a '=', looking for quoted value */
    WV_STATE_ATTR_VAL
} wv_state;
/**
 * Tokenizer state: a source buffer, a position within it, and the
 * current lexing state.
 */
struct wv_tokenizer {
    /** Source text being tokenized. */
    const char *src;

    /**
     * Current position. Presumably an offset into src in the range
     * [0, len] -- TODO confirm against the implementation.
     */
    size_t pos;

    /** Length associated with src (presumably its size in bytes). */
    size_t len;

    /** Current lexing state. */
    wv_state state;

    /** Keeps track of " vs ' for current value */
    char quote_char;
};
/**
 * Parser state: an embedded tokenizer plus the arena, stack and node
 * references used during parsing.
 */
struct wv_parser {
    /** Tokenizer, embedded by value (not a pointer). */
    struct wv_tokenizer tokenizer;

    /**
     * Arena used by the parser. Held as a pointer; NOTE(review):
     * presumably owned by the caller -- confirm.
     */
    struct wv_arena *arena;

    /**
     * Parser stack. Presumably tracks the currently open elements --
     * TODO confirm what is pushed onto it.
     */
    struct wv_vec stack;

    /** Reference to the document node (per the field name). */
    wv_ref doc_ref;

    /**
     * Reference to the node currently being worked on (per the field
     * name) -- exact semantics live in the implementation.
     */
    wv_ref current_node;
};
/**
 * Initialize a tokenizer.
 *
 * The implementation is not part of this header; the notes below follow
 * the parameter names and the fields of struct wv_tokenizer.
 *
 * @param t   Tokenizer to initialize.
 * @param src Source text to tokenize. NOTE(review): presumably stored by
 *            pointer rather than copied, so it would need to outlive the
 *            tokenizer -- confirm.
 * @param len Length of src (presumably in bytes).
 */
void wv_tokenizer_init(struct wv_tokenizer *t, const char *src,
size_t len);
/**
 * Produce the next token from a tokenizer.
 *
 * @param t Tokenizer to read from; passed as a non-const pointer, so it
 *          is presumably advanced by the call.
 * @return  The token, returned by value. NOTE(review): presumably
 *          WV_TOK_EOF once input is exhausted and WV_TOK_ERROR on
 *          malformed input -- confirm against the implementation.
 */
struct wv_token wv_tokenizer_next(struct wv_tokenizer *t);
/**
 * Parse a document from source text.
 *
 * @param arena Arena handed to the parser (presumably where the resulting
 *              nodes are allocated -- confirm).
 * @param src   Source text. NOTE(review): no length parameter is taken
 *              here, unlike wv_tokenizer_init, so src presumably must be
 *              NUL-terminated -- confirm.
 * @return      A wv_ref, presumably referring to the parsed document
 *              node; the failure value is not visible from this header.
 */
wv_ref wv_parse_document(struct wv_arena *arena, const char *src);
#endif /* WV_PARSE_H */