diff options
Diffstat (limited to 'parse.c')
| -rw-r--r-- | parse.c | 195 |
1 files changed, 195 insertions, 0 deletions
@@ -0,0 +1,195 @@ +#include <ctype.h> +#include <stddef.h> + +#include "parse.h" + +#define ADVANCE(s, n) do { \ + size_t i_; \ + for (i_ = 0; i_ < (n); i_++) { \ + if (!*(++(s))) \ + goto end; \ + } \ +} while (0) + +typedef enum { + DATA = 1, + TAG_OPEN, + TAG_CLOSE, + ATTR_NAME, + ATTR_VALUE, + DOCTYPE, + COMMENT +} parse_mode; + +extern void on_open(const char *tag, size_t n); +extern void on_close(const char *tag, size_t n); +extern void on_text(const char *text, size_t n); +extern void on_attr(const char *name, size_t nname, const char *val, size_t nval); + +void parse(const char *s) +{ + int blank; + parse_mode mode; + size_t n, attr_name_len; + const char *p, *attr_name; + + if (!s) + return; + + mode = DATA; + + while (*s) { + switch (mode) { + case DATA: + p = s; + n = 0; + blank = 1; + while (*s) { + if (*s == '<') { + if (isalpha((unsigned char)s[1])) { + mode = TAG_OPEN; + ADVANCE(s, 1); + break; + } else if (s[1] == '/') { + mode = TAG_CLOSE; + ADVANCE(s, 2); + break; + } else if (s[1] == '!') { + if (s[2] == '-' && s[3] == '-') { + mode = COMMENT; + ADVANCE(s, 3); + } else { + mode = DOCTYPE; + ADVANCE(s, 2); + } + break; + } + } + + if (!isspace((unsigned char)*s)) + blank = 0; + + n++; + ADVANCE(s, 1); + } + + if (n > 0 && !blank) + on_text(p, n); + + break; + case TAG_OPEN: + p = s; + n = 0; + while (*s) { + if (*s == '>') { + mode = DATA; + if (n > 0 && s[-1] == '/') + n--; + ADVANCE(s, 1); + break; + } + + if (*s == ' ') { + while (*s == ' ') + ADVANCE(s, 1); + + if (isalpha((unsigned char)*s)) + mode = ATTR_NAME; + else if (*s == '/' && s[1] == '>') { + ADVANCE(s, 2); // ignore self-closing tags + mode = DATA; + } + break; + } + + n++; + ADVANCE(s, 1); + } + + if (n > 0) + on_open(p, n); + + break; + case TAG_CLOSE: + p = s; + n = 0; + while (*s) { + if (*s == '>') { + on_close(p, n); + mode = DATA; + ADVANCE(s, 1); + break; + } + n++; + ADVANCE(s, 1); + } + break; + case ATTR_NAME: + p = s; + n = 0; + while (*s) { + if (*s == '=') { + attr_name = p; + attr_name_len = n; + mode = ATTR_VALUE; + ADVANCE(s, 1); + if (*s == '"' || *s == '\'') + ADVANCE(s, 1); + break; + } + + if (*s == '>') { // <input disabled> + on_attr(p, n, NULL, 0); + mode = DATA; + ADVANCE(s, 1); + break; + } + + n++; + ADVANCE(s, 1); + } + break; + case ATTR_VALUE: + p = s; + n = 0; + while (*s) { + if (*s == '"' || *s == '\'' || *s == '>') { + on_attr(attr_name, attr_name_len, p, n); + mode = *s == '>' ? DATA : TAG_OPEN; + ADVANCE(s, 1); + break; + } + n++; + ADVANCE(s, 1); + } + break; + case DOCTYPE: + while (*s) { + if (*s == '>') { + mode = DATA; + ADVANCE(s, 1); + break; + } + ADVANCE(s, 1); + } + break; + case COMMENT: + n = 0; + while (*s) { + if (*s == '>' && n >= 2 && s[-1] == '-' && s[-2] == '-') { + mode = DATA; + ADVANCE(s, 1); + break; + } + n++; + ADVANCE(s, 1); + } + break; + default: + break; + } + } + +end: + return; +} |
