#include #include #include #include "parse.h" #define ADVANCE(s, n) do { \ size_t i_; \ for (i_ = 0; i_ < (n); i_++) { \ if (!*(++(s))) \ goto end; \ } \ } while (0) typedef enum { DATA = 1, TAG_OPEN, TAG_CLOSE, ATTR_NAME, ATTR_VALUE, DOCTYPE, COMMENT } parse_mode; extern void on_open(const char *tag, size_t n); extern void on_open_end(void); extern void on_close(const char *tag, size_t n); extern void on_text(const char *text, size_t n); extern void on_attr(const char *name, size_t nname, const char *val, size_t nval); void parse(const char *s) { int blank; parse_mode mode; size_t n, attr_name_len; const char *p, *attr_name; if (!s) return; mode = DATA; while (*s) { switch (mode) { case DATA: p = s; n = 0; blank = 1; while (*s) { if (*s == '<') { if (isalpha((unsigned char)s[1])) { mode = TAG_OPEN; ADVANCE(s, 1); break; } else if (s[1] == '/') { mode = TAG_CLOSE; ADVANCE(s, 2); break; } else if (s[1] == '!') { if (s[2] == '-' && s[3] == '-') { mode = COMMENT; ADVANCE(s, 3); } else { mode = DOCTYPE; ADVANCE(s, 2); } break; } } if (!isspace((unsigned char)*s)) blank = 0; n++; ADVANCE(s, 1); } if (n > 0 && !blank) on_text(p, n); break; case TAG_OPEN: p = s; n = 0; while (*s) { if (*s == '>') { if (n > 0 && s[-1] == '/') n--; if (n > 0) on_open(p, n); on_open_end(); ADVANCE(s, 1); mode = DATA; break; } if (*s == ' ') { if (n > 0) on_open(p, n); while (*s == ' ') ADVANCE(s, 1); if (isalpha((unsigned char)*s)) mode = ATTR_NAME; else if (*s == '>') { on_open_end(); ADVANCE(s, 1); mode = DATA; } else if (*s == '/' && s[1] == '>') { on_open_end(); ADVANCE(s, 2); mode = DATA; } else errx(1, "Invalid character in open tag: %c", *s); break; } n++; ADVANCE(s, 1); } break; case TAG_CLOSE: p = s; n = 0; while (*s) { if (*s == '>') { on_close(p, n); mode = DATA; ADVANCE(s, 1); break; } n++; ADVANCE(s, 1); } break; case ATTR_NAME: p = s; n = 0; while (*s) { if (*s == '=') { attr_name = p; attr_name_len = n; ADVANCE(s, 1); if (*s == '"' || *s == '\'') ADVANCE(s, 1); mode = ATTR_VALUE; break; } if (*s == '>') { // on_attr(p, n, NULL, 0); on_open_end(); ADVANCE(s, 1); mode = DATA; break; } n++; ADVANCE(s, 1); } break; case ATTR_VALUE: p = s; n = 0; while (*s) { if (*s == '"' || *s == '\'' || *s == '>') { on_attr(attr_name, attr_name_len, p, n); mode = *s == '>' ? DATA : TAG_OPEN; ADVANCE(s, 1); break; } n++; ADVANCE(s, 1); } break; case DOCTYPE: while (*s) { if (*s == '>') { mode = DATA; ADVANCE(s, 1); break; } ADVANCE(s, 1); } break; case COMMENT: n = 0; while (*s) { if (*s == '>' && n >= 2 && s[-1] == '-' && s[-2] == '-') { mode = DATA; ADVANCE(s, 1); break; } n++; ADVANCE(s, 1); } break; default: break; } } end: return; }