summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Makefile 4
-rw-r--r-- dom.c 37
-rw-r--r-- dom.h 6
-rw-r--r-- main.c 33
-rw-r--r-- parse.c 195
-rw-r--r-- parse.h 6
-rw-r--r-- test.html 9
7 files changed, 287 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index 35d0540..c3b4451 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@ CC = cc
CFLAGS = -std=c11 -Wall -Wextra -Wpedantic -g -O0
LDFLAGS =
-HDRS = mem.h
-SRCS = main.c
+# Project headers and translation units; OBJS below is derived from SRCS.
+HDRS = mem.h parse.h dom.h
+SRCS = parse.c dom.c main.c
OBJS = $(SRCS:.c=.o)
TARGET = glacier
diff --git a/dom.c b/dom.c
new file mode 100644
index 0000000..a4d75e1
--- /dev/null
+++ b/dom.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+
+#include "dom.h"
+#include "parse.h"
+
+/*
+ * Run the parser over html. The buffer is passed to parse() unchanged;
+ * whatever the parser finds is delivered to the on_* handlers defined
+ * further down in this file.
+ */
+void init_dom(const char *html)
+{
+ parse(html);
+}
+
+/* Parser event handlers */
+/*
+ * Opening-tag handler: writes a "Tag opened:" line to stdout containing at
+ * most n bytes of tag (printing stops early at a NUL byte).
+ */
+void on_open(const char *tag, size_t n)
+{
+	const int width = (int)n;
+
+	printf("Tag opened: %.*s\n", width, tag);
+}
+
+/*
+ * Closing-tag handler: writes a "Tag closed:" line to stdout containing at
+ * most n bytes of tag (printing stops early at a NUL byte).
+ */
+void on_close(const char *tag, size_t n)
+{
+	const int width = (int)n;
+
+	printf("Tag closed: %.*s\n", width, tag);
+}
+
+/*
+ * Text handler: writes a "Text:" line to stdout containing at most n bytes
+ * of text (printing stops early at a NUL byte).
+ */
+void on_text(const char *text, size_t n)
+{
+	const int width = (int)n;
+
+	printf("Text: %.*s\n", width, text);
+}
+
+/*
+ * Attribute handler: writes one "Attribute:" line to stdout. The name is
+ * always printed (at most nname bytes); the value part is appended only
+ * when val is non-NULL and nval is non-zero, so valueless and empty-valued
+ * attributes print the name alone.
+ */
+void on_attr(const char *name, size_t nname, const char *val,
+    size_t nval)
+{
+	const int has_value = (val != NULL) && (nval != 0);
+
+	printf("Attribute: name=%.*s", (int)nname, name);
+	if (has_value)
+		printf(", value=%.*s", (int)nval, val);
+	printf("\n");
+}
+
diff --git a/dom.h b/dom.h
new file mode 100644
index 0000000..d336019
--- /dev/null
+++ b/dom.h
@@ -0,0 +1,6 @@
+#ifndef DOM_H
+#define DOM_H
+
+/*
+ * Run the parser over the string html. The pointer is forwarded as-is to
+ * parse(), which treats it as NUL-terminated and ignores a NULL pointer.
+ */
+void init_dom(const char *html);
+
+#endif /* DOM_H */
diff --git a/main.c b/main.c
index 31dbf45..00a184a 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,35 @@
-int main(void)
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "mem.h"
+#include "dom.h"
+
+/*
+ * usage: glacier <file>
+ *
+ * Reads <file> completely into a NUL-terminated buffer and passes it to
+ * init_dom(). Before touching the file the process restricts itself with
+ * unveil(2) to that single path (read-only) and with pledge(2) to
+ * "stdio rpath". Every failing step terminates the program with a
+ * diagnostic and exit status 1.
+ */
+int main(int argc, char *argv[])
{
+	FILE *file;
+	char *html;
+	long len;
+	size_t nread;
+
+	if (argc < 2)
+		errx(1, "usage: glacier <file>");
+
+	/* Expose only the input file, then forbid further unveil calls. */
+	if (unveil(argv[1], "r") == -1)
+		err(1, "unveil %s", argv[1]);
+	if (unveil(NULL, NULL) == -1)
+		err(1, "unveil");
+	if (pledge("stdio rpath", NULL) == -1)
+		err(1, "pledge");
+
+	/* Open the path from the command line: it is the only one unveiled. */
+	file = fopen(argv[1], "rb");
+	if (file == NULL)
+		err(1, "%s", argv[1]);
+
+	/* Determine the size by seeking to the end and back. */
+	if (fseek(file, 0, SEEK_END) == -1)
+		err(1, "%s", argv[1]);
+	len = ftell(file);
+	if (len == -1)
+		err(1, "%s", argv[1]);
+	if (fseek(file, 0, SEEK_SET) == -1)
+		err(1, "%s", argv[1]);
+
+	/* NOTE(review): MALLOC comes from mem.h; confirm whether it can return NULL. */
+	html = MALLOC((size_t)len + 1);
+	if (html == NULL)
+		err(1, "malloc");
+
+	/* Terminate after the bytes actually read, which may be fewer than len. */
+	nread = fread(html, 1, (size_t)len, file);
+	if (ferror(file))
+		errx(1, "%s: read error", argv[1]);
+	html[nread] = '\0';
+	fclose(file);
+
+	init_dom(html);
+
+	free(html);
+
return 0;
}
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..a9998e3
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,195 @@
+#include <ctype.h>
+#include <stddef.h>
+
+#include "parse.h"
+
+/*
+ * Move the pointer s forward by n characters, one at a time.
+ *
+ * Hidden control flow: as soon as a step lands on the terminating NUL the
+ * macro executes "goto end", so the enclosing function must provide an
+ * "end" label and any statements between the call site and that label are
+ * skipped. Both s and n are evaluated more than once; pass plain lvalues
+ * and constants only.
+ */
+#define ADVANCE(s, n) do { \
+	size_t step_ = 0; \
+	while (step_ < (n)) { \
+		++(s); \
+		if (*(s) == '\0') \
+			goto end; \
+		step_++; \
+	} \
+} while (0)
+
+/*
+ * Scanner states used by parse(); each names the construct the current
+ * input position is inside.
+ */
+typedef enum {
+ DATA = 1, /* outside any tag: text up to the next '<' */
+ TAG_OPEN, /* inside an opening tag, after '<' followed by a letter */
+ TAG_CLOSE, /* inside a closing tag, after "</" */
+ ATTR_NAME, /* reading an attribute name */
+ ATTR_VALUE, /* reading an attribute value, after '=' */
+ DOCTYPE, /* inside a "<!" construct that is not a comment; skipped */
+ COMMENT /* inside a "<!--" comment; skipped */
+} parse_mode;
+
+/*
+ * Event handlers called by parse(). They are only declared here; this file
+ * does not define them. Every pointer/length pair designates a span inside
+ * the string passed to parse(); the span is not NUL-terminated at its length.
+ * on_attr() receives val == NULL and nval == 0 for an attribute without a
+ * value.
+ */
+extern void on_open(const char *tag, size_t n);
+extern void on_close(const char *tag, size_t n);
+extern void on_text(const char *text, size_t n);
+extern void on_attr(const char *name, size_t nname, const char *val, size_t nval);
+
+/*
+ * Scan the NUL-terminated markup in s once, left to right, and report what
+ * is found through the handlers:
+ *
+ *   on_text(p, n)   a run of text between tags that is not all whitespace
+ *   on_open(p, n)   the name of an opening tag (a trailing '/' is dropped)
+ *   on_attr(...)    one attribute; val is NULL for a valueless attribute
+ *   on_close(p, n)  the name of a closing tag
+ *
+ * All spans point into s. Comments and other "<!" constructs are skipped.
+ * A NULL s is ignored.
+ *
+ * The pointer is only ever advanced over characters known to be non-NUL,
+ * so every loop stops exactly at the terminator and events that are
+ * complete when the input ends (trailing text, a final tag) are still
+ * delivered. A tag, attribute or comment that is cut off by the end of the
+ * input is dropped.
+ */
+void parse(const char *s)
+{
+	parse_mode mode = DATA;
+	const char *p, *q;
+	const char *attr_name = NULL;
+	size_t n, attr_name_len = 0;
+	char quote = '\0';	/* delimiter of the current value, 0 if unquoted */
+	int blank, ended;
+	int named = 0;		/* tag name of the current tag already handled */
+
+	if (!s)
+		return;
+
+	while (*s) {
+		switch (mode) {
+		case DATA:
+			p = s;
+			n = 0;
+			blank = 1;
+			while (*s) {
+				if (*s == '<') {
+					if (isalpha((unsigned char)s[1])) {
+						mode = TAG_OPEN;
+						named = 0;
+						s += 1;
+						break;
+					}
+					if (s[1] == '/') {
+						mode = TAG_CLOSE;
+						s += 2;
+						break;
+					}
+					if (s[1] == '!') {
+						if (s[2] == '-' && s[3] == '-') {
+							/*
+							 * Stop on the second '-' of "<!--" so
+							 * that "<!--->" counts as closed.
+							 */
+							mode = COMMENT;
+							s += 3;
+						} else {
+							mode = DOCTYPE;
+							s += 2;
+						}
+						break;
+					}
+					/* Any other '<' is ordinary text. */
+				}
+
+				if (!isspace((unsigned char)*s))
+					blank = 0;
+
+				n++;
+				s++;
+			}
+
+			if (n > 0 && !blank)
+				on_text(p, n);
+			break;
+		case TAG_OPEN:
+			/*
+			 * Entered once for the tag name and again after each
+			 * attribute; `named` keeps the later passes from
+			 * reporting stray characters as another tag.
+			 */
+			p = s;
+			n = 0;
+			ended = 0;
+			while (*s && !ended) {
+				if (*s == '>') {
+					if (n > 0 && s[-1] == '/')
+						n--;	/* "<br/>" */
+					mode = DATA;
+					s++;
+					ended = 1;
+				} else if (isspace((unsigned char)*s)) {
+					while (isspace((unsigned char)*s))
+						s++;
+					if (isalpha((unsigned char)*s)) {
+						mode = ATTR_NAME;
+					} else if (*s == '/' && s[1] == '>') {
+						s += 2;	/* "<br />" */
+						mode = DATA;
+					}
+					ended = 1;
+				} else {
+					n++;
+					s++;
+				}
+			}
+
+			if (ended) {
+				if (!named && n > 0)
+					on_open(p, n);
+				named = 1;
+			}
+			break;
+		case TAG_CLOSE:
+			p = s;
+			n = 0;
+			while (*s) {
+				if (*s == '>') {
+					/* "</div >": drop blanks before '>'. */
+					while (n > 0 &&
+					    isspace((unsigned char)p[n - 1]))
+						n--;
+					on_close(p, n);
+					mode = DATA;
+					s++;
+					break;
+				}
+				n++;
+				s++;
+			}
+			break;
+		case ATTR_NAME:
+			p = s;
+			n = 0;
+			while (*s) {
+				if (isspace((unsigned char)*s)) {
+					/* Blanks may separate the name from '='. */
+					q = s;
+					while (isspace((unsigned char)*q))
+						q++;
+					if (*q != '=') {
+						/* Valueless attribute; more may follow. */
+						on_attr(p, n, NULL, 0);
+						mode = TAG_OPEN;
+						break;
+					}
+					s = q;
+				}
+
+				if (*s == '=') {
+					attr_name = p;
+					attr_name_len = n;
+					mode = ATTR_VALUE;
+					s++;
+					while (isspace((unsigned char)*s))
+						s++;
+					/* Remember which quote opened the value. */
+					quote = '\0';
+					if (*s == '"' || *s == '\'')
+						quote = *s++;
+					break;
+				}
+
+				if (*s == '>' || (*s == '/' && s[1] == '>')) {
+					/* <input disabled> or <input disabled/> */
+					on_attr(p, n, NULL, 0);
+					mode = DATA;
+					s += (*s == '/') ? 2 : 1;
+					break;
+				}
+
+				n++;
+				s++;
+			}
+			break;
+		case ATTR_VALUE:
+			/*
+			 * A quoted value runs to the matching quote and may
+			 * contain '>' or the other quote character; an unquoted
+			 * value runs to whitespace or '>'.
+			 */
+			p = s;
+			n = 0;
+			while (*s) {
+				if (quote ? *s == quote :
+				    (*s == '>' || isspace((unsigned char)*s))) {
+					on_attr(attr_name, attr_name_len, p, n);
+					mode = TAG_OPEN;
+					if (quote)
+						s++;	/* step over the closing quote */
+					break;
+				}
+				n++;
+				s++;
+			}
+			break;
+		case DOCTYPE:
+			while (*s && *s != '>')
+				s++;
+			if (*s == '>') {
+				mode = DATA;
+				s++;
+			}
+			break;
+		case COMMENT:
+			/* n guards the look-behind from reaching before the comment. */
+			n = 0;
+			while (*s) {
+				if (*s == '>' && n >= 2 && s[-1] == '-' &&
+				    s[-2] == '-') {
+					mode = DATA;
+					s++;
+					break;
+				}
+				n++;
+				s++;
+			}
+			break;
+		default:
+			/* Not reachable; leave rather than spin without progress. */
+			return;
+		}
+	}
+}
diff --git a/parse.h b/parse.h
new file mode 100644
index 0000000..33d7cf4
--- /dev/null
+++ b/parse.h
@@ -0,0 +1,6 @@
+#ifndef PARSE_H
+#define PARSE_H
+
+/*
+ * Scan the NUL-terminated markup in s and report tags, attributes and text
+ * through the on_open(), on_close(), on_attr() and on_text() handlers,
+ * which parse.c declares extern and which must be defined elsewhere in the
+ * program. Returns immediately when s is NULL.
+ */
+void parse(const char *s);
+
+#endif /* PARSE_H */
diff --git a/test.html b/test.html
new file mode 100644
index 0000000..2642bfa
--- /dev/null
+++ b/test.html
@@ -0,0 +1,9 @@
+<html>
+ <div class="header">
+ <h1>My Journal</h1>
+ </div>
+ <div class="content" disabled>
+ <p>Hello World</p>
+ </div>
+
+</html>