#include #include #include #include #include #include #include #include #include struct XMLexer { Stream *stream; bool autofree; /* A temporary buffer */ int *buffer; ssize_t length; ssize_t top; enum { XML_STATE_NONE = 0, XML_STATE_COMMENT, XML_STATE_ATTR, XML_STATE_ATTRHEAD_PROP, XML_STATE_ATTRTAIL, XML_STATE_PI } state; struct { Array *elements; char *str; } data; }; /* "Looks ahead" in the XML stream for a NUL-terminated string. * If it was found and skip is set, then also skips over it. */ static bool XMLookahead(XMLexer *lexer, const char *str, bool skip); /* Parses an XML "name" */ static char * XMLParseName(XMLexer *lexer); static bool XMLSkipSpace(XMLexer *lexer); static char * XMLParseAttValue(XMLexer *lexer); static ssize_t XMLInitialiseBuffer(XMLexer *lexer); static int XMLGetc(XMLexer *lexer); static void XMLUngetc(XMLexer *lexer, int ch); static void XMLEndBuffer(XMLexer *lexer); static void XMLRollback(XMLexer *lexer); static void XMLReset(XMLexer *lexer, ssize_t loc); static char * XMLStringify(XMLexer *lexer, ssize_t point); static HashMap * XMLReadProps(XMLexer *l); static void XMLPushElement(XMLexer *lexer, char *e); static char * XMLPopElement(XMLexer *lexer); static XMLEvent * XMLCreateEmptyElem(XMLexer *lexer, HashMap *attrs); static XMLEvent * XMLCreateStart(XMLexer *lexer, HashMap *attrs); static XMLEvent * XMLCreateRelax(XMLexer *lexer); static XMLEvent * XMLCreateEnd(XMLexer *lexer, char *end); static XMLEvent * XMLCreateData(XMLexer *lexer); XMLexer * XMLCreateLexer(Stream *stream, bool autofree) { XMLexer *lexer; if (!stream) { return NULL; } lexer = Malloc(sizeof(*lexer)); lexer->stream = stream; lexer->autofree = autofree; lexer->state = XML_STATE_NONE; lexer->buffer = NULL; lexer->length = -1; lexer->top = -1; lexer->data.elements = ArrayCreate(); lexer->data.str = NULL; return lexer; } void XMLFreeLexer(XMLexer *lexer) { if (!lexer) { return; } if (lexer->autofree) { StreamClose(lexer->stream); } if (lexer->buffer) { Free(lexer->buffer); } if (lexer->data.str) { Free(lexer->data.str); } if (lexer->data.elements) { size_t i; Array *elems = lexer->data.elements; for (i = 0; i < ArraySize(elems); i++) { Free(ArrayGet(elems, i)); } ArrayFree(lexer->data.elements); } Free(lexer); } XMLEvent * XMLCrank(XMLexer *lexer) { XMLEvent *event = NULL; char *attrname; HashMap *props; char cbuf[2] = { 0 }; char *tmp; if (!lexer) { return NULL; } if (StreamEof(lexer->stream)) { return NULL; } event = XMLCreateRelax(lexer); //Log(LOG_INFO, "A %d", lexer->state); switch (lexer->state) { case XML_STATE_NONE: if (XMLookahead(lexer, "", true)) { lexer->state = XML_STATE_NONE; } else if (XMLookahead(lexer, "--", false)) { /* Throw error */ return NULL; } break; case XML_STATE_PI: if (XMLookahead(lexer, "?>", true)) { lexer->state = XML_STATE_NONE; break; } XMLGetc(lexer); break; case XML_STATE_ATTR: attrname = XMLParseName(lexer); //Log(LOG_INFO, "A %d %s", lexer->state, attrname); if (!attrname) { /* TODO: Throw error */ } XMLPushElement(lexer, attrname); //Log(LOG_INFO, "Reading props..."); props = XMLReadProps(lexer); //Log(LOG_INFO, "Read props!"); XMLSkipSpace(lexer); if (XMLookahead(lexer, "/>", true)) { lexer->state = XML_STATE_NONE; XMLFreeEvent(event); event = XMLCreateEmptyElem(lexer, props); Free(XMLPopElement(lexer)); break; } else if (XMLookahead(lexer, ">", true)) { lexer->state = XML_STATE_NONE; XMLFreeEvent(event); event = XMLCreateStart(lexer, props); break; } else if (XMLookahead(lexer, "'", true)) { while (true); } break; case XML_STATE_ATTRTAIL: attrname = XMLParseName(lexer); Free(XMLPopElement(lexer)); if (!XMLookahead(lexer, ">", true)) { /* TODO: Throw error. */ break; } lexer->state = XML_STATE_NONE; XMLFreeEvent(event); event = XMLCreateEnd(lexer, attrname); break; default: /* TODO */ break; } /* TODO: Crank our XML parser. */ return event; } void XMLFreeEvent(XMLEvent *event) { if (!event) { return; } if (event->element) { Free(event->element); } if (event->attrs) { char *key; void *val; while (HashMapIterate(event->attrs, &key, &val)) { Free(val); } HashMapFree(event->attrs); } if (event->data) { Free(event->data); } Free(event); } static bool XMLookahead(XMLexer *lexer, const char *str, bool skip) { int *stack; size_t top, i; ssize_t ntop; bool ret = false; if (!lexer || !str) { return false; } top = 0; stack = Malloc(strlen(str) * sizeof(*stack)); for (i = 0; i < strlen(str); i++) { char c = str[i]; int getc = XMLGetc(lexer); stack[top++] = getc; if (getc != c || getc == EOF) { goto seekback; } } /* We have been able to seek the string properly */ ret = true; if (!skip) { goto seekback; } Free(stack); return ret; seekback: for (ntop = top - 1; ntop >= 0; ntop--) { XMLUngetc(lexer, stack[ntop]); } Free(stack); return ret; } #define IsNamestart(c) ((c == ':') || isalpha(c) || (c == '_')) #define IsNamepart(c) (IsNamestart(c) || (c == '-') || isdigit(c)) static char * XMLParseName(XMLexer *lexer) { int c; ssize_t point; point = XMLInitialiseBuffer(lexer); c = XMLGetc(lexer); if (!IsNamestart(c)) { XMLRollback(lexer); return NULL; } while ((c = XMLGetc(lexer))) { if (!IsNamepart(c)) { break; } } XMLUngetc(lexer, c); return XMLStringify(lexer, point); } static ssize_t XMLInitialiseBuffer(XMLexer *lexer) { if (!lexer) { return -1; } if (lexer->length != -1) { return lexer->top; } lexer->length = 0; lexer->top = 0; lexer->buffer = NULL; return 0; } static int XMLGetc(XMLexer *lexer) { int ch; if (lexer->length == -1) { return StreamGetc(lexer->stream); } ch = StreamGetc(lexer->stream); if (lexer->top >= lexer->length) { lexer->length += 8; lexer->buffer = Realloc( lexer->buffer, lexer->length * sizeof(*lexer->buffer) ); } lexer->buffer[lexer->top++] = ch; return ch; } static void XMLUngetc(XMLexer *lexer, int ch) { if (!lexer || lexer->length == -1) { StreamUngetc(lexer->stream, ch); return; } if (lexer->top == 0) { return; } StreamUngetc(lexer->stream, ch); lexer->top--; } static void XMLEndBuffer(XMLexer *lexer) { if (!lexer || lexer->length == -1) { return; } lexer->top = -1; lexer->length = -1; Free(lexer->buffer); lexer->buffer = NULL; } static void XMLRollback(XMLexer *lexer) { ssize_t i; if (!lexer || lexer->length == -1) { return; } for (i = lexer->top - 1; i >= 0; i--) { StreamUngetc(lexer->stream, lexer->buffer[i]); } XMLEndBuffer(lexer); } void XMLReset(XMLexer *lexer, ssize_t loc) { ssize_t i; if (!lexer || lexer->length == -1) { return; } for (i = lexer->top - 1; i >= loc; i--) { StreamUngetc(lexer->stream, lexer->buffer[i]); } lexer->top = loc; } char * XMLStringify(XMLexer *lexer, ssize_t point) { ssize_t i; char *str; if (!lexer || lexer->length == -1) { return NULL; } str = Malloc(lexer->top + 1); memset(str, '\0', lexer->top + 1); for (i = point; i < lexer->top; i++) { str[i - point] = (char) lexer->buffer[i]; } XMLEndBuffer(lexer); return str; } #define IsSpace(c) ((c == ' ') || (c == 0x09) || (c == 0x0D) || (c == 0x0A)) bool XMLSkipSpace(XMLexer *lexer) { int c; bool r = false; while ((c = XMLGetc(lexer)) != EOF) { if (!IsSpace(c)) { break; } r = true; } XMLUngetc(lexer, c); return r; } HashMap * XMLReadProps(XMLexer *lexer) { ssize_t point = XMLInitialiseBuffer(lexer); HashMap *map = NULL; bool error = false; while (true) { char *name; char *value = NULL; if (!XMLSkipSpace(lexer)) { /* A lack of space is totally excepted */ break; } name = XMLParseName(lexer); if (!name) { /* A lack of name is totally excepted */ break; } //Log(LOG_INFO, "K=%s...", name); value = StrDuplicate(""); if (XMLookahead(lexer, "=", true)) { /* TODO: Values aren't names. */ Free(value); value = XMLParseAttValue(lexer); if (!value) { error = true; Free(name); break; } } if (!map) { map = HashMapCreate(); } HashMapSet(map, name, value); Free(name); } if (error || !map) { XMLReset(lexer, point); } return map; } static XMLEvent * XMLCreateStart(XMLexer *lexer, HashMap *attrs) { XMLEvent *event = Malloc(sizeof(*event)); size_t elements = ArraySize(lexer->data.elements) - 1; char *h_element = StrDuplicate(ArrayGet(lexer->data.elements, elements)); event->type = XML_LEXER_STARTELEM; event->element = h_element; event->attrs = attrs; event->data = NULL; /* TODO */ event->line = 0; event->col = 0; event->offset = 0; return event; } XMLEvent * XMLCreateEnd(XMLexer *lexer, char *end) { XMLEvent *event = Malloc(sizeof(*event)); event->type = XML_LEXER_ENDELEM; event->element = end; event->attrs = NULL; event->data = NULL; /* TODO */ event->line = 0; event->col = 0; event->offset = 0; (void) lexer; return event; } static XMLEvent * XMLCreateEmptyElem(XMLexer *lexer, HashMap *attrs) { XMLEvent *event = Malloc(sizeof(*event)); size_t elements = ArraySize(lexer->data.elements) - 1; char *h_element = StrDuplicate(ArrayGet(lexer->data.elements, elements)); event->type = XML_LEXER_ELEM; event->element = h_element; event->attrs = attrs; event->data = NULL; /* TODO */ event->line = 0; event->col = 0; event->offset = 0; return event; } static char * XMLDecodeString(char *s) { char *ret = NULL, *tmp; char cs[2] = { 0 }; while (*s) { cs[0] = *s; if (!strncmp(s, "'", 6)) { cs[0] = '\''; s += 6; } else if (!strncmp(s, """, 6)) { cs[0] = '"'; s += 6; } else if (!strncmp(s, "<", 4)) { cs[0] = '<'; s += 4; } else if (!strncmp(s, ">", 4)) { cs[0] = '>'; s += 4; } else if (!strncmp(s, "&", 5)) { cs[0] = '&'; s += 5; } else if (!strncmp(s, "&#", 2)) { char *dec = s + 2; char *end = strchr(dec, ';'); if (!end) { s++; } else { /* TODO: Decode any Unicode glyph as UTF-8. */ int val = strtol(dec, &end, 10); cs[0] = val; s = end + 1; } } else { s++; } /* TODO: Support hexcodes */ tmp = ret; ret = StrConcat(2, ret, cs); Free(tmp); } return ret; } XMLEvent * XMLCreateData(XMLexer *lexer) { XMLEvent *event = Malloc(sizeof(*event)); size_t elements = ArraySize(lexer->data.elements); event->type = XML_LEXER_DATA; event->element = elements ? StrDuplicate(ArrayGet(lexer->data.elements, elements - 1)) : NULL; event->attrs = NULL; event->data = XMLDecodeString(lexer->data.str); /* TODO */ event->line = 0; event->col = 0; event->offset = 0; Free(lexer->data.str); lexer->data.str = NULL; return event; } static XMLEvent * XMLCreateRelax(XMLexer *lexer) { XMLEvent *event = Malloc(sizeof(*event)); size_t elements = ArraySize(lexer->data.elements); event->type = XML_RELAX; event->element = elements ? StrDuplicate(ArrayGet(lexer->data.elements, elements - 1)) : NULL; event->attrs = NULL; event->data = NULL; /* TODO */ event->line = 0; event->col = 0; event->offset = 0; return event; } static void XMLPushElement(XMLexer *lexer, char *e) { ArrayAdd(lexer->data.elements, e); } static char * XMLPopElement(XMLexer *lexer) { size_t n = ArraySize(lexer->data.elements); if (n == 0) { return NULL; } return ArrayDelete(lexer->data.elements, n - 1); } #define IsNormalQ(c) ((c != '<') && (c != '&') && (c != '\'')) #define IsNormalD(c) ((c != '<') && (c != '&') && (c != '"')) static char * XMLParseAttQuote(XMLexer *lexer) { int c; ssize_t point; char *str; point = XMLInitialiseBuffer(lexer); while ((c = XMLGetc(lexer))) { if (c == '&') { //char *code = NULL; int c2; int p2 = XMLInitialiseBuffer(lexer); int j = 0; while ((c2 = XMLGetc(lexer)) && c2 != EOF && j < 8) { if (c2 == ';') { break; } j++; } if (c2 != ';') { XMLReset(lexer, p2); } } else if (!IsNormalQ(c)) { break; } } if (c != '\'') { XMLUngetc(lexer, c); return NULL; } /* TODO: Decode the string */ str = XMLStringify(lexer, point); str[strlen(str) - 1] = '\0'; /* Trim the quote. */ return str; } static char * XMLParseAttDouble(XMLexer *lexer) { int c; ssize_t point; char *str; point = XMLInitialiseBuffer(lexer); while ((c = XMLGetc(lexer))) { //Log(LOG_INFO, "E2=%c", c); if (c == '&') { int c2; int p2 = XMLInitialiseBuffer(lexer); int j = 0; while ((c2 = XMLGetc(lexer)) && c2 != EOF && j < 8) { if (c2 == ';') { break; } j++; } if (c2 != ';') { XMLReset(lexer, p2); } continue; } else if (!IsNormalD(c)) { break; } } if (c != '"') { XMLUngetc(lexer, c); return NULL; } /* TODO: Decode the string */ str = XMLStringify(lexer, point); str[strlen(str) - 1] = '\0'; /* Trim the quote. */ return str; } static char * XMLParseAttValue(XMLexer *lexer) { XMLInitialiseBuffer(lexer); if (XMLookahead(lexer, "'", true)) { return XMLParseAttQuote(lexer); } else if (XMLookahead(lexer, "\"", true)) { return XMLParseAttDouble(lexer); } return NULL; }