diff options
author | Nikias Bassen | 2016-10-22 04:39:47 +0200 |
---|---|---|
committer | Nikias Bassen | 2016-10-22 04:39:47 +0200 |
commit | 392135c7db4d9cb4a14ff5935d7c4c6e21363847 (patch) | |
tree | 0e19125ed99b6b2ced754d1b9b3f4bc5245f8c39 /src/xplist.c | |
parent | a3263ad344ff315ac1cba96f0b84b9afff6da787 (diff) | |
download | libplist-392135c7db4d9cb4a14ff5935d7c4c6e21363847.tar.gz libplist-392135c7db4d9cb4a14ff5935d7c4c6e21363847.tar.bz2 |
Remove libxml2 dependency in favor of custom XML parsing
Diffstat (limited to 'src/xplist.c')
-rw-r--r-- | src/xplist.c | 1002 |
1 files changed, 652 insertions, 350 deletions
diff --git a/src/xplist.c b/src/xplist.c index fc665ac..87b21bb 100644 --- a/src/xplist.c +++ b/src/xplist.c @@ -2,8 +2,8 @@ * xplist.c * XML plist implementation * + * Copyright (c) 2010-2016 Nikias Bassen All Rights Reserved. * Copyright (c) 2010-2015 Martin Szulecki All Rights Reserved. - * Copyright (c) 2010-2014 Nikias Bassen All Rights Reserved. * Copyright (c) 2008 Jonathan Beck All Rights Reserved. * * This library is free software; you can redistribute it and/or @@ -21,6 +21,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif #include <string.h> #include <assert.h> @@ -31,121 +34,55 @@ #include <inttypes.h> #include <math.h> -#include <libxml/xmlIO.h> -#include <libxml/parser.h> -#include <libxml/tree.h> - #include <node.h> #include <node_list.h> #include <node_iterator.h> #include "plist.h" #include "base64.h" +#include "strbuf.h" #include "time64.h" -#define XPLIST_TEXT BAD_CAST("text") -#define XPLIST_KEY BAD_CAST("key") -#define XPLIST_FALSE BAD_CAST("false") -#define XPLIST_TRUE BAD_CAST("true") -#define XPLIST_INT BAD_CAST("integer") -#define XPLIST_REAL BAD_CAST("real") -#define XPLIST_DATE BAD_CAST("date") -#define XPLIST_DATA BAD_CAST("data") -#define XPLIST_STRING BAD_CAST("string") -#define XPLIST_ARRAY BAD_CAST("array") -#define XPLIST_DICT BAD_CAST("dict") +#define XPLIST_TEXT "text" +#define XPLIST_KEY "key" +#define XPLIST_FALSE "false" +#define XPLIST_TRUE "true" +#define XPLIST_INT "integer" +#define XPLIST_REAL "real" +#define XPLIST_DATE "date" +#define XPLIST_DATA "data" +#define XPLIST_STRING "string" +#define XPLIST_ARRAY "array" +#define XPLIST_DICT "dict" #define MAC_EPOCH 978307200 -static const char *plist_base = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\ +static const char XML_PLIST_PROLOG[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\ <!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\n\ -<plist version=\"1.0\">\n\ -</plist>\0"; +<plist version=\"1.0\">\n"; +static const char XML_PLIST_EPILOG[] = "</plist>\n"; +#ifdef DEBUG +static int plist_xml_debug = 0; +#define PLIST_XML_ERR(...) if (plist_xml_debug) { fprintf(stderr, "libplist[xmlparser] ERROR: " __VA_ARGS__); } +#else +#define PLIST_XML_ERR(...) +#endif -/** Formats a block of text to be a given indentation and width. - * - * The total width of the return string will be depth + cols. - * - * @param buf The string to format. - * @param cols The number of text columns for returned block of text. - * @param depth The number of tabs to indent the returned block of text. - * - * @return The formatted string. - */ -static char *format_string(const char *buf, size_t len, int cols, int depth) +void plist_xml_init(void) { - if (!buf || !(len > 0)) return NULL; - int colw = depth + cols + 1; - int nlines = len / cols + 1; - char *new_buf = NULL; - int i = 0; - int j = 0; - - assert(cols >= 0); - assert(depth >= 0); - - new_buf = (char*) malloc(nlines * colw + depth + 1); - assert(new_buf != 0); - memset(new_buf, 0, nlines * colw + depth + 1); - - // Inserts new lines and tabs at appropriate locations - for (i = 0; i < nlines; i++) - { - new_buf[i * colw] = '\n'; - for (j = 0; j < depth; j++) - new_buf[i * colw + 1 + j] = '\t'; - memcpy(new_buf + i * colw + 1 + depth, buf + i * cols, (size_t)(i + 1) * cols <= len ? (size_t)cols : len - i * cols); + /* init XML stuff */ +#ifdef DEBUG + char *env_debug = getenv("PLIST_XML_DEBUG"); + if (env_debug && !strcmp(env_debug, "1")) { + plist_xml_debug = 1; } - new_buf[len + (1 + depth) * nlines] = '\n'; - - // Inserts final row of indentation and termination character - for (j = 0; j < depth; j++) - new_buf[len + (1 + depth) * nlines + 1 + j] = '\t'; - new_buf[len + (1 + depth) * nlines + depth + 1] = '\0'; - - return new_buf; -} - - - -struct xml_node -{ - xmlNodePtr xml; - uint32_t depth; -}; - -/** Creates a new plist XML document. - * - * @return The plist XML document. - */ -static xmlDocPtr new_xml_plist(void) -{ - char *plist = strdup(plist_base); - xmlDocPtr plist_xml = xmlParseMemory(plist, strlen(plist)); - - free(plist); - - return plist_xml; -} - -static struct node_t* new_key_node(const char* name) -{ - plist_data_t data = plist_new_plist_data(); - data->type = PLIST_KEY; - int size = strlen(name); - data->strval = strdup(name); - data->length = size; - return node_create(NULL, data); +#endif } -static struct node_t* new_uint_node(uint64_t value) +void plist_xml_deinit(void) { - plist_data_t data = plist_new_plist_data(); - data->type = PLIST_UINT; - data->intval = value; - data->length = sizeof(uint64_t); - return node_create(NULL, data); + /* deinit XML stuff */ } static void dtostr(char *buf, size_t bufsize, double realval) @@ -178,27 +115,21 @@ static void dtostr(char *buf, size_t bufsize, double realval) buf[p] = '\0'; } -static void node_to_xml(node_t* node, void *xml_struct) +static void node_to_xml(node_t* node, bytearray_t **outbuf, uint32_t depth) { - struct xml_node *xstruct = NULL; plist_data_t node_data = NULL; - xmlNodePtr child_node = NULL; char isStruct = FALSE; - char isUIDNode = FALSE; + char tagOpen = FALSE; - const xmlChar *tag = NULL; + const char *tag = NULL; char *val = NULL; - //for base64 - char *valtmp = NULL; - uint32_t i = 0; if (!node) return; - xstruct = (struct xml_node *) xml_struct; node_data = plist_get_data(node); switch (node_data->type) @@ -230,23 +161,17 @@ static void node_to_xml(node_t* node, void *xml_struct) case PLIST_STRING: tag = XPLIST_STRING; - val = strdup((char*) node_data->strval); + /* contents processed directly below */ break; case PLIST_KEY: tag = XPLIST_KEY; - val = strdup((char*) node_data->strval); + /* contents processed directly below */ break; case PLIST_DATA: tag = XPLIST_DATA; - if (node_data->length) - { - size_t len = node_data->length; - valtmp = base64encode(node_data->buff, &len); - val = format_string(valtmp, len, 68, xstruct->depth); - free(valtmp); - } + /* contents processed directly below */ break; case PLIST_ARRAY: tag = XPLIST_ARRAY; @@ -275,77 +200,161 @@ static void node_to_xml(node_t* node, void *xml_struct) } break; case PLIST_UID: - // special case for keyed encoding tag = XPLIST_DICT; - isStruct = TRUE; - isUIDNode = TRUE; - node_data->type = PLIST_DICT; - node_attach(node, new_key_node("CF$UID")); - node_attach(node, new_uint_node(node_data->intval)); + val = (char*)malloc(64); + if (node_data->length == 16) { + (void)snprintf(val, 64, "%"PRIu64, node_data->intval); + } else { + (void)snprintf(val, 64, "%"PRIi64, node_data->intval); + } break; default: break; } - for (i = 0; i < xstruct->depth; i++) - { - xmlNodeAddContent(xstruct->xml, BAD_CAST("\t")); + for (i = 0; i < depth; i++) { + str_buf_append(*outbuf, "\t", 1); } + + /* append tag */ + str_buf_append(*outbuf, "<", 1); + str_buf_append(*outbuf, tag, strlen(tag)); if (node_data->type == PLIST_STRING || node_data->type == PLIST_KEY) { + size_t j; + size_t len; + off_t start = 0; + off_t cur = 0; + + str_buf_append(*outbuf, ">", 1); + tagOpen = TRUE; + /* make sure we convert the following predefined xml entities */ /* < = < > = > ' = ' " = " & = & */ - child_node = xmlNewTextChild(xstruct->xml, NULL, tag, BAD_CAST(val)); - } else - child_node = xmlNewChild(xstruct->xml, NULL, tag, BAD_CAST(val)); - xmlNodeAddContent(xstruct->xml, BAD_CAST("\n")); - if (val) { - free(val); - } + len = strlen(node_data->strval); + for (j = 0; j < len; j++) { + switch (node_data->strval[j]) { + case '<': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, "<", 4); + start = cur+1; + break; + case '>': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, ">", 4); + start = cur+1; + break; + case '\'': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, "'", 6); + start = cur+1; + break; + case '"': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, """, 6); + start = cur+1; + break; + case '&': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, "&", 5); + start = cur+1; + break; + default: + break; + } + cur++; + } + str_buf_append(*outbuf, node_data->strval + start, cur - start); + } else if (node_data->type == PLIST_DATA) { + str_buf_append(*outbuf, ">", 1); + tagOpen = TRUE; + str_buf_append(*outbuf, "\n", 1); + if (node_data->length > 0) { + char *buf = malloc(80); + uint32_t j = 0; + uint32_t indent = (depth > 8) ? 8 : depth; + uint32_t maxread = ((76 - indent*8) / 4) * 3; + size_t count = 0; + size_t b64count = 0; + while (j < node_data->length) { + for (i = 0; i < indent; i++) { + str_buf_append(*outbuf, "\t", 1); + } + count = (node_data->length-j < maxread) ? node_data->length-j : maxread; + b64count = base64encode(buf, node_data->buff + j, count); + str_buf_append(*outbuf, buf, b64count); + str_buf_append(*outbuf, "\n", 1); + j+=count; + } + free(buf); + } + for (i = 0; i < depth; i++) { + str_buf_append(*outbuf, "\t", 1); + } + } else if (node_data->type == PLIST_UID) { + /* special case for UID nodes: create a DICT */ + str_buf_append(*outbuf, ">", 1); + tagOpen = TRUE; + str_buf_append(*outbuf, "\n", 1); + + /* add CF$UID key */ + for (i = 0; i < depth+1; i++) { + str_buf_append(*outbuf, "\t", 1); + } + str_buf_append(*outbuf, "<key>CF$UID</key>", 17); + str_buf_append(*outbuf, "\n", 1); - //add return for structured types - if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) - xmlNodeAddContent(child_node, BAD_CAST("\n")); + /* add UID value */ + for (i = 0; i < depth+1; i++) { + str_buf_append(*outbuf, "\t", 1); + } + str_buf_append(*outbuf, "<integer>", 9); + str_buf_append(*outbuf, val, strlen(val)); + str_buf_append(*outbuf, "</integer>", 10); + str_buf_append(*outbuf, "\n", 1); - //make sure we don't produce <data/> if it's empty - if ((node_data->type == PLIST_DATA) && !val) { - xmlNodeAddContent(child_node, BAD_CAST("\n")); - for (i = 0; i < xstruct->depth; i++) - { - xmlNodeAddContent(child_node, BAD_CAST("\t")); + for (i = 0; i < depth; i++) { + str_buf_append(*outbuf, "\t", 1); } + } else if (val) { + str_buf_append(*outbuf, ">", 1); + tagOpen = TRUE; + str_buf_append(*outbuf, val, strlen(val)); + } else if (isStruct) { + tagOpen = TRUE; + str_buf_append(*outbuf, ">", 1); + } else { + tagOpen = FALSE; + str_buf_append(*outbuf, "/>", 2); } + free(val); - if (isStruct) - { - struct xml_node child = { child_node, xstruct->depth + 1 }; + /* add return for structured types */ + if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) + str_buf_append(*outbuf, "\n", 1); + + if (isStruct) { node_iterator_t *ni = node_iterator_create(node->children); node_t *ch; while ((ch = node_iterator_next(ni))) { - node_to_xml(ch, &child); + node_to_xml(ch, outbuf, depth+1); } node_iterator_destroy(ni); } - //fix indent for structured types - if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) - { - for (i = 0; i < xstruct->depth; i++) - { - xmlNodeAddContent(child_node, BAD_CAST("\t")); + /* fix indent for structured types */ + if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) { + for (i = 0; i < depth; i++) { + str_buf_append(*outbuf, "\t", 1); } } - if (isUIDNode) - { - unsigned int num = node_n_children(node); - unsigned int j; - for (j = num; j > 0; j--) { - node_t* ch = node_nth_child(node, j-1); - node_detach(node, ch); - plist_free_data((plist_data_t)((node_t*)ch)->data); - node_destroy(ch); - } - node_data->type = PLIST_UID; + + if (tagOpen) { + /* add closing tag */ + str_buf_append(*outbuf, "</", 2); + str_buf_append(*outbuf, tag, strlen(tag)); + str_buf_append(*outbuf, ">", 1); } + str_buf_append(*outbuf, "\n", 1); return; } @@ -370,233 +379,526 @@ static void parse_date(const char *strval, struct TM *btime) btime->tm_isdst=0; } -static void xml_to_node(xmlNodePtr xml_node, plist_t * plist_node) +PLIST_API void plist_to_xml(plist_t plist, char **plist_xml, uint32_t * length) { - xmlNodePtr node = NULL; - plist_data_t data = NULL; - plist_t subnode = NULL; + strbuf_t *outbuf = str_buf_new(); - //for string - long len = 0; - int type = 0; + str_buf_append(outbuf, XML_PLIST_PROLOG, sizeof(XML_PLIST_PROLOG)-1); - if (!xml_node) - return; + node_to_xml(plist, &outbuf, 0); - for (node = xml_node->children; node; node = node->next) - { - - while (node && !xmlStrcmp(node->name, XPLIST_TEXT)) - node = node->next; - if (!node) - break; + str_buf_append(outbuf, XML_PLIST_EPILOG, sizeof(XML_PLIST_EPILOG)); - if (!xmlStrcmp(node->name, BAD_CAST("comment"))) { - continue; - } + *plist_xml = outbuf->data; + *length = outbuf->len - 1; - data = plist_new_plist_data(); - subnode = plist_new_node(data); - if (*plist_node) - node_attach(*plist_node, subnode); - else - *plist_node = subnode; + outbuf->data = NULL; + str_buf_free(outbuf); +} - if (!xmlStrcmp(node->name, XPLIST_TRUE)) - { - data->boolval = TRUE; - data->type = PLIST_BOOLEAN; - data->length = 1; - continue; - } +struct _parse_ctx { + const char *pos; + const char *end; + int err; +}; +typedef struct _parse_ctx* parse_ctx; - if (!xmlStrcmp(node->name, XPLIST_FALSE)) - { - data->boolval = FALSE; - data->type = PLIST_BOOLEAN; - data->length = 1; - continue; - } +static void parse_skip_ws(parse_ctx ctx) +{ + while (ctx->pos < ctx->end && ((*(ctx->pos) == ' ') || (*(ctx->pos) == '\t') || (*(ctx->pos) == '\r') || (*(ctx->pos) == '\n'))) { + ctx->pos++; + } +} - if (!xmlStrcmp(node->name, XPLIST_INT)) - { - xmlChar *strval = xmlNodeGetContent(node); - int is_negative = 0; - char *str = (char*)strval; - if ((str[0] == '-') || (str[0] == '+')) { - if (str[0] == '-') { - is_negative = 1; - } - str++; - } - char* endp = NULL; - data->intval = strtoull((char*)str, &endp, 0); - if ((endp != NULL) && (strlen(endp) > 0)) { - fprintf(stderr, "%s: integer parse error: string contains invalid characters: '%s'\n", __func__, endp); - } - if (is_negative || (data->intval <= INT64_MAX)) { - int64_t v = data->intval; - if (is_negative) { - v = -v; - } - data->intval = (uint64_t)v; - data->length = 8; - } else { - data->length = 16; +static void find_char(parse_ctx ctx, char c, int skip_quotes) +{ + while (ctx->pos < ctx->end && (*(ctx->pos) != c)) { + if (skip_quotes && (c != '"') && (*(ctx->pos) == '"')) { + ctx->pos++; + find_char(ctx, '"', 0); + if (*(ctx->pos) != '"') { + PLIST_XML_ERR("Unmatched double quote\n"); + return; } - data->type = PLIST_UINT; - xmlFree(strval); - continue; } + ctx->pos++; + } +} - if (!xmlStrcmp(node->name, XPLIST_REAL)) - { - xmlChar *strval = xmlNodeGetContent(node); - data->realval = atof((char *) strval); - data->type = PLIST_REAL; - data->length = 8; - xmlFree(strval); - continue; +static void find_str(parse_ctx ctx, const char *str, int skip_quotes) +{ + size_t len = strlen(str); + while (ctx->pos < (ctx->end - len)) { + if (!strncmp(ctx->pos, str, len)) { + break; } - - if (!xmlStrcmp(node->name, XPLIST_DATE)) - { - xmlChar *strval = xmlNodeGetContent(node); - Time64_T timev = 0; - if (strlen((const char*)strval) >= 11) { - struct TM btime; - parse_date((const char*)strval, &btime); - timev = timegm64(&btime); + if (skip_quotes && (*(ctx->pos) == '"')) { + ctx->pos++; + find_char(ctx, '"', 0); + if (*(ctx->pos) != '"') { + PLIST_XML_ERR("Unmatched double quote\n"); + return; } - data->realval = (double)(timev - MAC_EPOCH); - data->type = PLIST_DATE; - data->length = sizeof(double); - xmlFree(strval); - continue; } + ctx->pos++; + } +} - if (!xmlStrcmp(node->name, XPLIST_STRING)) - { - xmlChar *strval = xmlNodeGetContent(node); - len = strlen((char *) strval); - type = xmlDetectCharEncoding(strval, len); - - if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type) - { - data->strval = strdup((char *) strval); - data->type = PLIST_STRING; - data->length = strlen(data->strval); +static void find_next(parse_ctx ctx, const char *nextchars, int skip_quotes) +{ + int numchars = strlen(nextchars); + int i = 0; + while (ctx->pos < ctx->end) { + if (skip_quotes && (*(ctx->pos) == '"')) { + ctx->pos++; + find_char(ctx, '"', 0); + if (*(ctx->pos) != '"') { + PLIST_XML_ERR("Unmatched double quote\n"); + return; } - xmlFree(strval); - continue; } - - if (!xmlStrcmp(node->name, XPLIST_KEY)) - { - xmlChar *strval = xmlNodeGetContent(node); - len = strlen((char *) strval); - type = xmlDetectCharEncoding(strval, len); - - if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type) - { - data->strval = strdup((char *) strval); - data->type = PLIST_KEY; - data->length = strlen(data->strval); + for (i = 0; i < numchars; i++) { + if (*(ctx->pos) == nextchars[i]) { + return; } - xmlFree(strval); - continue; } + ctx->pos++; + } +} - if (!xmlStrcmp(node->name, XPLIST_DATA)) - { - xmlChar *strval = xmlNodeGetContent(node); - size_t size = 0; - unsigned char *dec = base64decode((char*)strval, &size); - data->buff = (uint8_t *) malloc(size * sizeof(uint8_t)); - memcpy(data->buff, dec, size * sizeof(uint8_t)); - free(dec); - data->length = size; - data->type = PLIST_DATA; - xmlFree(strval); - continue; - } +static char* get_text_content(parse_ctx ctx, const char* tag, int skip_ws, int unescape_entities) +{ + const char *p; + const char *q; + int taglen; + char *str; + int i = 0; - if (!xmlStrcmp(node->name, XPLIST_ARRAY)) - { - data->type = PLIST_ARRAY; - xml_to_node(node, &subnode); - continue; - } + if (skip_ws) { + parse_skip_ws(ctx); + } + p = ctx->pos; + find_char(ctx, '<', 1); + if (*ctx->pos != '<') { PLIST_XML_ERR("didn't find <\n"); return NULL; } + q = ctx->pos; + ctx->pos++; + if (ctx->pos >= ctx->end || *ctx->pos != '/') { PLIST_XML_ERR("EOF or empty tag while parsing '%s'\n",p); return NULL; } + ctx->pos++; + taglen = strlen(tag); + if (ctx->pos >= ctx->end-taglen || strncmp(ctx->pos, tag, taglen)) { PLIST_XML_ERR("EOF or end tag mismatch\n"); return NULL;} + ctx->pos+=taglen; + if (ctx->pos >= ctx->end || *ctx->pos != '>') { PLIST_XML_ERR("EOF or no '>' after tag name\n"); return NULL;} + ctx->pos++; + int len = q - p; + if (len < 0) { + PLIST_XML_ERR("Couldn't find matching '%s' end tag\n", tag); + return NULL; + } + str = malloc(len+1); + strncpy(str, p, len); + str[len] = 0; - if (!xmlStrcmp(node->name, XPLIST_DICT)) - { - data->type = PLIST_DICT; - xml_to_node(node, &subnode); - if (plist_get_node_type(subnode) == PLIST_DICT) { - if (plist_dict_get_size(subnode) == 1) { - plist_t uid = plist_dict_get_item(subnode, "CF$UID"); - if (uid) { - uint64_t val = 0; - plist_get_uint_val(uid, &val); - plist_dict_remove_item(subnode, "CF$UID"); - plist_data_t nodedata = plist_get_data((node_t*)subnode); - free(nodedata->buff); - nodedata->type = PLIST_UID; - nodedata->length = sizeof(uint64_t); - nodedata->intval = val; - } + if (!unescape_entities) { + return str; + } + + /* unescape entities */ + while (i < len-1) { + if (str[i] == '&') { + char *entp = str + i + 1; + while (i < len && str[i] != ';') { + i++; + } + if (i >= len) { + break; + } + if (str+i > entp+1) { + int entlen = str+i - entp; + if (!strncmp(entp, "amp", 3)) { + /* the '&' is already there */ + } else if (!strncmp(entp, "apos", 4)) { + *(entp-1) = '\''; + } else if (!strncmp(entp, "quot", 4)) { + *(entp-1) = '"'; + } else if (!strncmp(entp, "lt", 2)) { + *(entp-1) = '<'; + } else if (!strncmp(entp, "gt", 2)) { + *(entp-1) = '>'; + } else { + /* unexpected entity, replace with ? */ + *(entp-1) = '?'; } + memmove(entp, str+i+1, len - i); + i -= entlen; + continue; } - continue; } + i++; } + + return str; } -PLIST_API void plist_to_xml(plist_t plist, char **plist_xml, uint32_t * length) +static void skip_text_content(parse_ctx ctx, const char* tag) { - xmlDocPtr plist_doc = NULL; - xmlNodePtr root_node = NULL; - struct xml_node root = { NULL, 0 }; - int size = 0; + int taglen; + find_char(ctx, '<', 1); + if (*ctx->pos != '<') return; + ctx->pos++; + taglen = strlen(tag); + if (ctx->pos >= ctx->end-taglen || strncmp(ctx->pos, tag, taglen)) return; + ctx->pos+=taglen; + if (ctx->pos >= ctx->end || *ctx->pos != '>') return; + ctx->pos++; +} - if (!plist || !plist_xml || *plist_xml) - return; - plist_doc = new_xml_plist(); - root_node = xmlDocGetRootElement(plist_doc); - root.xml = root_node; +static void node_from_xml(parse_ctx ctx, plist_t *plist) +{ + char *keyname = NULL; + while (ctx->pos < ctx->end && !ctx->err) { +#ifdef DEBUG + const char *start = ctx->pos; +#endif + parse_skip_ws(ctx); + if (ctx->pos >= ctx->end) { + break; + } + if (*ctx->pos != '<') { + PLIST_XML_ERR("Failed to parse XML. Expected: opening tag, found: '%s', pos: %s\n", start, ctx->pos); + ctx->pos = ctx->end; + break; + } + ctx->pos++; + if (ctx->pos >= ctx->end) { + break; + } - node_to_xml(plist, &root); + if (*(ctx->pos) == '?') { + find_str(ctx, "?>", 1); + if (ctx->pos >= ctx->end) { + break; + } + if (strncmp(ctx->pos, "?>", 2)) { + PLIST_XML_ERR("Couldn't find <? tag closing marker\n"); + ctx->pos = ctx->end; + return; + } + ctx->pos += 2; + continue; + } else if (*(ctx->pos) == '!') { + /* comment or DTD */ + if (((ctx->end - ctx->pos) > 3) && !strncmp(ctx->pos, "!--", 3)) { + ctx->pos += 3; + find_str(ctx,"-->", 0); + if (strncmp(ctx->pos, "-->", 3)) { + PLIST_XML_ERR("Couldn't find end of comment\n"); + ctx->pos = ctx->end; + return; + } + ctx->pos+=3; + } else if (((ctx->end - ctx->pos) > 8) && !strncmp(ctx->pos, "!DOCTYPE", 8)) { + int embedded_dtd = 0; + ctx->pos+=8; + while (ctx->pos < ctx->end) { + find_next(ctx, " \t\r\n[>", 1); + if (*ctx->pos == '[') { + embedded_dtd = 1; + break; + } else if (*ctx->pos == '>') { + /* end of DOCTYPE found already */ + ctx->pos++; + break; + } else { + parse_skip_ws(ctx); + } + } + if (embedded_dtd) { + find_str(ctx, "]>", 1); + if (strncmp(ctx->pos, "]>", 2)) { + PLIST_XML_ERR("Couldn't find end of DOCTYPE\n"); + ctx->pos = ctx->end; + return; + } + ctx->pos += 2; + } + } else { + PLIST_XML_ERR("Unknown ! special tag encountered\n"); + ctx->err++; + } + continue; + } else { + int is_empty = 0; + int closing_tag = 0; + const char *p = ctx->pos; + find_next(ctx," \r\n\t<>",0); + if (ctx->pos >= ctx->end) { + PLIST_XML_ERR("Unexpected EOF while parsing XML\n"); + ctx->pos = ctx->end; + ctx->err++; + free(keyname); + return; + } + int taglen = ctx->pos - p; + char *tag = malloc(taglen + 1); + strncpy(tag, p, taglen); + tag[taglen] = '\0'; + if (*ctx->pos != '>') { + find_next(ctx, "<>", 1); + } + if (ctx->pos >= ctx->end) { + PLIST_XML_ERR("Unexpected EOF while parsing XML\n"); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + if (*ctx->pos != '>') { + PLIST_XML_ERR("Missing '>' for tag '%s'\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + if (*(ctx->pos-1) == '/') { + tag[ctx->pos - p - 1] = '\0'; + is_empty = 1; + } + ctx->pos++; + if (!strcmp(tag, "plist")) { + free(tag); + if (is_empty) { + return; + } + if (!*plist) { + /* only process first plist node found */ + node_from_xml(ctx, plist); + } + continue; + } - xmlChar* tmp = NULL; - xmlDocDumpMemory(plist_doc, &tmp, &size); - if (size >= 0 && tmp) - { - /* make sure to copy the terminating 0-byte */ - *plist_xml = (char*)malloc((size+1) * sizeof(char)); - memcpy(*plist_xml, tmp, size+1); - *length = size; - xmlFree(tmp); - tmp = NULL; + plist_data_t data = plist_new_plist_data(); + plist_t subnode = plist_new_node(data); + + if (!strcmp(tag, XPLIST_DICT)) { + data->type = PLIST_DICT; + } else if (!strcmp(tag, XPLIST_ARRAY)) { + data->type = PLIST_ARRAY; + } else if (!strcmp(tag, XPLIST_INT)) { + if (!is_empty) { + char *str_content = get_text_content(ctx, tag, 1, 0); + if (!str_content) { + PLIST_XML_ERR("Couldn't find end tag for '%s'\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + char *str = str_content; + int is_negative = 0; + if ((str[0] == '-') || (str[0] == '+')) { + if (str[0] == '-') { + is_negative = 1; + } + str++; + } + char* endp = NULL; + data->intval = strtoull((char*)str, &endp, 0); + if ((endp != NULL) && (strlen(endp) > 0)) { + PLIST_XML_ERR("integer parse error: string contains invalid characters: '%s'\n", endp); + } + if (is_negative || (data->intval <= INT64_MAX)) { + int64_t v = data->intval; + if (is_negative) { + v = -v; + } + data->intval = (uint64_t)v; + data->length = 8; + } else { + data->length = 16; + } + free(str_content); + } else { + data->intval = 0; + data->length = 8; + } + data->type = PLIST_UINT; + } else if (!strcmp(tag, XPLIST_REAL)) { + if (!is_empty) { + char *strval = get_text_content(ctx, tag, 1, 0); + data->realval = atof((char *) strval); + free(strval); + } + data->type = PLIST_REAL; + data->length = 8; + } else if (!strcmp(tag, XPLIST_TRUE)) { + if (!is_empty) { + skip_text_content(ctx, tag); + } + data->type = PLIST_BOOLEAN; + data->boolval = 1; + data->length = 1; + } else if (!strcmp(tag, XPLIST_FALSE)) { + if (!is_empty) { + skip_text_content(ctx, tag); + } + data->type = PLIST_BOOLEAN; + data->boolval = 0; + data->length = 1; + } else if (!strcmp(tag, XPLIST_STRING) || !strcmp(tag, XPLIST_KEY)) { + if (!is_empty) { + char *str = get_text_content(ctx, tag, 0, 1); + if (!str) { + PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + if (!strcmp(tag, "key") && !keyname && *plist && (plist_get_node_type(*plist) == PLIST_DICT)) { + keyname = str; + free(tag); + plist_free(subnode); + subnode = NULL; + ctx->pos++; + continue; + } else { + data->strval = str; + data->length = strlen(str); + } + } else { + data->strval = strdup(""); + data->length = 0; + } + data->type = PLIST_STRING; + } else if (!strcmp(tag, XPLIST_DATA)) { + if (!is_empty) { + char *strval = get_text_content(ctx, tag, 1, 0); + if (!strval) { + PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + size_t size = 0; + data->buff = base64decode((char*)strval, &size); + free(strval); + data->length = size; + } + data->type = PLIST_DATA; + } else if (!strcmp(tag, XPLIST_DATE)) { + if (!is_empty) { + char *strval = get_text_content(ctx, tag, 1, 0); + if (!strval) { + PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + Time64_T timev = 0; + if (strlen((const char*)strval) >= 11) { + struct TM btime; + parse_date((const char*)strval, &btime); + timev = timegm64(&btime); + } + data->realval = (double)(timev - MAC_EPOCH); + free(strval); + } + data->length = sizeof(double); + data->type = PLIST_DATE; + } else if (tag[0] == '/') { + closing_tag = 1; + } else { + PLIST_XML_ERR("Unexpected tag '<%s%s>' encountered while parsing XML\n", tag, (is_empty) ? "/" : ""); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + free(tag); + if (subnode && !closing_tag) { + /* parse sub nodes for structured types */ + if (data->type == PLIST_DICT || data->type == PLIST_ARRAY) { + if (!is_empty) { + /* only if not empty */ + node_from_xml(ctx, &subnode); + if ((data->type == PLIST_DICT) && (plist_dict_get_size(subnode) == 1)) { + /* convert XML CF$UID dictionaries to PLIST_UID nodes */ + plist_t uid = plist_dict_get_item(subnode, "CF$UID"); + if (uid) { + uint64_t val = 0; + plist_get_uint_val(uid, &val); + plist_dict_remove_item(subnode, "CF$UID"); + plist_data_t nodedata = plist_get_data((node_t*)subnode); + free(nodedata->buff); + nodedata->type = PLIST_UID; + nodedata->length = sizeof(uint64_t); + nodedata->intval = val; + } + } + } + } + if (!*plist) { + /* no parent? make this node the new parent node */ + *plist = subnode; + subnode = NULL; + } else { + switch (plist_get_node_type(*plist)) { + case PLIST_DICT: + if (!keyname || *keyname == '\0') { + PLIST_XML_ERR("missing or empty key name while adding dict item\n"); + ctx->err++; + break; + } + plist_dict_set_item(*plist, keyname, subnode); + subnode = NULL; + break; + case PLIST_ARRAY: + plist_array_append_item(*plist, subnode); + subnode = NULL; + break; + default: + /* should not happen */ + PLIST_XML_ERR("while parsing XML plist: parent is not a structered node.\n"); + ctx->err++; + break; + } + } + } else if (closing_tag && *plist && plist_get_node_type(*plist) == PLIST_DICT && keyname) { + PLIST_XML_ERR("missing value node in dict\n"); + ctx->err++; + } + free(keyname); + keyname = NULL; + plist_free(subnode); + if (closing_tag) { + break; + } + } + ctx->pos++; + } + if (ctx->err) { + plist_free(*plist); + *plist = NULL; } - xmlFreeDoc(plist_doc); -} - -static xmlParserInputPtr plist_xml_external_entity_loader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt) -{ - return NULL; } PLIST_API void plist_from_xml(const char *plist_xml, uint32_t length, plist_t * plist) { - /* CVE-2013-0339: disable external entity loading to prevent XXE vulnerability */ - xmlSetExternalEntityLoader(plist_xml_external_entity_loader); + if (!plist_xml || (length == 0)) { + *plist = NULL; + return; + } - /* read XML from memory and disable network access for security reasons */ - xmlDocPtr plist_doc = xmlReadMemory(plist_xml, length, "plist_from_xml:memory", NULL, XML_PARSE_NONET); - if (plist_doc) { - xmlNodePtr root_node = xmlDocGetRootElement(plist_doc); + struct _parse_ctx ctx = { plist_xml, plist_xml + length, 0 }; - xml_to_node(root_node, plist); - xmlFreeDoc(plist_doc); - } + node_from_xml(&ctx, plist); } |