summary | refs | log | tree | commit | diff | stats
path: root/src/xplist.c
diff options
context:
space:
mode:
author: Nikias Bassen, 2016-10-22 04:39:47 +0200
committer: Nikias Bassen, 2016-10-22 04:39:47 +0200
commit: 392135c7db4d9cb4a14ff5935d7c4c6e21363847 (patch)
tree: 0e19125ed99b6b2ced754d1b9b3f4bc5245f8c39 /src/xplist.c
parent: a3263ad344ff315ac1cba96f0b84b9afff6da787 (diff)
download: libplist-392135c7db4d9cb4a14ff5935d7c4c6e21363847.tar.gz
libplist-392135c7db4d9cb4a14ff5935d7c4c6e21363847.tar.bz2
Remove libxml2 dependency in favor of custom XML parsing
Diffstat (limited to 'src/xplist.c')
-rw-r--r-- src/xplist.c | 1002
1 files changed, 652 insertions, 350 deletions
diff --git a/src/xplist.c b/src/xplist.c
index fc665ac..87b21bb 100644
--- a/src/xplist.c
+++ b/src/xplist.c
@@ -2,8 +2,8 @@
* xplist.c
* XML plist implementation
*
+ * Copyright (c) 2010-2016 Nikias Bassen All Rights Reserved.
* Copyright (c) 2010-2015 Martin Szulecki All Rights Reserved.
- * Copyright (c) 2010-2014 Nikias Bassen All Rights Reserved.
* Copyright (c) 2008 Jonathan Beck All Rights Reserved.
*
* This library is free software; you can redistribute it and/or
@@ -21,6 +21,9 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <string.h>
#include <assert.h>
@@ -31,121 +34,55 @@
#include <inttypes.h>
#include <math.h>
-#include <libxml/xmlIO.h>
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-
#include <node.h>
#include <node_list.h>
#include <node_iterator.h>
#include "plist.h"
#include "base64.h"
+#include "strbuf.h"
#include "time64.h"
-#define XPLIST_TEXT BAD_CAST("text")
-#define XPLIST_KEY BAD_CAST("key")
-#define XPLIST_FALSE BAD_CAST("false")
-#define XPLIST_TRUE BAD_CAST("true")
-#define XPLIST_INT BAD_CAST("integer")
-#define XPLIST_REAL BAD_CAST("real")
-#define XPLIST_DATE BAD_CAST("date")
-#define XPLIST_DATA BAD_CAST("data")
-#define XPLIST_STRING BAD_CAST("string")
-#define XPLIST_ARRAY BAD_CAST("array")
-#define XPLIST_DICT BAD_CAST("dict")
+#define XPLIST_TEXT "text"
+#define XPLIST_KEY "key"
+#define XPLIST_FALSE "false"
+#define XPLIST_TRUE "true"
+#define XPLIST_INT "integer"
+#define XPLIST_REAL "real"
+#define XPLIST_DATE "date"
+#define XPLIST_DATA "data"
+#define XPLIST_STRING "string"
+#define XPLIST_ARRAY "array"
+#define XPLIST_DICT "dict"
#define MAC_EPOCH 978307200
-static const char *plist_base = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
+static const char XML_PLIST_PROLOG[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\n\
-<plist version=\"1.0\">\n\
-</plist>\0";
+<plist version=\"1.0\">\n";
+static const char XML_PLIST_EPILOG[] = "</plist>\n";
+#ifdef DEBUG
+static int plist_xml_debug = 0;
+#define PLIST_XML_ERR(...) if (plist_xml_debug) { fprintf(stderr, "libplist[xmlparser] ERROR: " __VA_ARGS__); }
+#else
+#define PLIST_XML_ERR(...)
+#endif
-/** Formats a block of text to be a given indentation and width.
- *
- * The total width of the return string will be depth + cols.
- *
- * @param buf The string to format.
- * @param cols The number of text columns for returned block of text.
- * @param depth The number of tabs to indent the returned block of text.
- *
- * @return The formatted string.
- */
-static char *format_string(const char *buf, size_t len, int cols, int depth)
+void plist_xml_init(void)
{
- if (!buf || !(len > 0)) return NULL;
- int colw = depth + cols + 1;
- int nlines = len / cols + 1;
- char *new_buf = NULL;
- int i = 0;
- int j = 0;
-
- assert(cols >= 0);
- assert(depth >= 0);
-
- new_buf = (char*) malloc(nlines * colw + depth + 1);
- assert(new_buf != 0);
- memset(new_buf, 0, nlines * colw + depth + 1);
-
- // Inserts new lines and tabs at appropriate locations
- for (i = 0; i < nlines; i++)
- {
- new_buf[i * colw] = '\n';
- for (j = 0; j < depth; j++)
- new_buf[i * colw + 1 + j] = '\t';
- memcpy(new_buf + i * colw + 1 + depth, buf + i * cols, (size_t)(i + 1) * cols <= len ? (size_t)cols : len - i * cols);
+ /* init XML stuff */
+#ifdef DEBUG
+ char *env_debug = getenv("PLIST_XML_DEBUG");
+ if (env_debug && !strcmp(env_debug, "1")) {
+ plist_xml_debug = 1;
}
- new_buf[len + (1 + depth) * nlines] = '\n';
-
- // Inserts final row of indentation and termination character
- for (j = 0; j < depth; j++)
- new_buf[len + (1 + depth) * nlines + 1 + j] = '\t';
- new_buf[len + (1 + depth) * nlines + depth + 1] = '\0';
-
- return new_buf;
-}
-
-
-
-struct xml_node
-{
- xmlNodePtr xml;
- uint32_t depth;
-};
-
-/** Creates a new plist XML document.
- *
- * @return The plist XML document.
- */
-static xmlDocPtr new_xml_plist(void)
-{
- char *plist = strdup(plist_base);
- xmlDocPtr plist_xml = xmlParseMemory(plist, strlen(plist));
-
- free(plist);
-
- return plist_xml;
-}
-
-static struct node_t* new_key_node(const char* name)
-{
- plist_data_t data = plist_new_plist_data();
- data->type = PLIST_KEY;
- int size = strlen(name);
- data->strval = strdup(name);
- data->length = size;
- return node_create(NULL, data);
+#endif
}
-static struct node_t* new_uint_node(uint64_t value)
+void plist_xml_deinit(void)
{
- plist_data_t data = plist_new_plist_data();
- data->type = PLIST_UINT;
- data->intval = value;
- data->length = sizeof(uint64_t);
- return node_create(NULL, data);
+ /* deinit XML stuff */
}
static void dtostr(char *buf, size_t bufsize, double realval)
@@ -178,27 +115,21 @@ static void dtostr(char *buf, size_t bufsize, double realval)
buf[p] = '\0';
}
-static void node_to_xml(node_t* node, void *xml_struct)
+static void node_to_xml(node_t* node, bytearray_t **outbuf, uint32_t depth)
{
- struct xml_node *xstruct = NULL;
plist_data_t node_data = NULL;
- xmlNodePtr child_node = NULL;
char isStruct = FALSE;
- char isUIDNode = FALSE;
+ char tagOpen = FALSE;
- const xmlChar *tag = NULL;
+ const char *tag = NULL;
char *val = NULL;
- //for base64
- char *valtmp = NULL;
-
uint32_t i = 0;
if (!node)
return;
- xstruct = (struct xml_node *) xml_struct;
node_data = plist_get_data(node);
switch (node_data->type)
@@ -230,23 +161,17 @@ static void node_to_xml(node_t* node, void *xml_struct)
case PLIST_STRING:
tag = XPLIST_STRING;
- val = strdup((char*) node_data->strval);
+ /* contents processed directly below */
break;
case PLIST_KEY:
tag = XPLIST_KEY;
- val = strdup((char*) node_data->strval);
+ /* contents processed directly below */
break;
case PLIST_DATA:
tag = XPLIST_DATA;
- if (node_data->length)
- {
- size_t len = node_data->length;
- valtmp = base64encode(node_data->buff, &len);
- val = format_string(valtmp, len, 68, xstruct->depth);
- free(valtmp);
- }
+ /* contents processed directly below */
break;
case PLIST_ARRAY:
tag = XPLIST_ARRAY;
@@ -275,77 +200,161 @@ static void node_to_xml(node_t* node, void *xml_struct)
}
break;
case PLIST_UID:
- // special case for keyed encoding
tag = XPLIST_DICT;
- isStruct = TRUE;
- isUIDNode = TRUE;
- node_data->type = PLIST_DICT;
- node_attach(node, new_key_node("CF$UID"));
- node_attach(node, new_uint_node(node_data->intval));
+ val = (char*)malloc(64);
+ if (node_data->length == 16) {
+ (void)snprintf(val, 64, "%"PRIu64, node_data->intval);
+ } else {
+ (void)snprintf(val, 64, "%"PRIi64, node_data->intval);
+ }
break;
default:
break;
}
- for (i = 0; i < xstruct->depth; i++)
- {
- xmlNodeAddContent(xstruct->xml, BAD_CAST("\t"));
+ for (i = 0; i < depth; i++) {
+ str_buf_append(*outbuf, "\t", 1);
}
+
+ /* append tag */
+ str_buf_append(*outbuf, "<", 1);
+ str_buf_append(*outbuf, tag, strlen(tag));
if (node_data->type == PLIST_STRING || node_data->type == PLIST_KEY) {
+ size_t j;
+ size_t len;
+ off_t start = 0;
+ off_t cur = 0;
+
+ str_buf_append(*outbuf, ">", 1);
+ tagOpen = TRUE;
+
/* make sure we convert the following predefined xml entities */
/* < = &lt; > = &gt; ' = &apos; " = &quot; & = &amp; */
- child_node = xmlNewTextChild(xstruct->xml, NULL, tag, BAD_CAST(val));
- } else
- child_node = xmlNewChild(xstruct->xml, NULL, tag, BAD_CAST(val));
- xmlNodeAddContent(xstruct->xml, BAD_CAST("\n"));
- if (val) {
- free(val);
- }
+ len = strlen(node_data->strval);
+ for (j = 0; j < len; j++) {
+ switch (node_data->strval[j]) {
+ case '<':
+ str_buf_append(*outbuf, node_data->strval + start, cur - start);
+ str_buf_append(*outbuf, "&lt;", 4);
+ start = cur+1;
+ break;
+ case '>':
+ str_buf_append(*outbuf, node_data->strval + start, cur - start);
+ str_buf_append(*outbuf, "&gt;", 4);
+ start = cur+1;
+ break;
+ case '\'':
+ str_buf_append(*outbuf, node_data->strval + start, cur - start);
+ str_buf_append(*outbuf, "&apos;", 6);
+ start = cur+1;
+ break;
+ case '"':
+ str_buf_append(*outbuf, node_data->strval + start, cur - start);
+ str_buf_append(*outbuf, "&quot;", 6);
+ start = cur+1;
+ break;
+ case '&':
+ str_buf_append(*outbuf, node_data->strval + start, cur - start);
+ str_buf_append(*outbuf, "&amp;", 5);
+ start = cur+1;
+ break;
+ default:
+ break;
+ }
+ cur++;
+ }
+ str_buf_append(*outbuf, node_data->strval + start, cur - start);
+ } else if (node_data->type == PLIST_DATA) {
+ str_buf_append(*outbuf, ">", 1);
+ tagOpen = TRUE;
+ str_buf_append(*outbuf, "\n", 1);
+ if (node_data->length > 0) {
+ char *buf = malloc(80);
+ uint32_t j = 0;
+ uint32_t indent = (depth > 8) ? 8 : depth;
+ uint32_t maxread = ((76 - indent*8) / 4) * 3;
+ size_t count = 0;
+ size_t b64count = 0;
+ while (j < node_data->length) {
+ for (i = 0; i < indent; i++) {
+ str_buf_append(*outbuf, "\t", 1);
+ }
+ count = (node_data->length-j < maxread) ? node_data->length-j : maxread;
+ b64count = base64encode(buf, node_data->buff + j, count);
+ str_buf_append(*outbuf, buf, b64count);
+ str_buf_append(*outbuf, "\n", 1);
+ j+=count;
+ }
+ free(buf);
+ }
+ for (i = 0; i < depth; i++) {
+ str_buf_append(*outbuf, "\t", 1);
+ }
+ } else if (node_data->type == PLIST_UID) {
+ /* special case for UID nodes: create a DICT */
+ str_buf_append(*outbuf, ">", 1);
+ tagOpen = TRUE;
+ str_buf_append(*outbuf, "\n", 1);
+
+ /* add CF$UID key */
+ for (i = 0; i < depth+1; i++) {
+ str_buf_append(*outbuf, "\t", 1);
+ }
+ str_buf_append(*outbuf, "<key>CF$UID</key>", 17);
+ str_buf_append(*outbuf, "\n", 1);
- //add return for structured types
- if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT)
- xmlNodeAddContent(child_node, BAD_CAST("\n"));
+ /* add UID value */
+ for (i = 0; i < depth+1; i++) {
+ str_buf_append(*outbuf, "\t", 1);
+ }
+ str_buf_append(*outbuf, "<integer>", 9);
+ str_buf_append(*outbuf, val, strlen(val));
+ str_buf_append(*outbuf, "</integer>", 10);
+ str_buf_append(*outbuf, "\n", 1);
- //make sure we don't produce <data/> if it's empty
- if ((node_data->type == PLIST_DATA) && !val) {
- xmlNodeAddContent(child_node, BAD_CAST("\n"));
- for (i = 0; i < xstruct->depth; i++)
- {
- xmlNodeAddContent(child_node, BAD_CAST("\t"));
+ for (i = 0; i < depth; i++) {
+ str_buf_append(*outbuf, "\t", 1);
}
+ } else if (val) {
+ str_buf_append(*outbuf, ">", 1);
+ tagOpen = TRUE;
+ str_buf_append(*outbuf, val, strlen(val));
+ } else if (isStruct) {
+ tagOpen = TRUE;
+ str_buf_append(*outbuf, ">", 1);
+ } else {
+ tagOpen = FALSE;
+ str_buf_append(*outbuf, "/>", 2);
}
+ free(val);
- if (isStruct)
- {
- struct xml_node child = { child_node, xstruct->depth + 1 };
+ /* add return for structured types */
+ if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT)
+ str_buf_append(*outbuf, "\n", 1);
+
+ if (isStruct) {
node_iterator_t *ni = node_iterator_create(node->children);
node_t *ch;
while ((ch = node_iterator_next(ni))) {
- node_to_xml(ch, &child);
+ node_to_xml(ch, outbuf, depth+1);
}
node_iterator_destroy(ni);
}
- //fix indent for structured types
- if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT)
- {
- for (i = 0; i < xstruct->depth; i++)
- {
- xmlNodeAddContent(child_node, BAD_CAST("\t"));
+ /* fix indent for structured types */
+ if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) {
+ for (i = 0; i < depth; i++) {
+ str_buf_append(*outbuf, "\t", 1);
}
}
- if (isUIDNode)
- {
- unsigned int num = node_n_children(node);
- unsigned int j;
- for (j = num; j > 0; j--) {
- node_t* ch = node_nth_child(node, j-1);
- node_detach(node, ch);
- plist_free_data((plist_data_t)((node_t*)ch)->data);
- node_destroy(ch);
- }
- node_data->type = PLIST_UID;
+
+ if (tagOpen) {
+ /* add closing tag */
+ str_buf_append(*outbuf, "</", 2);
+ str_buf_append(*outbuf, tag, strlen(tag));
+ str_buf_append(*outbuf, ">", 1);
}
+ str_buf_append(*outbuf, "\n", 1);
return;
}
@@ -370,233 +379,526 @@ static void parse_date(const char *strval, struct TM *btime)
btime->tm_isdst=0;
}
-static void xml_to_node(xmlNodePtr xml_node, plist_t * plist_node)
+PLIST_API void plist_to_xml(plist_t plist, char **plist_xml, uint32_t * length)
{
- xmlNodePtr node = NULL;
- plist_data_t data = NULL;
- plist_t subnode = NULL;
+ strbuf_t *outbuf = str_buf_new();
- //for string
- long len = 0;
- int type = 0;
+ str_buf_append(outbuf, XML_PLIST_PROLOG, sizeof(XML_PLIST_PROLOG)-1);
- if (!xml_node)
- return;
+ node_to_xml(plist, &outbuf, 0);
- for (node = xml_node->children; node; node = node->next)
- {
-
- while (node && !xmlStrcmp(node->name, XPLIST_TEXT))
- node = node->next;
- if (!node)
- break;
+ str_buf_append(outbuf, XML_PLIST_EPILOG, sizeof(XML_PLIST_EPILOG));
- if (!xmlStrcmp(node->name, BAD_CAST("comment"))) {
- continue;
- }
+ *plist_xml = outbuf->data;
+ *length = outbuf->len - 1;
- data = plist_new_plist_data();
- subnode = plist_new_node(data);
- if (*plist_node)
- node_attach(*plist_node, subnode);
- else
- *plist_node = subnode;
+ outbuf->data = NULL;
+ str_buf_free(outbuf);
+}
- if (!xmlStrcmp(node->name, XPLIST_TRUE))
- {
- data->boolval = TRUE;
- data->type = PLIST_BOOLEAN;
- data->length = 1;
- continue;
- }
+struct _parse_ctx {
+ const char *pos;
+ const char *end;
+ int err;
+};
+typedef struct _parse_ctx* parse_ctx;
- if (!xmlStrcmp(node->name, XPLIST_FALSE))
- {
- data->boolval = FALSE;
- data->type = PLIST_BOOLEAN;
- data->length = 1;
- continue;
- }
+static void parse_skip_ws(parse_ctx ctx)
+{
+ while (ctx->pos < ctx->end && ((*(ctx->pos) == ' ') || (*(ctx->pos) == '\t') || (*(ctx->pos) == '\r') || (*(ctx->pos) == '\n'))) {
+ ctx->pos++;
+ }
+}
- if (!xmlStrcmp(node->name, XPLIST_INT))
- {
- xmlChar *strval = xmlNodeGetContent(node);
- int is_negative = 0;
- char *str = (char*)strval;
- if ((str[0] == '-') || (str[0] == '+')) {
- if (str[0] == '-') {
- is_negative = 1;
- }
- str++;
- }
- char* endp = NULL;
- data->intval = strtoull((char*)str, &endp, 0);
- if ((endp != NULL) && (strlen(endp) > 0)) {
- fprintf(stderr, "%s: integer parse error: string contains invalid characters: '%s'\n", __func__, endp);
- }
- if (is_negative || (data->intval <= INT64_MAX)) {
- int64_t v = data->intval;
- if (is_negative) {
- v = -v;
- }
- data->intval = (uint64_t)v;
- data->length = 8;
- } else {
- data->length = 16;
+static void find_char(parse_ctx ctx, char c, int skip_quotes)
+{
+ while (ctx->pos < ctx->end && (*(ctx->pos) != c)) {
+ if (skip_quotes && (c != '"') && (*(ctx->pos) == '"')) {
+ ctx->pos++;
+ find_char(ctx, '"', 0);
+ if (*(ctx->pos) != '"') {
+ PLIST_XML_ERR("Unmatched double quote\n");
+ return;
}
- data->type = PLIST_UINT;
- xmlFree(strval);
- continue;
}
+ ctx->pos++;
+ }
+}
- if (!xmlStrcmp(node->name, XPLIST_REAL))
- {
- xmlChar *strval = xmlNodeGetContent(node);
- data->realval = atof((char *) strval);
- data->type = PLIST_REAL;
- data->length = 8;
- xmlFree(strval);
- continue;
+static void find_str(parse_ctx ctx, const char *str, int skip_quotes)
+{
+ size_t len = strlen(str);
+ while (ctx->pos < (ctx->end - len)) {
+ if (!strncmp(ctx->pos, str, len)) {
+ break;
}
-
- if (!xmlStrcmp(node->name, XPLIST_DATE))
- {
- xmlChar *strval = xmlNodeGetContent(node);
- Time64_T timev = 0;
- if (strlen((const char*)strval) >= 11) {
- struct TM btime;
- parse_date((const char*)strval, &btime);
- timev = timegm64(&btime);
+ if (skip_quotes && (*(ctx->pos) == '"')) {
+ ctx->pos++;
+ find_char(ctx, '"', 0);
+ if (*(ctx->pos) != '"') {
+ PLIST_XML_ERR("Unmatched double quote\n");
+ return;
}
- data->realval = (double)(timev - MAC_EPOCH);
- data->type = PLIST_DATE;
- data->length = sizeof(double);
- xmlFree(strval);
- continue;
}
+ ctx->pos++;
+ }
+}
- if (!xmlStrcmp(node->name, XPLIST_STRING))
- {
- xmlChar *strval = xmlNodeGetContent(node);
- len = strlen((char *) strval);
- type = xmlDetectCharEncoding(strval, len);
-
- if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type)
- {
- data->strval = strdup((char *) strval);
- data->type = PLIST_STRING;
- data->length = strlen(data->strval);
+static void find_next(parse_ctx ctx, const char *nextchars, int skip_quotes)
+{
+ int numchars = strlen(nextchars);
+ int i = 0;
+ while (ctx->pos < ctx->end) {
+ if (skip_quotes && (*(ctx->pos) == '"')) {
+ ctx->pos++;
+ find_char(ctx, '"', 0);
+ if (*(ctx->pos) != '"') {
+ PLIST_XML_ERR("Unmatched double quote\n");
+ return;
}
- xmlFree(strval);
- continue;
}
-
- if (!xmlStrcmp(node->name, XPLIST_KEY))
- {
- xmlChar *strval = xmlNodeGetContent(node);
- len = strlen((char *) strval);
- type = xmlDetectCharEncoding(strval, len);
-
- if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type)
- {
- data->strval = strdup((char *) strval);
- data->type = PLIST_KEY;
- data->length = strlen(data->strval);
+ for (i = 0; i < numchars; i++) {
+ if (*(ctx->pos) == nextchars[i]) {
+ return;
}
- xmlFree(strval);
- continue;
}
+ ctx->pos++;
+ }
+}
- if (!xmlStrcmp(node->name, XPLIST_DATA))
- {
- xmlChar *strval = xmlNodeGetContent(node);
- size_t size = 0;
- unsigned char *dec = base64decode((char*)strval, &size);
- data->buff = (uint8_t *) malloc(size * sizeof(uint8_t));
- memcpy(data->buff, dec, size * sizeof(uint8_t));
- free(dec);
- data->length = size;
- data->type = PLIST_DATA;
- xmlFree(strval);
- continue;
- }
+static char* get_text_content(parse_ctx ctx, const char* tag, int skip_ws, int unescape_entities)
+{
+ const char *p;
+ const char *q;
+ int taglen;
+ char *str;
+ int i = 0;
- if (!xmlStrcmp(node->name, XPLIST_ARRAY))
- {
- data->type = PLIST_ARRAY;
- xml_to_node(node, &subnode);
- continue;
- }
+ if (skip_ws) {
+ parse_skip_ws(ctx);
+ }
+ p = ctx->pos;
+ find_char(ctx, '<', 1);
+ if (*ctx->pos != '<') { PLIST_XML_ERR("didn't find <\n"); return NULL; }
+ q = ctx->pos;
+ ctx->pos++;
+ if (ctx->pos >= ctx->end || *ctx->pos != '/') { PLIST_XML_ERR("EOF or empty tag while parsing '%s'\n",p); return NULL; }
+ ctx->pos++;
+ taglen = strlen(tag);
+ if (ctx->pos >= ctx->end-taglen || strncmp(ctx->pos, tag, taglen)) { PLIST_XML_ERR("EOF or end tag mismatch\n"); return NULL;}
+ ctx->pos+=taglen;
+ if (ctx->pos >= ctx->end || *ctx->pos != '>') { PLIST_XML_ERR("EOF or no '>' after tag name\n"); return NULL;}
+ ctx->pos++;
+ int len = q - p;
+ if (len < 0) {
+ PLIST_XML_ERR("Couldn't find matching '%s' end tag\n", tag);
+ return NULL;
+ }
+ str = malloc(len+1);
+ strncpy(str, p, len);
+ str[len] = 0;
- if (!xmlStrcmp(node->name, XPLIST_DICT))
- {
- data->type = PLIST_DICT;
- xml_to_node(node, &subnode);
- if (plist_get_node_type(subnode) == PLIST_DICT) {
- if (plist_dict_get_size(subnode) == 1) {
- plist_t uid = plist_dict_get_item(subnode, "CF$UID");
- if (uid) {
- uint64_t val = 0;
- plist_get_uint_val(uid, &val);
- plist_dict_remove_item(subnode, "CF$UID");
- plist_data_t nodedata = plist_get_data((node_t*)subnode);
- free(nodedata->buff);
- nodedata->type = PLIST_UID;
- nodedata->length = sizeof(uint64_t);
- nodedata->intval = val;
- }
+ if (!unescape_entities) {
+ return str;
+ }
+
+ /* unescape entities */
+ while (i < len-1) {
+ if (str[i] == '&') {
+ char *entp = str + i + 1;
+ while (i < len && str[i] != ';') {
+ i++;
+ }
+ if (i >= len) {
+ break;
+ }
+ if (str+i > entp+1) {
+ int entlen = str+i - entp;
+ if (!strncmp(entp, "amp", 3)) {
+ /* the '&' is already there */
+ } else if (!strncmp(entp, "apos", 4)) {
+ *(entp-1) = '\'';
+ } else if (!strncmp(entp, "quot", 4)) {
+ *(entp-1) = '"';
+ } else if (!strncmp(entp, "lt", 2)) {
+ *(entp-1) = '<';
+ } else if (!strncmp(entp, "gt", 2)) {
+ *(entp-1) = '>';
+ } else {
+ /* unexpected entity, replace with ? */
+ *(entp-1) = '?';
}
+ memmove(entp, str+i+1, len - i);
+ i -= entlen;
+ continue;
}
- continue;
}
+ i++;
}
+
+ return str;
}
-PLIST_API void plist_to_xml(plist_t plist, char **plist_xml, uint32_t * length)
+static void skip_text_content(parse_ctx ctx, const char* tag)
{
- xmlDocPtr plist_doc = NULL;
- xmlNodePtr root_node = NULL;
- struct xml_node root = { NULL, 0 };
- int size = 0;
+ int taglen;
+ find_char(ctx, '<', 1);
+ if (*ctx->pos != '<') return;
+ ctx->pos++;
+ taglen = strlen(tag);
+ if (ctx->pos >= ctx->end-taglen || strncmp(ctx->pos, tag, taglen)) return;
+ ctx->pos+=taglen;
+ if (ctx->pos >= ctx->end || *ctx->pos != '>') return;
+ ctx->pos++;
+}
- if (!plist || !plist_xml || *plist_xml)
- return;
- plist_doc = new_xml_plist();
- root_node = xmlDocGetRootElement(plist_doc);
- root.xml = root_node;
+static void node_from_xml(parse_ctx ctx, plist_t *plist)
+{
+ char *keyname = NULL;
+ while (ctx->pos < ctx->end && !ctx->err) {
+#ifdef DEBUG
+ const char *start = ctx->pos;
+#endif
+ parse_skip_ws(ctx);
+ if (ctx->pos >= ctx->end) {
+ break;
+ }
+ if (*ctx->pos != '<') {
+ PLIST_XML_ERR("Failed to parse XML. Expected: opening tag, found: '%s', pos: %s\n", start, ctx->pos);
+ ctx->pos = ctx->end;
+ break;
+ }
+ ctx->pos++;
+ if (ctx->pos >= ctx->end) {
+ break;
+ }
- node_to_xml(plist, &root);
+ if (*(ctx->pos) == '?') {
+ find_str(ctx, "?>", 1);
+ if (ctx->pos >= ctx->end) {
+ break;
+ }
+ if (strncmp(ctx->pos, "?>", 2)) {
+ PLIST_XML_ERR("Couldn't find <? tag closing marker\n");
+ ctx->pos = ctx->end;
+ return;
+ }
+ ctx->pos += 2;
+ continue;
+ } else if (*(ctx->pos) == '!') {
+ /* comment or DTD */
+ if (((ctx->end - ctx->pos) > 3) && !strncmp(ctx->pos, "!--", 3)) {
+ ctx->pos += 3;
+ find_str(ctx,"-->", 0);
+ if (strncmp(ctx->pos, "-->", 3)) {
+ PLIST_XML_ERR("Couldn't find end of comment\n");
+ ctx->pos = ctx->end;
+ return;
+ }
+ ctx->pos+=3;
+ } else if (((ctx->end - ctx->pos) > 8) && !strncmp(ctx->pos, "!DOCTYPE", 8)) {
+ int embedded_dtd = 0;
+ ctx->pos+=8;
+ while (ctx->pos < ctx->end) {
+ find_next(ctx, " \t\r\n[>", 1);
+ if (*ctx->pos == '[') {
+ embedded_dtd = 1;
+ break;
+ } else if (*ctx->pos == '>') {
+ /* end of DOCTYPE found already */
+ ctx->pos++;
+ break;
+ } else {
+ parse_skip_ws(ctx);
+ }
+ }
+ if (embedded_dtd) {
+ find_str(ctx, "]>", 1);
+ if (strncmp(ctx->pos, "]>", 2)) {
+ PLIST_XML_ERR("Couldn't find end of DOCTYPE\n");
+ ctx->pos = ctx->end;
+ return;
+ }
+ ctx->pos += 2;
+ }
+ } else {
+ PLIST_XML_ERR("Unknown ! special tag encountered\n");
+ ctx->err++;
+ }
+ continue;
+ } else {
+ int is_empty = 0;
+ int closing_tag = 0;
+ const char *p = ctx->pos;
+ find_next(ctx," \r\n\t<>",0);
+ if (ctx->pos >= ctx->end) {
+ PLIST_XML_ERR("Unexpected EOF while parsing XML\n");
+ ctx->pos = ctx->end;
+ ctx->err++;
+ free(keyname);
+ return;
+ }
+ int taglen = ctx->pos - p;
+ char *tag = malloc(taglen + 1);
+ strncpy(tag, p, taglen);
+ tag[taglen] = '\0';
+ if (*ctx->pos != '>') {
+ find_next(ctx, "<>", 1);
+ }
+ if (ctx->pos >= ctx->end) {
+ PLIST_XML_ERR("Unexpected EOF while parsing XML\n");
+ ctx->pos = ctx->end;
+ ctx->err++;
+ free(tag);
+ free(keyname);
+ return;
+ }
+ if (*ctx->pos != '>') {
+ PLIST_XML_ERR("Missing '>' for tag '%s'\n", tag);
+ ctx->pos = ctx->end;
+ ctx->err++;
+ free(tag);
+ free(keyname);
+ return;
+ }
+ if (*(ctx->pos-1) == '/') {
+ tag[ctx->pos - p - 1] = '\0';
+ is_empty = 1;
+ }
+ ctx->pos++;
+ if (!strcmp(tag, "plist")) {
+ free(tag);
+ if (is_empty) {
+ return;
+ }
+ if (!*plist) {
+ /* only process first plist node found */
+ node_from_xml(ctx, plist);
+ }
+ continue;
+ }
- xmlChar* tmp = NULL;
- xmlDocDumpMemory(plist_doc, &tmp, &size);
- if (size >= 0 && tmp)
- {
- /* make sure to copy the terminating 0-byte */
- *plist_xml = (char*)malloc((size+1) * sizeof(char));
- memcpy(*plist_xml, tmp, size+1);
- *length = size;
- xmlFree(tmp);
- tmp = NULL;
+ plist_data_t data = plist_new_plist_data();
+ plist_t subnode = plist_new_node(data);
+
+ if (!strcmp(tag, XPLIST_DICT)) {
+ data->type = PLIST_DICT;
+ } else if (!strcmp(tag, XPLIST_ARRAY)) {
+ data->type = PLIST_ARRAY;
+ } else if (!strcmp(tag, XPLIST_INT)) {
+ if (!is_empty) {
+ char *str_content = get_text_content(ctx, tag, 1, 0);
+ if (!str_content) {
+ PLIST_XML_ERR("Couldn't find end tag for '%s'\n", tag);
+ ctx->pos = ctx->end;
+ ctx->err++;
+ free(tag);
+ free(keyname);
+ return;
+ }
+ char *str = str_content;
+ int is_negative = 0;
+ if ((str[0] == '-') || (str[0] == '+')) {
+ if (str[0] == '-') {
+ is_negative = 1;
+ }
+ str++;
+ }
+ char* endp = NULL;
+ data->intval = strtoull((char*)str, &endp, 0);
+ if ((endp != NULL) && (strlen(endp) > 0)) {
+ PLIST_XML_ERR("integer parse error: string contains invalid characters: '%s'\n", endp);
+ }
+ if (is_negative || (data->intval <= INT64_MAX)) {
+ int64_t v = data->intval;
+ if (is_negative) {
+ v = -v;
+ }
+ data->intval = (uint64_t)v;
+ data->length = 8;
+ } else {
+ data->length = 16;
+ }
+ free(str_content);
+ } else {
+ data->intval = 0;
+ data->length = 8;
+ }
+ data->type = PLIST_UINT;
+ } else if (!strcmp(tag, XPLIST_REAL)) {
+ if (!is_empty) {
+ char *strval = get_text_content(ctx, tag, 1, 0);
+ data->realval = atof((char *) strval);
+ free(strval);
+ }
+ data->type = PLIST_REAL;
+ data->length = 8;
+ } else if (!strcmp(tag, XPLIST_TRUE)) {
+ if (!is_empty) {
+ skip_text_content(ctx, tag);
+ }
+ data->type = PLIST_BOOLEAN;
+ data->boolval = 1;
+ data->length = 1;
+ } else if (!strcmp(tag, XPLIST_FALSE)) {
+ if (!is_empty) {
+ skip_text_content(ctx, tag);
+ }
+ data->type = PLIST_BOOLEAN;
+ data->boolval = 0;
+ data->length = 1;
+ } else if (!strcmp(tag, XPLIST_STRING) || !strcmp(tag, XPLIST_KEY)) {
+ if (!is_empty) {
+ char *str = get_text_content(ctx, tag, 0, 1);
+ if (!str) {
+ PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag);
+ ctx->pos = ctx->end;
+ ctx->err++;
+ free(tag);
+ free(keyname);
+ return;
+ }
+ if (!strcmp(tag, "key") && !keyname && *plist && (plist_get_node_type(*plist) == PLIST_DICT)) {
+ keyname = str;
+ free(tag);
+ plist_free(subnode);
+ subnode = NULL;
+ ctx->pos++;
+ continue;
+ } else {
+ data->strval = str;
+ data->length = strlen(str);
+ }
+ } else {
+ data->strval = strdup("");
+ data->length = 0;
+ }
+ data->type = PLIST_STRING;
+ } else if (!strcmp(tag, XPLIST_DATA)) {
+ if (!is_empty) {
+ char *strval = get_text_content(ctx, tag, 1, 0);
+ if (!strval) {
+ PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag);
+ ctx->pos = ctx->end;
+ ctx->err++;
+ free(tag);
+ free(keyname);
+ return;
+ }
+ size_t size = 0;
+ data->buff = base64decode((char*)strval, &size);
+ free(strval);
+ data->length = size;
+ }
+ data->type = PLIST_DATA;
+ } else if (!strcmp(tag, XPLIST_DATE)) {
+ if (!is_empty) {
+ char *strval = get_text_content(ctx, tag, 1, 0);
+ if (!strval) {
+ PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag);
+ ctx->pos = ctx->end;
+ ctx->err++;
+ free(tag);
+ free(keyname);
+ return;
+ }
+ Time64_T timev = 0;
+ if (strlen((const char*)strval) >= 11) {
+ struct TM btime;
+ parse_date((const char*)strval, &btime);
+ timev = timegm64(&btime);
+ }
+ data->realval = (double)(timev - MAC_EPOCH);
+ free(strval);
+ }
+ data->length = sizeof(double);
+ data->type = PLIST_DATE;
+ } else if (tag[0] == '/') {
+ closing_tag = 1;
+ } else {
+ PLIST_XML_ERR("Unexpected tag '<%s%s>' encountered while parsing XML\n", tag, (is_empty) ? "/" : "");
+ ctx->pos = ctx->end;
+ ctx->err++;
+ free(tag);
+ free(keyname);
+ return;
+ }
+ free(tag);
+ if (subnode && !closing_tag) {
+ /* parse sub nodes for structured types */
+ if (data->type == PLIST_DICT || data->type == PLIST_ARRAY) {
+ if (!is_empty) {
+ /* only if not empty */
+ node_from_xml(ctx, &subnode);
+ if ((data->type == PLIST_DICT) && (plist_dict_get_size(subnode) == 1)) {
+ /* convert XML CF$UID dictionaries to PLIST_UID nodes */
+ plist_t uid = plist_dict_get_item(subnode, "CF$UID");
+ if (uid) {
+ uint64_t val = 0;
+ plist_get_uint_val(uid, &val);
+ plist_dict_remove_item(subnode, "CF$UID");
+ plist_data_t nodedata = plist_get_data((node_t*)subnode);
+ free(nodedata->buff);
+ nodedata->type = PLIST_UID;
+ nodedata->length = sizeof(uint64_t);
+ nodedata->intval = val;
+ }
+ }
+ }
+ }
+ if (!*plist) {
+ /* no parent? make this node the new parent node */
+ *plist = subnode;
+ subnode = NULL;
+ } else {
+ switch (plist_get_node_type(*plist)) {
+ case PLIST_DICT:
+ if (!keyname || *keyname == '\0') {
+ PLIST_XML_ERR("missing or empty key name while adding dict item\n");
+ ctx->err++;
+ break;
+ }
+ plist_dict_set_item(*plist, keyname, subnode);
+ subnode = NULL;
+ break;
+ case PLIST_ARRAY:
+ plist_array_append_item(*plist, subnode);
+ subnode = NULL;
+ break;
+ default:
+ /* should not happen */
+ PLIST_XML_ERR("while parsing XML plist: parent is not a structered node.\n");
+ ctx->err++;
+ break;
+ }
+ }
+ } else if (closing_tag && *plist && plist_get_node_type(*plist) == PLIST_DICT && keyname) {
+ PLIST_XML_ERR("missing value node in dict\n");
+ ctx->err++;
+ }
+ free(keyname);
+ keyname = NULL;
+ plist_free(subnode);
+ if (closing_tag) {
+ break;
+ }
+ }
+ ctx->pos++;
+ }
+ if (ctx->err) {
+ plist_free(*plist);
+ *plist = NULL;
}
- xmlFreeDoc(plist_doc);
-}
-
-static xmlParserInputPtr plist_xml_external_entity_loader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt)
-{
- return NULL;
}
PLIST_API void plist_from_xml(const char *plist_xml, uint32_t length, plist_t * plist)
{
- /* CVE-2013-0339: disable external entity loading to prevent XXE vulnerability */
- xmlSetExternalEntityLoader(plist_xml_external_entity_loader);
+ if (!plist_xml || (length == 0)) {
+ *plist = NULL;
+ return;
+ }
- /* read XML from memory and disable network access for security reasons */
- xmlDocPtr plist_doc = xmlReadMemory(plist_xml, length, "plist_from_xml:memory", NULL, XML_PARSE_NONET);
- if (plist_doc) {
- xmlNodePtr root_node = xmlDocGetRootElement(plist_doc);
+ struct _parse_ctx ctx = { plist_xml, plist_xml + length, 0 };
- xml_to_node(root_node, plist);
- xmlFreeDoc(plist_doc);
- }
+ node_from_xml(&ctx, plist);
}