From 392135c7db4d9cb4a14ff5935d7c4c6e21363847 Mon Sep 17 00:00:00 2001 From: Nikias Bassen Date: Sat, 22 Oct 2016 04:39:47 +0200 Subject: Remove libxml2 dependency in favor of custom XML parsing --- configure.ac | 27 +- include/plist/plist.h | 13 - src/Makefile.am | 5 +- src/base64.c | 22 +- src/base64.h | 2 +- src/bplist.c | 3 +- src/bytearray.c | 12 +- src/plist.c | 86 ++++- src/strbuf.h | 33 ++ src/xplist.c | 1002 ++++++++++++++++++++++++++++++++----------------- 10 files changed, 798 insertions(+), 407 deletions(-) create mode 100644 src/strbuf.h diff --git a/configure.ac b/configure.ac index 5f99d95..10c7f9d 100644 --- a/configure.ac +++ b/configure.ac @@ -19,11 +19,6 @@ LIBPLIST_SO_VERSION=3:0:0 AC_SUBST(LIBPLIST_SO_VERSION) -dnl Minimum package versions -LIBXML2_VERSION=2.7.8 - -AC_SUBST(LIBXML2_VERSION) - # Checks for programs. AC_PROG_CC AC_PROG_CXX @@ -40,9 +35,6 @@ AC_LANG_POP AM_PROG_CC_C_O AC_PROG_LIBTOOL -# Checks for libraries. -PKG_CHECK_MODULES(libxml2, libxml-2.0 >= $LIBXML2_VERSION) - # Checks for header files. AC_HEADER_STDC AC_CHECK_HEADERS([stdint.h stdlib.h string.h]) @@ -139,6 +131,22 @@ AM_CONDITIONAL([HAVE_CYTHON],[test "x$CYTHON_SUB" = "xcython"]) AC_SUBST([CYTHON_SUB]) AS_COMPILER_FLAGS(GLOBAL_CFLAGS, "-Wall -Wextra -Wmissing-declarations -Wredundant-decls -Wshadow -Wpointer-arith -Wwrite-strings -Wswitch-default -Wno-unused-parameter -fvisibility=hidden") + +AC_ARG_ENABLE(debug, +AS_HELP_STRING([--enable-debug], + [enable debugging, default: no]), +[case "${enableval}" in + yes) debug=yes ;; + no) debug=no ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable-debug]) ;; +esac], +[debug=no]) + +if (test "x$debug" = "xyes"); then + AC_DEFINE(DEBUG, 1, [Define if debug code should be enabled.]) + GLOBAL_CFLAGS+=" -g" +fi + AC_SUBST(GLOBAL_CFLAGS) case "$GLOBAL_CFLAGS" in @@ -165,7 +173,8 @@ echo " Configuration for $PACKAGE $VERSION: ------------------------------------------- - Install prefix: .........: $prefix + Install prefix ..........: $prefix + Debug code ..............: $debug Python bindings .........: $cython_python_bindings Now type 'make' to build $PACKAGE $VERSION, diff --git a/include/plist/plist.h b/include/plist/plist.h index 7e59acb..ace86f8 100644 --- a/include/plist/plist.h +++ b/include/plist/plist.h @@ -111,19 +111,6 @@ extern "C" } plist_type; - /******************************************** - * * - * Library Initialization & Cleanup * - * * - ********************************************/ - - /** - * Frees memory used globally by listplist, in - * particular the libxml parser - */ - - void plist_cleanup(void); - /******************************************** * * * Creation & Destruction * diff --git a/src/Makefile.am b/src/Makefile.am index b9117f3..2a4372b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,13 +1,14 @@ AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir) -I$(top_srcdir)/libcnary/include -AM_CFLAGS = $(GLOBAL_CFLAGS) $(libxml2_CFLAGS) -AM_LDFLAGS = $(libxml2_LIBS) +AM_CFLAGS = $(GLOBAL_CFLAGS) +AM_LDFLAGS = lib_LTLIBRARIES = libplist.la libplist++.la libplist_la_LIBADD = $(top_builddir)/libcnary/libcnary.la libplist_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(LIBPLIST_SO_VERSION) -no-undefined libplist_la_SOURCES = base64.c base64.h \ bytearray.c bytearray.h \ + strbuf.h \ hashtable.c hashtable.h \ ptrarray.c ptrarray.h \ time64.c time64.h time64_limits.h \ diff --git a/src/base64.c b/src/base64.c index 7128a5a..e59d963 100644 --- a/src/base64.c +++ b/src/base64.c @@ -43,32 +43,32 @@ static const signed char base64_table[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; -char *base64encode(const unsigned char *buf, size_t *size) +size_t base64encode(char *outbuf, const unsigned char *buf, size_t size) { - if (!buf || !size || !(*size > 0)) return NULL; - int outlen = (*size / 3) * 4; - char *outbuf = (char*)malloc(outlen+5); // 4 spare bytes + 1 for '\0' + if (!outbuf || !buf || (size <= 0)) { + return 0; + } + size_t n = 0; size_t m = 0; unsigned char input[3]; unsigned int output[4]; - while (n < *size) { + while (n < size) { input[0] = buf[n]; - input[1] = (n+1 < *size) ? buf[n+1] : 0; - input[2] = (n+2 < *size) ? buf[n+2] : 0; + input[1] = (n+1 < size) ? buf[n+1] : 0; + input[2] = (n+2 < size) ? buf[n+2] : 0; output[0] = input[0] >> 2; output[1] = ((input[0] & 3) << 4) + (input[1] >> 4); output[2] = ((input[1] & 15) << 2) + (input[2] >> 6); output[3] = input[2] & 63; outbuf[m++] = base64_str[(int)output[0]]; outbuf[m++] = base64_str[(int)output[1]]; - outbuf[m++] = (n+1 < *size) ? base64_str[(int)output[2]] : base64_pad; - outbuf[m++] = (n+2 < *size) ? base64_str[(int)output[3]] : base64_pad; + outbuf[m++] = (n+1 < size) ? base64_str[(int)output[2]] : base64_pad; + outbuf[m++] = (n+2 < size) ? base64_str[(int)output[3]] : base64_pad; n+=3; } outbuf[m] = 0; // 0-termination! - *size = m; - return outbuf; + return m; } static int base64decode_block(unsigned char *target, const char *data, size_t data_size) diff --git a/src/base64.h b/src/base64.h index 6eee33b..58b8396 100644 --- a/src/base64.h +++ b/src/base64.h @@ -22,7 +22,7 @@ #define BASE64_H #include -char *base64encode(const unsigned char *buf, size_t *size); +size_t base64encode(char *outbuf, const unsigned char *buf, size_t size); unsigned char *base64decode(const char *buf, size_t *size); #endif diff --git a/src/bplist.c b/src/bplist.c index fbe1b63..78b8255 100644 --- a/src/bplist.c +++ b/src/bplist.c @@ -2,7 +2,7 @@ * bplist.c * Binary plist implementation * - * Copyright (c) 2011-2015 Nikias Bassen, All Rights Reserved. + * Copyright (c) 2011-2016 Nikias Bassen, All Rights Reserved. * Copyright (c) 2008-2010 Jonathan Beck, All Rights Reserved. * * This library is free software; you can redistribute it and/or @@ -28,7 +28,6 @@ #include #include -#include #include #include diff --git a/src/bytearray.c b/src/bytearray.c index 2c6ce4a..861890e 100644 --- a/src/bytearray.c +++ b/src/bytearray.c @@ -21,12 +21,14 @@ #include #include "bytearray.h" +#define PAGE_SIZE 4096 + bytearray_t *byte_array_new() { bytearray_t *a = (bytearray_t*)malloc(sizeof(bytearray_t)); - a->data = malloc(256); + a->capacity = PAGE_SIZE * 8; + a->data = malloc(a->capacity); a->len = 0; - a->capacity = 256; return a; } @@ -44,8 +46,10 @@ void byte_array_append(bytearray_t *ba, void *buf, size_t len) if (!ba || !ba->data || (len <= 0)) return; size_t remaining = ba->capacity-ba->len; if (len > remaining) { - ba->data = realloc(ba->data, ba->capacity + (len - remaining)); - ba->capacity += (len - remaining); + size_t needed = len - remaining; + size_t increase = (needed > PAGE_SIZE) ? (needed+(PAGE_SIZE-1)) & (~(PAGE_SIZE-1)) : PAGE_SIZE; + ba->data = realloc(ba->data, ba->capacity + increase); + ba->capacity += increase; } memcpy(((char*)ba->data) + ba->len, buf, len); ba->len += len; diff --git a/src/plist.c b/src/plist.c index af64ed1..a3d88b9 100644 --- a/src/plist.c +++ b/src/plist.c @@ -2,8 +2,8 @@ * plist.c * Builds plist XML structures * + * Copyright (c) 2009-2016 Nikias Bassen All Rights Reserved. * Copyright (c) 2010-2015 Martin Szulecki All Rights Reserved. - * Copyright (c) 2009-2014 Nikias Bassen All Rights Reserved. * Copyright (c) 2008 Zach C. All Rights Reserved. * * This library is free software; you can redistribute it and/or @@ -29,27 +29,83 @@ #include #include +#ifdef WIN32 +#include +#else +#include +#endif + #include #include -#include -#include -#include -#include -#include -#include +extern void plist_xml_init(void); +extern void plist_xml_deinit(void); + +static void internal_plist_init(void) +{ + plist_xml_init(); +} + +static void internal_plist_deinit(void) +{ + plist_xml_deinit(); +} + +#ifdef WIN32 + +typedef volatile struct { + LONG lock; + int state; +} thread_once_t; + +static thread_once_t init_once = {0, 0}; +static thread_once_t deinit_once = {0, 0}; + +void thread_once(thread_once_t *once_control, void (*init_routine)(void)) +{ + while (InterlockedExchange(&(once_control->lock), 1) != 0) { + Sleep(1); + } + if (!once_control->state) { + once_control->state = 1; + init_routine(); + } + InterlockedExchange(&(once_control->lock), 0); +} + +BOOL WINAPI DllMain(HINSTANCE hModule, DWORD dwReason, LPVOID lpReserved) +{ + switch (dwReason) { + case DLL_PROCESS_ATTACH: + thread_once(&init_once, internal_plist_init); + break; + case DLL_PROCESS_DETACH: + thread_once(&deinit_once, internal_plist_deinit); + break; + default: + break; + } + return 1; +} -PLIST_API void plist_cleanup(void) +#else + +static pthread_once_t init_once = PTHREAD_ONCE_INIT; +static pthread_once_t deinit_once = PTHREAD_ONCE_INIT; + +static void __attribute__((constructor)) libplist_initialize(void) { - /* free memory from parser initialization */ - xmlCleanupCharEncodingHandlers(); - xmlDictCleanup(); - xmlResetLastError(); - xmlCleanupGlobals(); - xmlCleanupThreads(); - xmlCleanupMemory(); + pthread_once(&init_once, internal_plist_init); } +static void __attribute__((destructor)) libplist_deinitialize(void) +{ + pthread_once(&deinit_once, internal_plist_deinit); +} + +#endif + + PLIST_API int plist_is_binary(const char *plist_data, uint32_t length) { if (length < 8) { diff --git a/src/strbuf.h b/src/strbuf.h new file mode 100644 index 0000000..6b7f9d3 --- /dev/null +++ b/src/strbuf.h @@ -0,0 +1,33 @@ +/* + * strbuf.h + * header file for simple string buffer, using the bytearray as underlying + * structure. + * + * Copyright (c) 2016 Nikias Bassen, All Rights Reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef STRBUF_H +#define STRBUF_H +#include +#include "bytearray.h" + +typedef struct bytearray_t strbuf_t; + +#define str_buf_new() byte_array_new() +#define str_buf_free(__ba) byte_array_free(__ba) +#define str_buf_append(__ba, __str, __len) byte_array_append(__ba, (void*)(__str), __len) + +#endif diff --git a/src/xplist.c b/src/xplist.c index fc665ac..87b21bb 100644 --- a/src/xplist.c +++ b/src/xplist.c @@ -2,8 +2,8 @@ * xplist.c * XML plist implementation * + * Copyright (c) 2010-2016 Nikias Bassen All Rights Reserved. * Copyright (c) 2010-2015 Martin Szulecki All Rights Reserved. - * Copyright (c) 2010-2014 Nikias Bassen All Rights Reserved. * Copyright (c) 2008 Jonathan Beck All Rights Reserved. * * This library is free software; you can redistribute it and/or @@ -21,6 +21,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifdef HAVE_CONFIG_H +#include +#endif #include #include @@ -31,121 +34,55 @@ #include #include -#include -#include -#include - #include #include #include #include "plist.h" #include "base64.h" +#include "strbuf.h" #include "time64.h" -#define XPLIST_TEXT BAD_CAST("text") -#define XPLIST_KEY BAD_CAST("key") -#define XPLIST_FALSE BAD_CAST("false") -#define XPLIST_TRUE BAD_CAST("true") -#define XPLIST_INT BAD_CAST("integer") -#define XPLIST_REAL BAD_CAST("real") -#define XPLIST_DATE BAD_CAST("date") -#define XPLIST_DATA BAD_CAST("data") -#define XPLIST_STRING BAD_CAST("string") -#define XPLIST_ARRAY BAD_CAST("array") -#define XPLIST_DICT BAD_CAST("dict") +#define XPLIST_TEXT "text" +#define XPLIST_KEY "key" +#define XPLIST_FALSE "false" +#define XPLIST_TRUE "true" +#define XPLIST_INT "integer" +#define XPLIST_REAL "real" +#define XPLIST_DATE "date" +#define XPLIST_DATA "data" +#define XPLIST_STRING "string" +#define XPLIST_ARRAY "array" +#define XPLIST_DICT "dict" #define MAC_EPOCH 978307200 -static const char *plist_base = "\n\ +static const char XML_PLIST_PROLOG[] = "\n\ \n\ -\n\ -\0"; +\n"; +static const char XML_PLIST_EPILOG[] = "\n"; +#ifdef DEBUG +static int plist_xml_debug = 0; +#define PLIST_XML_ERR(...) if (plist_xml_debug) { fprintf(stderr, "libplist[xmlparser] ERROR: " __VA_ARGS__); } +#else +#define PLIST_XML_ERR(...) +#endif -/** Formats a block of text to be a given indentation and width. - * - * The total width of the return string will be depth + cols. - * - * @param buf The string to format. - * @param cols The number of text columns for returned block of text. - * @param depth The number of tabs to indent the returned block of text. - * - * @return The formatted string. - */ -static char *format_string(const char *buf, size_t len, int cols, int depth) +void plist_xml_init(void) { - if (!buf || !(len > 0)) return NULL; - int colw = depth + cols + 1; - int nlines = len / cols + 1; - char *new_buf = NULL; - int i = 0; - int j = 0; - - assert(cols >= 0); - assert(depth >= 0); - - new_buf = (char*) malloc(nlines * colw + depth + 1); - assert(new_buf != 0); - memset(new_buf, 0, nlines * colw + depth + 1); - - // Inserts new lines and tabs at appropriate locations - for (i = 0; i < nlines; i++) - { - new_buf[i * colw] = '\n'; - for (j = 0; j < depth; j++) - new_buf[i * colw + 1 + j] = '\t'; - memcpy(new_buf + i * colw + 1 + depth, buf + i * cols, (size_t)(i + 1) * cols <= len ? (size_t)cols : len - i * cols); + /* init XML stuff */ +#ifdef DEBUG + char *env_debug = getenv("PLIST_XML_DEBUG"); + if (env_debug && !strcmp(env_debug, "1")) { + plist_xml_debug = 1; } - new_buf[len + (1 + depth) * nlines] = '\n'; - - // Inserts final row of indentation and termination character - for (j = 0; j < depth; j++) - new_buf[len + (1 + depth) * nlines + 1 + j] = '\t'; - new_buf[len + (1 + depth) * nlines + depth + 1] = '\0'; - - return new_buf; -} - - - -struct xml_node -{ - xmlNodePtr xml; - uint32_t depth; -}; - -/** Creates a new plist XML document. - * - * @return The plist XML document. - */ -static xmlDocPtr new_xml_plist(void) -{ - char *plist = strdup(plist_base); - xmlDocPtr plist_xml = xmlParseMemory(plist, strlen(plist)); - - free(plist); - - return plist_xml; -} - -static struct node_t* new_key_node(const char* name) -{ - plist_data_t data = plist_new_plist_data(); - data->type = PLIST_KEY; - int size = strlen(name); - data->strval = strdup(name); - data->length = size; - return node_create(NULL, data); +#endif } -static struct node_t* new_uint_node(uint64_t value) +void plist_xml_deinit(void) { - plist_data_t data = plist_new_plist_data(); - data->type = PLIST_UINT; - data->intval = value; - data->length = sizeof(uint64_t); - return node_create(NULL, data); + /* deinit XML stuff */ } static void dtostr(char *buf, size_t bufsize, double realval) @@ -178,27 +115,21 @@ static void dtostr(char *buf, size_t bufsize, double realval) buf[p] = '\0'; } -static void node_to_xml(node_t* node, void *xml_struct) +static void node_to_xml(node_t* node, bytearray_t **outbuf, uint32_t depth) { - struct xml_node *xstruct = NULL; plist_data_t node_data = NULL; - xmlNodePtr child_node = NULL; char isStruct = FALSE; - char isUIDNode = FALSE; + char tagOpen = FALSE; - const xmlChar *tag = NULL; + const char *tag = NULL; char *val = NULL; - //for base64 - char *valtmp = NULL; - uint32_t i = 0; if (!node) return; - xstruct = (struct xml_node *) xml_struct; node_data = plist_get_data(node); switch (node_data->type) @@ -230,23 +161,17 @@ static void node_to_xml(node_t* node, void *xml_struct) case PLIST_STRING: tag = XPLIST_STRING; - val = strdup((char*) node_data->strval); + /* contents processed directly below */ break; case PLIST_KEY: tag = XPLIST_KEY; - val = strdup((char*) node_data->strval); + /* contents processed directly below */ break; case PLIST_DATA: tag = XPLIST_DATA; - if (node_data->length) - { - size_t len = node_data->length; - valtmp = base64encode(node_data->buff, &len); - val = format_string(valtmp, len, 68, xstruct->depth); - free(valtmp); - } + /* contents processed directly below */ break; case PLIST_ARRAY: tag = XPLIST_ARRAY; @@ -275,77 +200,161 @@ static void node_to_xml(node_t* node, void *xml_struct) } break; case PLIST_UID: - // special case for keyed encoding tag = XPLIST_DICT; - isStruct = TRUE; - isUIDNode = TRUE; - node_data->type = PLIST_DICT; - node_attach(node, new_key_node("CF$UID")); - node_attach(node, new_uint_node(node_data->intval)); + val = (char*)malloc(64); + if (node_data->length == 16) { + (void)snprintf(val, 64, "%"PRIu64, node_data->intval); + } else { + (void)snprintf(val, 64, "%"PRIi64, node_data->intval); + } break; default: break; } - for (i = 0; i < xstruct->depth; i++) - { - xmlNodeAddContent(xstruct->xml, BAD_CAST("\t")); + for (i = 0; i < depth; i++) { + str_buf_append(*outbuf, "\t", 1); } + + /* append tag */ + str_buf_append(*outbuf, "<", 1); + str_buf_append(*outbuf, tag, strlen(tag)); if (node_data->type == PLIST_STRING || node_data->type == PLIST_KEY) { + size_t j; + size_t len; + off_t start = 0; + off_t cur = 0; + + str_buf_append(*outbuf, ">", 1); + tagOpen = TRUE; + /* make sure we convert the following predefined xml entities */ /* < = < > = > ' = ' " = " & = & */ - child_node = xmlNewTextChild(xstruct->xml, NULL, tag, BAD_CAST(val)); - } else - child_node = xmlNewChild(xstruct->xml, NULL, tag, BAD_CAST(val)); - xmlNodeAddContent(xstruct->xml, BAD_CAST("\n")); - if (val) { - free(val); - } + len = strlen(node_data->strval); + for (j = 0; j < len; j++) { + switch (node_data->strval[j]) { + case '<': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, "<", 4); + start = cur+1; + break; + case '>': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, ">", 4); + start = cur+1; + break; + case '\'': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, "'", 6); + start = cur+1; + break; + case '"': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, """, 6); + start = cur+1; + break; + case '&': + str_buf_append(*outbuf, node_data->strval + start, cur - start); + str_buf_append(*outbuf, "&", 5); + start = cur+1; + break; + default: + break; + } + cur++; + } + str_buf_append(*outbuf, node_data->strval + start, cur - start); + } else if (node_data->type == PLIST_DATA) { + str_buf_append(*outbuf, ">", 1); + tagOpen = TRUE; + str_buf_append(*outbuf, "\n", 1); + if (node_data->length > 0) { + char *buf = malloc(80); + uint32_t j = 0; + uint32_t indent = (depth > 8) ? 8 : depth; + uint32_t maxread = ((76 - indent*8) / 4) * 3; + size_t count = 0; + size_t b64count = 0; + while (j < node_data->length) { + for (i = 0; i < indent; i++) { + str_buf_append(*outbuf, "\t", 1); + } + count = (node_data->length-j < maxread) ? node_data->length-j : maxread; + b64count = base64encode(buf, node_data->buff + j, count); + str_buf_append(*outbuf, buf, b64count); + str_buf_append(*outbuf, "\n", 1); + j+=count; + } + free(buf); + } + for (i = 0; i < depth; i++) { + str_buf_append(*outbuf, "\t", 1); + } + } else if (node_data->type == PLIST_UID) { + /* special case for UID nodes: create a DICT */ + str_buf_append(*outbuf, ">", 1); + tagOpen = TRUE; + str_buf_append(*outbuf, "\n", 1); + + /* add CF$UID key */ + for (i = 0; i < depth+1; i++) { + str_buf_append(*outbuf, "\t", 1); + } + str_buf_append(*outbuf, "CF$UID", 17); + str_buf_append(*outbuf, "\n", 1); - //add return for structured types - if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) - xmlNodeAddContent(child_node, BAD_CAST("\n")); + /* add UID value */ + for (i = 0; i < depth+1; i++) { + str_buf_append(*outbuf, "\t", 1); + } + str_buf_append(*outbuf, "", 9); + str_buf_append(*outbuf, val, strlen(val)); + str_buf_append(*outbuf, "", 10); + str_buf_append(*outbuf, "\n", 1); - //make sure we don't produce if it's empty - if ((node_data->type == PLIST_DATA) && !val) { - xmlNodeAddContent(child_node, BAD_CAST("\n")); - for (i = 0; i < xstruct->depth; i++) - { - xmlNodeAddContent(child_node, BAD_CAST("\t")); + for (i = 0; i < depth; i++) { + str_buf_append(*outbuf, "\t", 1); } + } else if (val) { + str_buf_append(*outbuf, ">", 1); + tagOpen = TRUE; + str_buf_append(*outbuf, val, strlen(val)); + } else if (isStruct) { + tagOpen = TRUE; + str_buf_append(*outbuf, ">", 1); + } else { + tagOpen = FALSE; + str_buf_append(*outbuf, "/>", 2); } + free(val); - if (isStruct) - { - struct xml_node child = { child_node, xstruct->depth + 1 }; + /* add return for structured types */ + if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) + str_buf_append(*outbuf, "\n", 1); + + if (isStruct) { node_iterator_t *ni = node_iterator_create(node->children); node_t *ch; while ((ch = node_iterator_next(ni))) { - node_to_xml(ch, &child); + node_to_xml(ch, outbuf, depth+1); } node_iterator_destroy(ni); } - //fix indent for structured types - if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) - { - for (i = 0; i < xstruct->depth; i++) - { - xmlNodeAddContent(child_node, BAD_CAST("\t")); + /* fix indent for structured types */ + if (node_data->type == PLIST_ARRAY || node_data->type == PLIST_DICT) { + for (i = 0; i < depth; i++) { + str_buf_append(*outbuf, "\t", 1); } } - if (isUIDNode) - { - unsigned int num = node_n_children(node); - unsigned int j; - for (j = num; j > 0; j--) { - node_t* ch = node_nth_child(node, j-1); - node_detach(node, ch); - plist_free_data((plist_data_t)((node_t*)ch)->data); - node_destroy(ch); - } - node_data->type = PLIST_UID; + + if (tagOpen) { + /* add closing tag */ + str_buf_append(*outbuf, "", 1); } + str_buf_append(*outbuf, "\n", 1); return; } @@ -370,233 +379,526 @@ static void parse_date(const char *strval, struct TM *btime) btime->tm_isdst=0; } -static void xml_to_node(xmlNodePtr xml_node, plist_t * plist_node) +PLIST_API void plist_to_xml(plist_t plist, char **plist_xml, uint32_t * length) { - xmlNodePtr node = NULL; - plist_data_t data = NULL; - plist_t subnode = NULL; + strbuf_t *outbuf = str_buf_new(); - //for string - long len = 0; - int type = 0; + str_buf_append(outbuf, XML_PLIST_PROLOG, sizeof(XML_PLIST_PROLOG)-1); - if (!xml_node) - return; + node_to_xml(plist, &outbuf, 0); - for (node = xml_node->children; node; node = node->next) - { - - while (node && !xmlStrcmp(node->name, XPLIST_TEXT)) - node = node->next; - if (!node) - break; + str_buf_append(outbuf, XML_PLIST_EPILOG, sizeof(XML_PLIST_EPILOG)); - if (!xmlStrcmp(node->name, BAD_CAST("comment"))) { - continue; - } + *plist_xml = outbuf->data; + *length = outbuf->len - 1; - data = plist_new_plist_data(); - subnode = plist_new_node(data); - if (*plist_node) - node_attach(*plist_node, subnode); - else - *plist_node = subnode; + outbuf->data = NULL; + str_buf_free(outbuf); +} - if (!xmlStrcmp(node->name, XPLIST_TRUE)) - { - data->boolval = TRUE; - data->type = PLIST_BOOLEAN; - data->length = 1; - continue; - } +struct _parse_ctx { + const char *pos; + const char *end; + int err; +}; +typedef struct _parse_ctx* parse_ctx; - if (!xmlStrcmp(node->name, XPLIST_FALSE)) - { - data->boolval = FALSE; - data->type = PLIST_BOOLEAN; - data->length = 1; - continue; - } +static void parse_skip_ws(parse_ctx ctx) +{ + while (ctx->pos < ctx->end && ((*(ctx->pos) == ' ') || (*(ctx->pos) == '\t') || (*(ctx->pos) == '\r') || (*(ctx->pos) == '\n'))) { + ctx->pos++; + } +} - if (!xmlStrcmp(node->name, XPLIST_INT)) - { - xmlChar *strval = xmlNodeGetContent(node); - int is_negative = 0; - char *str = (char*)strval; - if ((str[0] == '-') || (str[0] == '+')) { - if (str[0] == '-') { - is_negative = 1; - } - str++; - } - char* endp = NULL; - data->intval = strtoull((char*)str, &endp, 0); - if ((endp != NULL) && (strlen(endp) > 0)) { - fprintf(stderr, "%s: integer parse error: string contains invalid characters: '%s'\n", __func__, endp); - } - if (is_negative || (data->intval <= INT64_MAX)) { - int64_t v = data->intval; - if (is_negative) { - v = -v; - } - data->intval = (uint64_t)v; - data->length = 8; - } else { - data->length = 16; +static void find_char(parse_ctx ctx, char c, int skip_quotes) +{ + while (ctx->pos < ctx->end && (*(ctx->pos) != c)) { + if (skip_quotes && (c != '"') && (*(ctx->pos) == '"')) { + ctx->pos++; + find_char(ctx, '"', 0); + if (*(ctx->pos) != '"') { + PLIST_XML_ERR("Unmatched double quote\n"); + return; } - data->type = PLIST_UINT; - xmlFree(strval); - continue; } + ctx->pos++; + } +} - if (!xmlStrcmp(node->name, XPLIST_REAL)) - { - xmlChar *strval = xmlNodeGetContent(node); - data->realval = atof((char *) strval); - data->type = PLIST_REAL; - data->length = 8; - xmlFree(strval); - continue; +static void find_str(parse_ctx ctx, const char *str, int skip_quotes) +{ + size_t len = strlen(str); + while (ctx->pos < (ctx->end - len)) { + if (!strncmp(ctx->pos, str, len)) { + break; } - - if (!xmlStrcmp(node->name, XPLIST_DATE)) - { - xmlChar *strval = xmlNodeGetContent(node); - Time64_T timev = 0; - if (strlen((const char*)strval) >= 11) { - struct TM btime; - parse_date((const char*)strval, &btime); - timev = timegm64(&btime); + if (skip_quotes && (*(ctx->pos) == '"')) { + ctx->pos++; + find_char(ctx, '"', 0); + if (*(ctx->pos) != '"') { + PLIST_XML_ERR("Unmatched double quote\n"); + return; } - data->realval = (double)(timev - MAC_EPOCH); - data->type = PLIST_DATE; - data->length = sizeof(double); - xmlFree(strval); - continue; } + ctx->pos++; + } +} - if (!xmlStrcmp(node->name, XPLIST_STRING)) - { - xmlChar *strval = xmlNodeGetContent(node); - len = strlen((char *) strval); - type = xmlDetectCharEncoding(strval, len); - - if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type) - { - data->strval = strdup((char *) strval); - data->type = PLIST_STRING; - data->length = strlen(data->strval); +static void find_next(parse_ctx ctx, const char *nextchars, int skip_quotes) +{ + int numchars = strlen(nextchars); + int i = 0; + while (ctx->pos < ctx->end) { + if (skip_quotes && (*(ctx->pos) == '"')) { + ctx->pos++; + find_char(ctx, '"', 0); + if (*(ctx->pos) != '"') { + PLIST_XML_ERR("Unmatched double quote\n"); + return; } - xmlFree(strval); - continue; } - - if (!xmlStrcmp(node->name, XPLIST_KEY)) - { - xmlChar *strval = xmlNodeGetContent(node); - len = strlen((char *) strval); - type = xmlDetectCharEncoding(strval, len); - - if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type) - { - data->strval = strdup((char *) strval); - data->type = PLIST_KEY; - data->length = strlen(data->strval); + for (i = 0; i < numchars; i++) { + if (*(ctx->pos) == nextchars[i]) { + return; } - xmlFree(strval); - continue; } + ctx->pos++; + } +} - if (!xmlStrcmp(node->name, XPLIST_DATA)) - { - xmlChar *strval = xmlNodeGetContent(node); - size_t size = 0; - unsigned char *dec = base64decode((char*)strval, &size); - data->buff = (uint8_t *) malloc(size * sizeof(uint8_t)); - memcpy(data->buff, dec, size * sizeof(uint8_t)); - free(dec); - data->length = size; - data->type = PLIST_DATA; - xmlFree(strval); - continue; - } +static char* get_text_content(parse_ctx ctx, const char* tag, int skip_ws, int unescape_entities) +{ + const char *p; + const char *q; + int taglen; + char *str; + int i = 0; - if (!xmlStrcmp(node->name, XPLIST_ARRAY)) - { - data->type = PLIST_ARRAY; - xml_to_node(node, &subnode); - continue; - } + if (skip_ws) { + parse_skip_ws(ctx); + } + p = ctx->pos; + find_char(ctx, '<', 1); + if (*ctx->pos != '<') { PLIST_XML_ERR("didn't find <\n"); return NULL; } + q = ctx->pos; + ctx->pos++; + if (ctx->pos >= ctx->end || *ctx->pos != '/') { PLIST_XML_ERR("EOF or empty tag while parsing '%s'\n",p); return NULL; } + ctx->pos++; + taglen = strlen(tag); + if (ctx->pos >= ctx->end-taglen || strncmp(ctx->pos, tag, taglen)) { PLIST_XML_ERR("EOF or end tag mismatch\n"); return NULL;} + ctx->pos+=taglen; + if (ctx->pos >= ctx->end || *ctx->pos != '>') { PLIST_XML_ERR("EOF or no '>' after tag name\n"); return NULL;} + ctx->pos++; + int len = q - p; + if (len < 0) { + PLIST_XML_ERR("Couldn't find matching '%s' end tag\n", tag); + return NULL; + } + str = malloc(len+1); + strncpy(str, p, len); + str[len] = 0; - if (!xmlStrcmp(node->name, XPLIST_DICT)) - { - data->type = PLIST_DICT; - xml_to_node(node, &subnode); - if (plist_get_node_type(subnode) == PLIST_DICT) { - if (plist_dict_get_size(subnode) == 1) { - plist_t uid = plist_dict_get_item(subnode, "CF$UID"); - if (uid) { - uint64_t val = 0; - plist_get_uint_val(uid, &val); - plist_dict_remove_item(subnode, "CF$UID"); - plist_data_t nodedata = plist_get_data((node_t*)subnode); - free(nodedata->buff); - nodedata->type = PLIST_UID; - nodedata->length = sizeof(uint64_t); - nodedata->intval = val; - } + if (!unescape_entities) { + return str; + } + + /* unescape entities */ + while (i < len-1) { + if (str[i] == '&') { + char *entp = str + i + 1; + while (i < len && str[i] != ';') { + i++; + } + if (i >= len) { + break; + } + if (str+i > entp+1) { + int entlen = str+i - entp; + if (!strncmp(entp, "amp", 3)) { + /* the '&' is already there */ + } else if (!strncmp(entp, "apos", 4)) { + *(entp-1) = '\''; + } else if (!strncmp(entp, "quot", 4)) { + *(entp-1) = '"'; + } else if (!strncmp(entp, "lt", 2)) { + *(entp-1) = '<'; + } else if (!strncmp(entp, "gt", 2)) { + *(entp-1) = '>'; + } else { + /* unexpected entity, replace with ? */ + *(entp-1) = '?'; } + memmove(entp, str+i+1, len - i); + i -= entlen; + continue; } - continue; } + i++; } + + return str; } -PLIST_API void plist_to_xml(plist_t plist, char **plist_xml, uint32_t * length) +static void skip_text_content(parse_ctx ctx, const char* tag) { - xmlDocPtr plist_doc = NULL; - xmlNodePtr root_node = NULL; - struct xml_node root = { NULL, 0 }; - int size = 0; + int taglen; + find_char(ctx, '<', 1); + if (*ctx->pos != '<') return; + ctx->pos++; + taglen = strlen(tag); + if (ctx->pos >= ctx->end-taglen || strncmp(ctx->pos, tag, taglen)) return; + ctx->pos+=taglen; + if (ctx->pos >= ctx->end || *ctx->pos != '>') return; + ctx->pos++; +} - if (!plist || !plist_xml || *plist_xml) - return; - plist_doc = new_xml_plist(); - root_node = xmlDocGetRootElement(plist_doc); - root.xml = root_node; +static void node_from_xml(parse_ctx ctx, plist_t *plist) +{ + char *keyname = NULL; + while (ctx->pos < ctx->end && !ctx->err) { +#ifdef DEBUG + const char *start = ctx->pos; +#endif + parse_skip_ws(ctx); + if (ctx->pos >= ctx->end) { + break; + } + if (*ctx->pos != '<') { + PLIST_XML_ERR("Failed to parse XML. Expected: opening tag, found: '%s', pos: %s\n", start, ctx->pos); + ctx->pos = ctx->end; + break; + } + ctx->pos++; + if (ctx->pos >= ctx->end) { + break; + } - node_to_xml(plist, &root); + if (*(ctx->pos) == '?') { + find_str(ctx, "?>", 1); + if (ctx->pos >= ctx->end) { + break; + } + if (strncmp(ctx->pos, "?>", 2)) { + PLIST_XML_ERR("Couldn't find pos = ctx->end; + return; + } + ctx->pos += 2; + continue; + } else if (*(ctx->pos) == '!') { + /* comment or DTD */ + if (((ctx->end - ctx->pos) > 3) && !strncmp(ctx->pos, "!--", 3)) { + ctx->pos += 3; + find_str(ctx,"-->", 0); + if (strncmp(ctx->pos, "-->", 3)) { + PLIST_XML_ERR("Couldn't find end of comment\n"); + ctx->pos = ctx->end; + return; + } + ctx->pos+=3; + } else if (((ctx->end - ctx->pos) > 8) && !strncmp(ctx->pos, "!DOCTYPE", 8)) { + int embedded_dtd = 0; + ctx->pos+=8; + while (ctx->pos < ctx->end) { + find_next(ctx, " \t\r\n[>", 1); + if (*ctx->pos == '[') { + embedded_dtd = 1; + break; + } else if (*ctx->pos == '>') { + /* end of DOCTYPE found already */ + ctx->pos++; + break; + } else { + parse_skip_ws(ctx); + } + } + if (embedded_dtd) { + find_str(ctx, "]>", 1); + if (strncmp(ctx->pos, "]>", 2)) { + PLIST_XML_ERR("Couldn't find end of DOCTYPE\n"); + ctx->pos = ctx->end; + return; + } + ctx->pos += 2; + } + } else { + PLIST_XML_ERR("Unknown ! special tag encountered\n"); + ctx->err++; + } + continue; + } else { + int is_empty = 0; + int closing_tag = 0; + const char *p = ctx->pos; + find_next(ctx," \r\n\t<>",0); + if (ctx->pos >= ctx->end) { + PLIST_XML_ERR("Unexpected EOF while parsing XML\n"); + ctx->pos = ctx->end; + ctx->err++; + free(keyname); + return; + } + int taglen = ctx->pos - p; + char *tag = malloc(taglen + 1); + strncpy(tag, p, taglen); + tag[taglen] = '\0'; + if (*ctx->pos != '>') { + find_next(ctx, "<>", 1); + } + if (ctx->pos >= ctx->end) { + PLIST_XML_ERR("Unexpected EOF while parsing XML\n"); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + if (*ctx->pos != '>') { + PLIST_XML_ERR("Missing '>' for tag '%s'\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + if (*(ctx->pos-1) == '/') { + tag[ctx->pos - p - 1] = '\0'; + is_empty = 1; + } + ctx->pos++; + if (!strcmp(tag, "plist")) { + free(tag); + if (is_empty) { + return; + } + if (!*plist) { + /* only process first plist node found */ + node_from_xml(ctx, plist); + } + continue; + } - xmlChar* tmp = NULL; - xmlDocDumpMemory(plist_doc, &tmp, &size); - if (size >= 0 && tmp) - { - /* make sure to copy the terminating 0-byte */ - *plist_xml = (char*)malloc((size+1) * sizeof(char)); - memcpy(*plist_xml, tmp, size+1); - *length = size; - xmlFree(tmp); - tmp = NULL; + plist_data_t data = plist_new_plist_data(); + plist_t subnode = plist_new_node(data); + + if (!strcmp(tag, XPLIST_DICT)) { + data->type = PLIST_DICT; + } else if (!strcmp(tag, XPLIST_ARRAY)) { + data->type = PLIST_ARRAY; + } else if (!strcmp(tag, XPLIST_INT)) { + if (!is_empty) { + char *str_content = get_text_content(ctx, tag, 1, 0); + if (!str_content) { + PLIST_XML_ERR("Couldn't find end tag for '%s'\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + char *str = str_content; + int is_negative = 0; + if ((str[0] == '-') || (str[0] == '+')) { + if (str[0] == '-') { + is_negative = 1; + } + str++; + } + char* endp = NULL; + data->intval = strtoull((char*)str, &endp, 0); + if ((endp != NULL) && (strlen(endp) > 0)) { + PLIST_XML_ERR("integer parse error: string contains invalid characters: '%s'\n", endp); + } + if (is_negative || (data->intval <= INT64_MAX)) { + int64_t v = data->intval; + if (is_negative) { + v = -v; + } + data->intval = (uint64_t)v; + data->length = 8; + } else { + data->length = 16; + } + free(str_content); + } else { + data->intval = 0; + data->length = 8; + } + data->type = PLIST_UINT; + } else if (!strcmp(tag, XPLIST_REAL)) { + if (!is_empty) { + char *strval = get_text_content(ctx, tag, 1, 0); + data->realval = atof((char *) strval); + free(strval); + } + data->type = PLIST_REAL; + data->length = 8; + } else if (!strcmp(tag, XPLIST_TRUE)) { + if (!is_empty) { + skip_text_content(ctx, tag); + } + data->type = PLIST_BOOLEAN; + data->boolval = 1; + data->length = 1; + } else if (!strcmp(tag, XPLIST_FALSE)) { + if (!is_empty) { + skip_text_content(ctx, tag); + } + data->type = PLIST_BOOLEAN; + data->boolval = 0; + data->length = 1; + } else if (!strcmp(tag, XPLIST_STRING) || !strcmp(tag, XPLIST_KEY)) { + if (!is_empty) { + char *str = get_text_content(ctx, tag, 0, 1); + if (!str) { + PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + if (!strcmp(tag, "key") && !keyname && *plist && (plist_get_node_type(*plist) == PLIST_DICT)) { + keyname = str; + free(tag); + plist_free(subnode); + subnode = NULL; + ctx->pos++; + continue; + } else { + data->strval = str; + data->length = strlen(str); + } + } else { + data->strval = strdup(""); + data->length = 0; + } + data->type = PLIST_STRING; + } else if (!strcmp(tag, XPLIST_DATA)) { + if (!is_empty) { + char *strval = get_text_content(ctx, tag, 1, 0); + if (!strval) { + PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + size_t size = 0; + data->buff = base64decode((char*)strval, &size); + free(strval); + data->length = size; + } + data->type = PLIST_DATA; + } else if (!strcmp(tag, XPLIST_DATE)) { + if (!is_empty) { + char *strval = get_text_content(ctx, tag, 1, 0); + if (!strval) { + PLIST_XML_ERR("Couldn't get text content for '%s' node\n", tag); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + Time64_T timev = 0; + if (strlen((const char*)strval) >= 11) { + struct TM btime; + parse_date((const char*)strval, &btime); + timev = timegm64(&btime); + } + data->realval = (double)(timev - MAC_EPOCH); + free(strval); + } + data->length = sizeof(double); + data->type = PLIST_DATE; + } else if (tag[0] == '/') { + closing_tag = 1; + } else { + PLIST_XML_ERR("Unexpected tag '<%s%s>' encountered while parsing XML\n", tag, (is_empty) ? "/" : ""); + ctx->pos = ctx->end; + ctx->err++; + free(tag); + free(keyname); + return; + } + free(tag); + if (subnode && !closing_tag) { + /* parse sub nodes for structured types */ + if (data->type == PLIST_DICT || data->type == PLIST_ARRAY) { + if (!is_empty) { + /* only if not empty */ + node_from_xml(ctx, &subnode); + if ((data->type == PLIST_DICT) && (plist_dict_get_size(subnode) == 1)) { + /* convert XML CF$UID dictionaries to PLIST_UID nodes */ + plist_t uid = plist_dict_get_item(subnode, "CF$UID"); + if (uid) { + uint64_t val = 0; + plist_get_uint_val(uid, &val); + plist_dict_remove_item(subnode, "CF$UID"); + plist_data_t nodedata = plist_get_data((node_t*)subnode); + free(nodedata->buff); + nodedata->type = PLIST_UID; + nodedata->length = sizeof(uint64_t); + nodedata->intval = val; + } + } + } + } + if (!*plist) { + /* no parent? make this node the new parent node */ + *plist = subnode; + subnode = NULL; + } else { + switch (plist_get_node_type(*plist)) { + case PLIST_DICT: + if (!keyname || *keyname == '\0') { + PLIST_XML_ERR("missing or empty key name while adding dict item\n"); + ctx->err++; + break; + } + plist_dict_set_item(*plist, keyname, subnode); + subnode = NULL; + break; + case PLIST_ARRAY: + plist_array_append_item(*plist, subnode); + subnode = NULL; + break; + default: + /* should not happen */ + PLIST_XML_ERR("while parsing XML plist: parent is not a structered node.\n"); + ctx->err++; + break; + } + } + } else if (closing_tag && *plist && plist_get_node_type(*plist) == PLIST_DICT && keyname) { + PLIST_XML_ERR("missing value node in dict\n"); + ctx->err++; + } + free(keyname); + keyname = NULL; + plist_free(subnode); + if (closing_tag) { + break; + } + } + ctx->pos++; + } + if (ctx->err) { + plist_free(*plist); + *plist = NULL; } - xmlFreeDoc(plist_doc); -} - -static xmlParserInputPtr plist_xml_external_entity_loader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt) -{ - return NULL; } PLIST_API void plist_from_xml(const char *plist_xml, uint32_t length, plist_t * plist) { - /* CVE-2013-0339: disable external entity loading to prevent XXE vulnerability */ - xmlSetExternalEntityLoader(plist_xml_external_entity_loader); + if (!plist_xml || (length == 0)) { + *plist = NULL; + return; + } - /* read XML from memory and disable network access for security reasons */ - xmlDocPtr plist_doc = xmlReadMemory(plist_xml, length, "plist_from_xml:memory", NULL, XML_PARSE_NONET); - if (plist_doc) { - xmlNodePtr root_node = xmlDocGetRootElement(plist_doc); + struct _parse_ctx ctx = { plist_xml, plist_xml + length, 0 }; - xml_to_node(root_node, plist); - xmlFreeDoc(plist_doc); - } + node_from_xml(&ctx, plist); } -- cgit v1.1-32-gdbae