Commit 9ae680d3 authored by miloyip@gmail.com's avatar miloyip@gmail.com

Added ultrajson to perftest, only tested parsing to dummy handler.

git-svn-id: https://rapidjson.googlecode.com/svn/trunk@28 c5894555-1306-4e8d-425f-1f6f381ee07c
parent 08d25ad1
#ifndef PERFTEST_H_
#define PERFTEST_H_
#define TEST_RAPIDJSON 0
#define TEST_JSONCPP 0
#define TEST_YAJL 0
#define TEST_RAPIDJSON 1
#define TEST_JSONCPP 1
#define TEST_YAJL 1
#define TEST_ULTRAJSON 1
#define TEST_PLATFORM 1
#if TEST_RAPIDJSON
//#define RAPIDJSON_SSE2
//#define RAPIDJSON_SSE42
......
......@@ -160,9 +160,10 @@ TEST_F(RapidJson, DocumentAccept) {
}
struct NullStream {
NullStream() : length_(0) {}
void Put(char c) { ++length_; }
size_t length_;
NullStream() /*: length_(0)*/ {}
void Put(char c) { /*++length_;*/ }
void Flush() {}
//size_t length_;
};
TEST_F(RapidJson, Writer_NullStream) {
......
UltraJSON is a fast and extendable JSON encoder and decoder written in pure C
Python bindings are available as the module ujson (through easy_install / pypi):
http://pypi.python.org/pypi/ujson/
Installation instructions:
1. Build and install ujson Python extension (requires root)
Go to <root>/python
Type: python setup.py build install
2. Run tests (as needed)
Type: python tests.py
The same instructions apply on Windows, except that step 1) isn't necessary since
a prebuilt static library is included.
Preliminary benchmarks:
64-bit benchmarks Linux
Python 2.6.6 (r266:84292, Sep 15 2010, 16:22:56)
OS Version: Ubuntu 10.10
System Type: x64-based PC
Processor: Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
Total Physical Memory: 4096 MB
Array with 256 utf-8 strings:
ujson encode : 2714.66499 calls/sec
simplejson encode : 1542.63718 calls/sec
cjson encode : 132.23604 calls/sec
ujson decode : 2079.17287 calls/sec
cjson decode : 992.21602 calls/sec
simplejson decode : 278.92061 calls/sec
Medium complex object:
ujson encode : 17849.80356 calls/sec
simplejson encode : 3524.32372 calls/sec
cjson encode : 2967.34656 calls/sec
ujson decode : 11685.87610 calls/sec
cjson decode : 8206.67906 calls/sec
simplejson decode : 6549.99750 calls/sec
Array with 256 strings:
ujson encode : 38543.50303 calls/sec
simplejson encode : 19436.45772 calls/sec
cjson encode : 12392.55614 calls/sec
ujson decode : 27207.33157 calls/sec
cjson decode : 30237.60827 calls/sec
simplejson decode : 25271.93073 calls/sec
Array with 256 doubles:
ujson encode : 6027.45931 calls/sec
simplejson encode : 2915.54871 calls/sec
cjson encode : 3546.88804 calls/sec
ujson decode : 28045.13375 calls/sec
cjson decode : 15066.73209 calls/sec
simplejson decode : 15604.98222 calls/sec
Array with 256 True values:
ujson encode : 187342.39634 calls/sec
simplejson encode : 48972.93887 calls/sec
cjson encode : 67274.93082 calls/sec
ujson decode : 158103.79663 calls/sec
cjson decode : 83237.88990 calls/sec
simplejson decode : 115645.98241 calls/sec
Array with 256 dict{string, int} pairs:
ujson encode : 25301.85690 calls/sec
simplejson encode : 5734.29472 calls/sec
cjson encode : 4447.73411 calls/sec
ujson decode : 16290.72288 calls/sec
cjson decode : 12528.56060 calls/sec
simplejson decode : 10394.23358 calls/sec
Dict with 256 arrays with 256 dict{string, int} pairs:
ujson encode : 87.40865 calls/sec
simplejson encode : 17.07889 calls/sec
cjson encode : 17.25164 calls/sec
ujson decode : 45.94026 calls/sec
cjson decode : 34.60225 calls/sec
simplejson decode : 26.92238 calls/sec
32-bit benchmarks Windows
Python 2.6.6 (r266:84297, Aug 24 2010, 18:46:32) [MSC v.1500 32 bit (Intel)]
OS Version: 6.1.7601 Service Pack 1 Build 7601
System Type: x64-based PC
Processor: Intel(R) Core(TM)2 Quad CPU Q9550 @ 2.83GHz 2.83 GHz
Total Physical Memory: 8191 MB
Array with 256 utf-8 strings:
ujson encode : 1191.98175 calls/sec
simplejson encode : 1013.98279 calls/sec
cjson encode : 1040.66063 calls/sec
ujson decode : 1215.66875 calls/sec
cjson decode : 493.30484 calls/sec
simplejson decode : 269.85512 calls/sec
Medium complex object:
ujson encode : 10307.63723 calls/sec
simplejson encode : 2534.94769 calls/sec
cjson encode : 2047.95118 calls/sec
ujson decode : 7274.10026 calls/sec
cjson decode : 3575.39307 calls/sec
simplejson decode : 3565.51252 calls/sec
Array with 256 strings:
ujson encode : 21348.25210 calls/sec
simplejson encode : 15736.74638 calls/sec
cjson encode : 6371.26334 calls/sec
ujson decode : 26050.25316 calls/sec
cjson decode : 16468.88215 calls/sec
simplejson decode : 21115.75770 calls/sec
Array with 256 doubles:
ujson encode : 26975.49110 calls/sec
simplejson encode : 2046.29746 calls/sec
cjson encode : 2133.56594 calls/sec
ujson decode : 28430.33722 calls/sec
cjson decode : 4114.36400 calls/sec
simplejson decode : 4419.08507 calls/sec
Array with 256 True values:
ujson encode : 89846.12897 calls/sec
simplejson encode : 34288.36862 calls/sec
cjson encode : 47168.35849 calls/sec
ujson decode : 99423.47549 calls/sec
cjson decode : 58795.91460 calls/sec
simplejson decode : 76296.14699 calls/sec
Array with 256 dict{string, int} pairs:
ujson encode : 14776.41614 calls/sec
simplejson encode : 3876.86634 calls/sec
cjson encode : 3050.65343 calls/sec
ujson decode : 12934.39432 calls/sec
cjson decode : 7993.04345 calls/sec
simplejson decode : 7152.09475 calls/sec
Here is the benchmark run from a 32bit CentOS 5.6 (Python 2.4) machine:
Array with 256 utf-8 strings:
ujson encode : 1453.30891 calls/sec
simplejson encode : 658.31181 calls/sec
cjson encode : 62.18416 calls/sec
ujson decode : 1016.58767 calls/sec
cjson decode : 455.28550 calls/sec
simplejson decode : 124.20439 calls/sec
Medium complex object:
ujson encode : 6010.21634 calls/sec
simplejson encode : 1418.77823 calls/sec
cjson encode : 1252.92530 calls/sec
ujson decode : 4637.52630 calls/sec
cjson decode : 3444.13604 calls/sec
simplejson decode : 2166.18641 calls/sec
Array with 256 strings:
ujson encode : 12252.28889 calls/sec
simplejson encode : 9351.67532 calls/sec
cjson encode : 7786.13697 calls/sec
ujson decode : 10951.17394 calls/sec
cjson decode : 15971.02425 calls/sec
simplejson decode : 6796.77480 calls/sec
Array with 256 doubles:
ujson encode : 16300.61218 calls/sec
simplejson encode : 1613.39428 calls/sec
cjson encode : 2035.58937 calls/sec
ujson decode : 17301.00746 calls/sec
cjson decode : 5785.33627 calls/sec
simplejson decode : 6199.49364 calls/sec
Array with 256 True values:
ujson encode : 72618.15350 calls/sec
simplejson encode : 18707.57593 calls/sec
cjson encode : 24150.26201 calls/sec
ujson decode : 53650.94162 calls/sec
cjson decode : 48069.53050 calls/sec
simplejson decode : 47098.40293 calls/sec
Array with 256 dict{string, int} pairs:
ujson encode : 8811.85922 calls/sec
simplejson encode : 2756.91262 calls/sec
cjson encode : 1758.26962 calls/sec
ujson decode : 6490.36358 calls/sec
cjson decode : 6330.77263 calls/sec
simplejson decode : 4161.97048 calls/sec
Dict with 256 arrays with 256 dict{string, int} pairs:
ujson encode : 31.08834 calls/sec
simplejson encode : 10.41434 calls/sec
cjson encode : 6.93790 calls/sec
ujson decode : 19.81373 calls/sec
cjson decode : 20.31727 calls/sec
simplejson decode : 15.05690 calls/sec
See (python/benchmark.py) for further information.
NOTE: These benchmarks are preliminary!
/*
Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by ESN Social Software AB (www.esn.me).
4. Neither the name of the ESN Social Software AB nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Portions of code from:
MODP_ASCII - Ascii transformations (upper/lower, etc)
http://code.google.com/p/stringencoders/
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
*/
/*
Ultra fast JSON encoder and decoder
Developed by Jonas Tarnstrom (jonas@esn.me).
Encoder notes:
------------------
:: Cyclic references ::
Cyclic referenced objects are not detected.
Set JSONObjectEncoder.recursionMax to suitable value or make sure input object
tree doesn't have cyclic references.
*/
#ifndef __ULTRAJSON_H__
#define __ULTRAJSON_H__
#include <stdio.h>
#include <wchar.h>
/*
Compile-time configuration.
The #ifndef-guarded values may be overridden by defining them before this
header is included (or on the compiler command line). */

// Uncomment to make the decoder accumulate the integer part of numbers in a
// double instead of a 64-bit integer (see decode_numeric).
//#define JSON_DECODE_NUMERIC_AS_DOUBLE
// Don't output any extra whitespace when encoding
#define JSON_NO_EXTRA_WHITESPACE
// Max number of decimals used when encoding double floating point numbers.
// The decoder also stops accumulating fraction digits after this many, and
// indexes a 10-entry power-of-ten table with the count, so values above 9
// are not supported by the decoder.
#ifndef JSON_DOUBLE_MAX_DECIMALS
#define JSON_DOUBLE_MAX_DECIMALS 9
#endif
// Max recursion depth, default for encoder
#ifndef JSON_MAX_RECURSION_DEPTH
#define JSON_MAX_RECURSION_DEPTH 256
#endif
/*
Size in bytes of the stack buffers UltraJSON uses before falling back to the
caller-provided heap functions. JSON_DecodeObject places a scratch buffer of
this size on the stack. */
#ifndef JSON_MAX_STACK_BUFFER_SIZE
#define JSON_MAX_STACK_BUFFER_SIZE 131072
#endif
/*
Platform abstraction: fixed-width integer aliases (JSINT32, JSINT64, JSUINT8,
JSUTF16, JSUTF32, JSLONG, ...) plus calling-convention, inline and export
macros. Exactly one of the two branches below is compiled. */
#ifdef _WIN32
// Windows / MSVC: built on the compiler's sized __intN keywords.
typedef __int64 JSINT64;
typedef unsigned __int64 JSUINT64;
// NOTE(review): this defines the standard name uint32_t itself; presumably it
// predates <stdint.h> on the targeted MSVC versions. Confirm it does not clash
// on toolchains that ship <stdint.h>.
typedef unsigned __int32 uint32_t;
typedef __int32 JSINT32;
typedef uint32_t JSUINT32;
typedef unsigned __int8 JSUINT8;
typedef unsigned __int16 JSUTF16;
typedef unsigned __int32 JSUTF32;
typedef __int64 JSLONG;
// Public entry points are exported from the DLL.
#define EXPORTFUNCTION __declspec(dllexport)
// The fastcall convention is spelled as a keyword on MSVC and as a trailing
// attribute elsewhere; each platform leaves the other macro empty.
#define FASTCALL_MSVC __fastcall
#define FASTCALL_ATTR
#define INLINE_PREFIX __inline
#else
// Other platforms: built on the u_intN_t / intN_t names expected from
// <sys/types.h>.
#include <sys/types.h>
typedef int64_t JSINT64;
typedef u_int64_t JSUINT64;
typedef int32_t JSINT32;
typedef u_int32_t JSUINT32;
#define FASTCALL_MSVC
#define FASTCALL_ATTR __attribute__((fastcall))
#define INLINE_PREFIX inline
// NOTE(review): re-declares the standard name uint32_t; confirm this agrees
// with <stdint.h> wherever both headers end up included.
typedef u_int32_t uint32_t;
typedef u_int8_t JSUINT8;
typedef u_int16_t JSUTF16;
typedef u_int32_t JSUTF32;
typedef int64_t JSLONG;
#define EXPORTFUNCTION
#endif
/*
Endianness detection.
Ensures that exactly one of __LITTLE_ENDIAN__ / __BIG_ENDIAN__ is defined.

Some compilers already predefine one of these macros; in that case nothing is
redefined here (an unconditional #define would trigger a macro-redefinition
diagnostic). Otherwise the decision is derived from __BYTE_ORDER__. When the
compiler provides neither __BYTE_ORDER__ nor __ORDER_LITTLE_ENDIAN__, both
evaluate to 0 inside #if, so the target is treated as little-endian. */
#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __LITTLE_ENDIAN__
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define __BIG_ENDIAN__
#endif
#endif
#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
#error "Endianess not supported"
#endif
/*
Type tags for JSON values. The decoder records the tag of the most recently
decoded value in DecoderState.lastType. */
enum JSTYPES
{
JT_NULL, // null
JT_TRUE, // boolean true
JT_FALSE, // boolean false
JT_INT, // JSINT32 (signed 32-bit integer)
JT_LONG, // JSINT64 (signed 64-bit integer)
JT_DOUBLE, // double
JT_UTF8, // string (8-bit char data)
JT_ARRAY, // Array structure
JT_OBJECT, // Key/Value structure
JT_INVALID, // Internal, do not return nor expect
};
// Opaque handle to a value owned by the host application; UltraJSON only
// passes it back to the callbacks and never dereferences it in this header.
typedef void * JSOBJ;
// Opaque iterator handle.
typedef void * JSITER;
/*
Per-value context handed to the encoder callbacks.
type - type tag of the value (presumably one of enum JSTYPES - confirm
       against the encoder implementation)
prv  - scratch slots reserved for the callback implementor, e.g. for
       iteration state */
typedef struct __JSONTypeContext
{
int type;
void *prv[32];
} JSONTypeContext;
/*
Function pointer types used by the JSONObjectEncoder / JSONObjectDecoder
callback tables.
The JSPFN_ITER* types describe the iteration callbacks for arrays and objects;
JSPFN_MALLOC / JSPFN_FREE / JSPFN_REALLOC have the same shapes as the C
library's malloc / free / realloc. */
typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc);
typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc);
typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc);
typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc);
// outLen receives the length of the returned name.
typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc, size_t *outLen);
typedef void *(*JSPFN_MALLOC)(size_t size);
typedef void (*JSPFN_FREE)(void *pptr);
typedef void *(*JSPFN_REALLOC)(void *base, size_t size);
/*
Callback table, configuration and working state passed to JSON_EncodeObject.
The caller fills in the callbacks and configuration fields; the buffer fields
at the end appear to be encoder working state. */
typedef struct __JSONObjectEncoder
{
/*
Called around the encoding of each value.
NOTE(review): presumably beginTypeContext sets tc->type and any tc->prv state
and endTypeContext releases it - confirm against the encoder implementation. */
void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc);
void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc);
/*
Value accessors, one per scalar type. getStringValue reports the string
length through _outLen. */
const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen);
JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc);
JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc);
double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc);
/*
Begin iteration of an iterable object (JT_ARRAY or JT_OBJECT).
Implementor should set up iteration state in tc->prv
*/
JSPFN_ITERBEGIN iterBegin;
/*
Retrieve next object in an iteration. Should return 0 to indicate iteration has reached end or 1 if there are more items.
Implementor is responsible for keeping state of the iteration. Use tc->prv fields for this
*/
JSPFN_ITERNEXT iterNext;
/*
Ends the iteration of an iterable object.
Any iteration state stored in tc->prv can be freed here
*/
JSPFN_ITEREND iterEnd;
/*
Returns a reference to the value object of an iterator.
The implementor is responsible for the life-cycle of the returned value. Use iterNext/iterEnd and tc->prv to keep track of current object
*/
JSPFN_ITERGETVALUE iterGetValue;
/*
Return name of iterator.
The implementor is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and tc->prv to keep track of current object
*/
JSPFN_ITERGETNAME iterGetName;
/*
Release a value previously handed out by a getValue call.
NOTE(review): the original comment refers to a "release" flag on the type
context, but JSONTypeContext as declared in this header has no such field -
confirm the intended protocol against the encoder implementation.
*/
void (*releaseObject)(JSOBJ obj);
/* Library functions
Set to NULL to use STDLIB malloc,realloc,free */
JSPFN_MALLOC malloc;
JSPFN_REALLOC realloc;
JSPFN_FREE free;
/*
Configuration for max recursion, set to 0 to use default (see JSON_MAX_RECURSION_DEPTH)*/
int recursionMax;
/*
Configuration for max decimals of double floating point numbers to encode (0-9) */
int doublePrecision;
/*
If true output will be ASCII with all characters above 127 encoded as \uXXXX. If false output will be UTF-8 or whatever charset the strings are supplied in */
int forceASCII;
/*
Set to an error message if an error occurred */
const char *errorMsg;
JSOBJ errorObj;
/* Output buffer bookkeeping.
NOTE(review): presumably maintained by the encoder itself rather than by the
caller - confirm against the encoder implementation. */
char *start;
char *offset;
char *end;
int heap;
int level;
} JSONObjectEncoder;
/*
Encode an object structure into JSON.
Arguments:
obj - An anonymous type representing the object
enc - Function definitions for querying JSOBJ type
buffer - Preallocated buffer to store the result in. If NULL the function allocates its own buffer
cbBuffer - Length of buffer (ignored if buffer is NULL)
Returns:
Encoded JSON object as a null terminated char string.
NOTE:
If the supplied buffer wasn't large enough to hold the result the function will allocate a new buffer.
The life cycle of the provided buffer must still be handled by the caller.
If the return value doesn't equal the specified buffer the caller must release the memory using
JSONObjectEncoder.free or free() as specified when calling this function.
*/
EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer);
/*
Callback table and error state passed to JSON_DecodeObject.
The decoder builds the result exclusively through these callbacks; the JSOBJ
handles they return are opaque to it. */
typedef struct __JSONObjectDecoder
{
// Create a string value from the decoded wide characters in [start, end).
JSOBJ (*newString)(wchar_t *start, wchar_t *end);
// Add a decoded name/value pair to an object created by newObject.
void (*objectAddKey)(JSOBJ obj, JSOBJ name, JSOBJ value);
// Append a decoded value to an array created by newArray.
void (*arrayAddItem)(JSOBJ obj, JSOBJ value);
// Constructors for the remaining JSON value kinds.
JSOBJ (*newTrue)(void);
JSOBJ (*newFalse)(void);
JSOBJ (*newNull)(void);
JSOBJ (*newObject)(void);
JSOBJ (*newArray)(void);
JSOBJ (*newInt)(JSINT32 value);
JSOBJ (*newLong)(JSINT64 value);
JSOBJ (*newDouble)(double value);
// Called by the decoder on error paths to dispose of values it has already
// created but will not return.
void (*releaseObject)(JSOBJ obj);
// Allocator used for the decoder's string scratch buffer. The decoder calls
// these pointers directly, so they must be set.
JSPFN_MALLOC malloc;
JSPFN_FREE free;
JSPFN_REALLOC realloc;
// Error report: both are reset to NULL when decoding starts; on failure
// errorStr is the message and errorOffset points into the input buffer.
char *errorStr;
char *errorOffset;
} JSONObjectDecoder;
/*
Decode JSON text into an object structure built through the dec callbacks.
Arguments:
dec - Callback table; receives error information on failure
buffer - JSON text to decode
cbBuffer - Length of buffer in bytes
Returns:
The decoded root value, or NULL on error (see dec->errorStr / dec->errorOffset).
*/
EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer);
#endif
\ No newline at end of file
/*
Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by ESN Social Software AB (www.esn.me).
4. Neither the name of the ESN Social Software AB nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Portions of code from:
MODP_ASCII - Ascii transformations (upper/lower, etc)
http://code.google.com/p/stringencoders/
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
*/
#include "ultrajson.h"
#include <math.h>
#include <assert.h>
#include <string.h>
#include <limits.h>
#include <wchar.h>
/*
Working state shared by all decode_* functions during one JSON_DecodeObject
call. */
struct DecoderState
{
// Current read position in the input; advanced as text is consumed.
char *start;
// One past the last input byte (input start + cbBuffer).
char *end;
// Scratch buffer [escStart, escEnd) that decode_string fills with the
// decoded wide characters of the string being parsed.
wchar_t *escStart;
wchar_t *escEnd;
// Non-zero once escStart points at heap memory obtained through dec->malloc /
// dec->realloc instead of the caller's stack buffer.
int escHeap;
// enum JSTYPES tag of the most recently decoded value; decode_object uses it
// to verify that keys are strings.
int lastType;
// Callback table supplied by the caller.
JSONObjectDecoder *dec;
};
// Forward declaration: decode_array and decode_object call decode_any
// before its definition further down.
JSOBJ FASTCALL_MSVC decode_any( struct DecoderState *ds) FASTCALL_ATTR;
// Signature shared by the decode_* functions.
typedef JSOBJ (*PFN_DECODER)( struct DecoderState *ds);
// Returns the result of a value-constructor callback. Note that the expansion
// already ends in ';', so "RETURN_JSOBJ_NULLCHECK(x);" yields an extra empty
// statement.
#define RETURN_JSOBJ_NULLCHECK(_expr) return(_expr);
/*
Combines the separately scanned parts of a decimal number into a double.
Arguments:
intNeg - Sign multiplier (+1 or -1)
intValue - Integer part
frcValue - Fraction digits read as an integer (e.g. 25 for ".25")
frcDecimalCount - Number of fraction digits in frcValue; indexes a 10-entry
                  table, so it must be in the range 0..9
Returns:
(intValue + frcValue / 10^frcDecimalCount) * intNeg
*/
double createDouble(double intNeg, double intValue, double frcValue, int frcDecimalCount)
{
    static const double powersOfTen[] = {
        1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9
    };
    const double fraction = frcValue / powersOfTen[frcDecimalCount];
    const double magnitude = intValue + fraction;

    return magnitude * intNeg;
}
static JSOBJ SetError( struct DecoderState *ds, int offset, const char *message)
{
ds->dec->errorOffset = ds->start + offset;
ds->dec->errorStr = (char *) message;
return NULL;
}
/*
Decodes a JSON number starting at ds->start and advances ds->start past it.

Grammar handled: optional '-', integer digits, optional '.' + fraction digits,
optional 'e'/'E' + optional sign + exponent digits.

Returns:
- newInt(value) when there is no fraction/exponent and the signed value fits
  in the range INT_MIN..INT_MAX,
- newLong(value) when there is no fraction/exponent and it does not fit,
- newDouble(value) otherwise.
ds->lastType is set to JT_INT for both integer results and to JT_DOUBLE for
doubles.

Limitations (unchanged): no check that at least one digit is present (a lone
"-" decodes as 0), no arithmetic overflow detection, and only the first
JSON_DOUBLE_MAX_DECIMALS fraction digits contribute to the value.
*/
FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric ( struct DecoderState *ds)
{
#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE
    double intNeg = 1;
    double intValue;
#else
    int intNeg = 1;
    JSLONG intValue;
    JSLONG signedValue;
#endif
    double expNeg;
    int chr;
    int decimalCount = 0;
    double frcValue = 0.0;
    double expValue;

    if (*(ds->start) == '-')
    {
        ds->start ++;
        intNeg = -1;
    }

    // Scan integer part
    intValue = 0;

    while (1)
    {
        chr = (int) (unsigned char) *(ds->start);

        switch (chr)
        {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                //FIXME: Check for arithmetic overflow here
                //PERF: Don't do 64-bit arithmetic here unless we know we have to
#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE
                intValue = intValue * 10.0 + (double) (chr - 48);
#else
                intValue = intValue * 10LL + (JSLONG) (chr - 48);
#endif
                ds->start ++;
                break;

            case '.':
                ds->start ++;
                goto DECODE_FRACTION;

            case 'e':
            case 'E':
                ds->start ++;
                goto DECODE_EXPONENT;

            default:
                goto BREAK_INT_LOOP;
        }
    }

BREAK_INT_LOOP:

    ds->lastType = JT_INT;

#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE
    if (intValue > (double) INT_MAX || intValue < (double) INT_MIN)
    {
        RETURN_JSOBJ_NULLCHECK(ds->dec->newLong( (JSINT64) (intValue * (JSINT64) intNeg)));
    }
    else
    {
        RETURN_JSOBJ_NULLCHECK(ds->dec->newInt( (JSINT32) (intValue * intNeg)));
    }
#else
    //If input string is LONGLONG_MIN here the value is already negative so we should not flip it
    if (intValue < 0)
    {
        intNeg = 1;
    }

    // Decide between the 32-bit and 64-bit constructor from the signed result.
    // Testing (intValue >> 32) instead would route magnitudes in
    // (INT_MAX, 2^32) such as 3000000000 to newInt and truncate them.
    signedValue = intValue * (JSLONG) intNeg;

    if (signedValue > (JSLONG) INT_MAX || signedValue < (JSLONG) INT_MIN)
    {
        RETURN_JSOBJ_NULLCHECK(ds->dec->newLong( (JSINT64) signedValue));
    }
    else
    {
        RETURN_JSOBJ_NULLCHECK(ds->dec->newInt( (JSINT32) signedValue));
    }
#endif

DECODE_FRACTION:

    // Scan fraction part
    frcValue = 0.0;

    while (1)
    {
        chr = (int) (unsigned char) *(ds->start);

        switch (chr)
        {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                // Digits beyond the supported precision are consumed but ignored
                if (decimalCount < JSON_DOUBLE_MAX_DECIMALS)
                {
                    frcValue = frcValue * 10.0 + (double) (chr - 48);
                    decimalCount ++;
                }
                ds->start ++;
                break;

            case 'e':
            case 'E':
                ds->start ++;
                goto DECODE_EXPONENT;

            default:
                goto BREAK_FRC_LOOP;
        }
    }

BREAK_FRC_LOOP:

    if (intValue < 0)
    {
        intNeg = 1;
    }

    //FIXME: Check for arithmetic overflow here
    ds->lastType = JT_DOUBLE;
    RETURN_JSOBJ_NULLCHECK(ds->dec->newDouble (createDouble( (double) intNeg, (double) intValue, frcValue, decimalCount)));

DECODE_EXPONENT:

    expNeg = 1.0;

    if (*(ds->start) == '-')
    {
        expNeg = -1.0;
        ds->start ++;
    }
    else
    if (*(ds->start) == '+')
    {
        expNeg = +1.0;
        ds->start ++;
    }

    expValue = 0.0;

    while (1)
    {
        chr = (int) (unsigned char) *(ds->start);

        switch (chr)
        {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                expValue = expValue * 10.0 + (double) (chr - 48);
                ds->start ++;
                break;

            default:
                goto BREAK_EXP_LOOP;
        }
    }

BREAK_EXP_LOOP:

#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE
#else
    if (intValue < 0)
    {
        intNeg = 1;
    }
#endif

    //FIXME: Check for arithmetic overflow here
    ds->lastType = JT_DOUBLE;
    RETURN_JSOBJ_NULLCHECK(ds->dec->newDouble (createDouble( (double) intNeg, (double) intValue , frcValue, decimalCount) * pow(10.0, expValue * expNeg)));
}
/*
Decodes the literal "true". ds->start points at the leading 't' on entry and
just past the literal on success.
Returns the value produced by the newTrue callback, or NULL with the error
recorded through SetError when a character does not match.
*/
FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds)
{
    static const char remainder[] = "rue";
    int pos;

    ds->start ++;   /* the caller already matched 't' */

    for (pos = 0; pos < 3; pos ++)
    {
        if (*(ds->start++) != remainder[pos])
        {
            return SetError(ds, -1, "Unexpected character found when decoding 'true'");
        }
    }

    ds->lastType = JT_TRUE;
    RETURN_JSOBJ_NULLCHECK(ds->dec->newTrue());
}
/*
Decodes the literal "false". ds->start points at the leading 'f' on entry and
just past the literal on success.
Returns the value produced by the newFalse callback, or NULL with the error
recorded through SetError when a character does not match.
*/
FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_false ( struct DecoderState *ds)
{
    static const char remainder[] = "alse";
    int pos;

    ds->start ++;   /* the caller already matched 'f' */

    for (pos = 0; pos < 4; pos ++)
    {
        if (*(ds->start++) != remainder[pos])
        {
            return SetError(ds, -1, "Unexpected character found when decoding 'false'");
        }
    }

    ds->lastType = JT_FALSE;
    RETURN_JSOBJ_NULLCHECK(ds->dec->newFalse());
}
/*
Decodes the literal "null". ds->start points at the leading 'n' on entry and
just past the literal on success.
Returns the value produced by the newNull callback, or NULL with the error
recorded through SetError when a character does not match.
*/
FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_null ( struct DecoderState *ds)
{
    static const char remainder[] = "ull";
    int pos;

    ds->start ++;   /* the caller already matched 'n' */

    for (pos = 0; pos < 3; pos ++)
    {
        if (*(ds->start++) != remainder[pos])
        {
            return SetError(ds, -1, "Unexpected character found when decoding 'null'");
        }
    }

    ds->lastType = JT_NULL;
    RETURN_JSOBJ_NULLCHECK(ds->dec->newNull());
}
/*
Advances ds->start past any run of space, tab, carriage return and line feed
characters. Stops at the first other character (including the terminating
NUL) without consuming it.
*/
FASTCALL_ATTR void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds)
{
    for (;;)
    {
        const char current = *ds->start;

        if (current != ' ' && current != '\t' && current != '\r' && current != '\n')
        {
            return;
        }

        ds->start ++;
    }
}
/*
Special classification codes stored in g_decoderLookup alongside the plain
UTF-8 sequence lengths 1..4. They start at 0x32 so that they cannot collide
with those lengths.
*/
enum DECODESTRINGSTATE
{
    DS_ISNULL      = 0x32,  /* NUL byte: input ended inside a string */
    DS_ISQUOTE     = 0x33,  /* '"': end of string                     */
    DS_ISESCAPE    = 0x34,  /* '\\': start of an escape sequence      */
    DS_UTFLENERROR = 0x35   /* lead byte with an unsupported length   */
};
/*
Classification of every possible input byte inside a JSON string, used by
decode_string to dispatch on the byte at the current position:
  1, 2, 3, 4      - byte starts a UTF-8 sequence of that many bytes
  DS_ISNULL       - 0x00, the input ended before the closing quote
  DS_ISQUOTE      - 0x22 '"', end of the string
  DS_ISESCAPE     - 0x5c '\', start of an escape sequence
  DS_UTFLENERROR  - 0xf8..0xff, lead bytes for sequences longer than 4 bytes
Note that 0x80..0xbf (UTF-8 continuation bytes) and the control characters
0x01..0x1f are classified as 1, so decode_string copies them through as
single characters rather than rejecting them. */
static const JSUINT8 g_decoderLookup[256] =
{
/* 0x00 */ DS_ISNULL, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x20 */ 1, 1, DS_ISQUOTE, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, DS_ISESCAPE, 1, 1, 1,
/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR,
};
/*
Decodes a JSON string. ds->start points at the opening '"' on entry and just
past the closing '"' on success.

The UTF-8 input is converted to wide characters in the scratch buffer
[ds->escStart, ds->escEnd), which is grown through dec->malloc / dec->realloc
when it is smaller than the remaining input. Escape sequences (\\ \" \/ \b \f
\n \r \t \uXXXX, including surrogate pairs) are resolved on the way.

Returns the value produced by newString(escStart, escEnd-of-data), or NULL
with the error recorded through SetError. ds->start is not advanced while
scanning, so every error offset refers to the opening quote.

Fixes relative to the previous revision:
- allocation failures are reported instead of dereferencing NULL, and
  ds->escStart / ds->escHeap are only updated once the allocation succeeded;
- UTF-8 continuation bytes must match 10xxxxxx (previously any byte with the
  top bit set was accepted);
- a \uD800..\uDBFF high surrogate must be followed directly by another \u
  escape (previously it was silently dropped when followed by anything else).
*/
FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds)
{
    JSUTF16 sur[2] = { 0 };
    int iSur = 0;
    int index;
    wchar_t *escOffset;
    size_t escLen = (ds->escEnd - ds->escStart);
    size_t inputLen;
    JSUINT8 *inputOffset;
    JSUINT8 oct;
    JSUTF32 ucs;

    ds->lastType = JT_INVALID;
    ds->start ++;

    // Every input byte yields at most one wide character, so a scratch buffer
    // as long as the remaining input is always sufficient.
    inputLen = (size_t) (ds->end - ds->start);

    if (inputLen > escLen)
    {
        wchar_t *newBuffer;

        if (inputLen > ((size_t) -1) / sizeof(wchar_t))
        {
            return SetError(ds, -1, "Could not reserve memory block");
        }

        // The previous contents are not needed: each string is decoded from
        // the start of the scratch buffer.
        if (ds->escHeap)
        {
            newBuffer = (wchar_t *) ds->dec->realloc (ds->escStart, inputLen * sizeof(wchar_t));
        }
        else
        {
            newBuffer = (wchar_t *) ds->dec->malloc (inputLen * sizeof(wchar_t));
        }

        if (newBuffer == NULL)
        {
            // ds->escStart / ds->escHeap are untouched, so the caller's
            // cleanup still releases exactly what is currently owned.
            return SetError(ds, -1, "Could not reserve memory block");
        }

        ds->escStart = newBuffer;
        ds->escHeap = 1;
        ds->escEnd = ds->escStart + inputLen;
    }

    escOffset = ds->escStart;
    inputOffset = (JSUINT8 *) ds->start;

    while (1)
    {
        switch (g_decoderLookup[(JSUINT8)(*inputOffset)])
        {
            case DS_ISNULL:
                return SetError(ds, -1, "Unmatched ''\"' when when decoding 'string'");

            case DS_ISQUOTE:
                ds->lastType = JT_UTF8;
                inputOffset ++;
                ds->start += ( (char *) inputOffset - (ds->start));
                RETURN_JSOBJ_NULLCHECK(ds->dec->newString(ds->escStart, escOffset));

            case DS_UTFLENERROR:
                return SetError (ds, -1, "Invalid UTF-8 sequence length when decoding 'string'");

            case DS_ISESCAPE:
                inputOffset ++;
                switch (*inputOffset)
                {
                    case '\\': *(escOffset++) = L'\\'; inputOffset++; continue;
                    case '\"': *(escOffset++) = L'\"'; inputOffset++; continue;
                    case '/':  *(escOffset++) = L'/';  inputOffset++; continue;
                    case 'b':  *(escOffset++) = L'\b'; inputOffset++; continue;
                    case 'f':  *(escOffset++) = L'\f'; inputOffset++; continue;
                    case 'n':  *(escOffset++) = L'\n'; inputOffset++; continue;
                    case 'r':  *(escOffset++) = L'\r'; inputOffset++; continue;
                    case 't':  *(escOffset++) = L'\t'; inputOffset++; continue;

                    case 'u':
                    {
                        inputOffset ++;

                        // Four hex digits; shifting a 16-bit slot by 4 bits four
                        // times also discards whatever an earlier escape left.
                        for (index = 0; index < 4; index ++)
                        {
                            switch (*inputOffset)
                            {
                                case '0': case '1': case '2': case '3': case '4':
                                case '5': case '6': case '7': case '8': case '9':
                                    sur[iSur] = (sur[iSur] << 4) + (JSUTF16) (*inputOffset - '0');
                                    break;

                                case 'a': case 'b': case 'c':
                                case 'd': case 'e': case 'f':
                                    sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'a');
                                    break;

                                case 'A': case 'B': case 'C':
                                case 'D': case 'E': case 'F':
                                    sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'A');
                                    break;

                                case '\0':
                                    return SetError (ds, -1, "Unterminated unicode escape sequence when decoding 'string'");

                                default:
                                    return SetError (ds, -1, "Unexpected character in unicode escape sequence when decoding 'string'");
                            }

                            inputOffset ++;
                        }

                        if (iSur == 0)
                        {
                            if ((sur[iSur] & 0xfc00) == 0xd800)
                            {
                                // First of a surrogate pair: the second half must
                                // follow immediately as another \u escape.
                                if (inputOffset[0] != '\\' || inputOffset[1] != 'u')
                                {
                                    return SetError (ds, -1, "Unpaired high surrogate when decoding 'string'");
                                }
                                iSur ++;
                                break;
                            }
                            (*escOffset++) = (wchar_t) sur[iSur];
                            iSur = 0;
                        }
                        else
                        {
                            // Decode pair
                            if ((sur[1] & 0xfc00) != 0xdc00)
                            {
                                return SetError (ds, -1, "Unpaired high surrogate when decoding 'string'");
                            }

#if WCHAR_MAX == 0xffff
                            (*escOffset++) = (wchar_t) sur[0];
                            (*escOffset++) = (wchar_t) sur[1];
#else
                            (*escOffset++) = (wchar_t) 0x10000 + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00));
#endif
                            iSur = 0;
                        }
                        break;
                    }

                    case '\0': return SetError(ds, -1, "Unterminated escape sequence when decoding 'string'");
                    default:   return SetError(ds, -1, "Unrecognized escape sequence when decoding 'string'");
                }
                break;

            case 1:
                *(escOffset++) = (wchar_t) (*inputOffset++);
                break;

            case 2:
            {
                ucs = (*inputOffset++) & 0x1f;
                ucs <<= 6;
                // Continuation bytes are 10xxxxxx
                if (((*inputOffset) & 0xc0) != 0x80)
                {
                    return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'");
                }
                ucs |= (*inputOffset++) & 0x3f;
                if (ucs < 0x80) return SetError (ds, -1, "Overlong 2 byte UTF-8 sequence detected when decoding 'string'");
                *(escOffset++) = (wchar_t) ucs;
                break;
            }

            case 3:
            {
                ucs = (*inputOffset++) & 0x0f;

                for (index = 0; index < 2; index ++)
                {
                    ucs <<= 6;
                    oct = (*inputOffset++);

                    if ((oct & 0xc0) != 0x80)
                    {
                        return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'");
                    }

                    ucs |= oct & 0x3f;
                }

                if (ucs < 0x800) return SetError (ds, -1, "Overlong 3 byte UTF-8 sequence detected when decoding 'string'");
                *(escOffset++) = (wchar_t) ucs;
                break;
            }

            case 4:
            {
                ucs = (*inputOffset++) & 0x07;

                for (index = 0; index < 3; index ++)
                {
                    ucs <<= 6;
                    oct = (*inputOffset++);

                    if ((oct & 0xc0) != 0x80)
                    {
                        return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'");
                    }

                    ucs |= oct & 0x3f;
                }

                if (ucs < 0x10000) return SetError (ds, -1, "Overlong 4 byte UTF-8 sequence detected when decoding 'string'");

#if WCHAR_MAX == 0xffff
                // 16-bit wchar_t: store as a UTF-16 surrogate pair
                if (ucs >= 0x10000)
                {
                    ucs -= 0x10000;
                    *(escOffset++) = (ucs >> 10) + 0xd800;
                    *(escOffset++) = (ucs & 0x3ff) + 0xdc00;
                }
                else
                {
                    *(escOffset++) = (wchar_t) ucs;
                }
#else
                *(escOffset++) = (wchar_t) ucs;
#endif
                break;
            }
        }
    }
}
/*
Decodes a JSON array. ds->start points at '[' on entry and just past the
matching ']' on success.

Each element is decoded with decode_any and appended through arrayAddItem.
On failure the partially built array is handed to releaseObject and NULL is
returned (the error has been recorded by the failing decoder or by SetError).

ds->lastType is set to JT_ARRAY on success. Without this it would still hold
the type of the last element, and decode_object would accept an array such as
["a"] as an object key.
*/
FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_array( struct DecoderState *ds)
{
    JSOBJ itemValue;
    JSOBJ newObj = ds->dec->newArray();

    ds->lastType = JT_INVALID;
    ds->start ++;

    while (1)
    {
        SkipWhitespace(ds);

        if ((*ds->start) == ']')
        {
            ds->start ++;
            ds->lastType = JT_ARRAY;
            return newObj;
        }

        itemValue = decode_any(ds);

        if (itemValue == NULL)
        {
            ds->dec->releaseObject(newObj);
            return NULL;
        }

        ds->dec->arrayAddItem (newObj, itemValue);

        SkipWhitespace(ds);

        switch (*(ds->start++))
        {
            case ']':
                ds->lastType = JT_ARRAY;
                return newObj;

            case ',':
                break;

            default:
                ds->dec->releaseObject(newObj);
                return SetError(ds, -1, "Unexpected character in found when decoding array value");
        }
    }
}
/*
Decodes a JSON object. ds->start points at '{' on entry and just past the
matching '}' on success.

Each member is decoded as key, ':', value; the key must decode as a string
(ds->lastType == JT_UTF8). Members are added through objectAddKey. On failure
the partially built object (and a key that has not been added yet) is handed
to releaseObject and NULL is returned.

ds->lastType is set to JT_OBJECT on success. Without this it would still hold
the type of the last member value, and an enclosing decode_object would accept
an object such as {"a":"b"} as a key.
*/
FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_object( struct DecoderState *ds)
{
    JSOBJ itemName;
    JSOBJ itemValue;
    JSOBJ newObj = ds->dec->newObject();

    ds->start ++;

    while (1)
    {
        SkipWhitespace(ds);

        if ((*ds->start) == '}')
        {
            ds->start ++;
            ds->lastType = JT_OBJECT;
            return newObj;
        }

        ds->lastType = JT_INVALID;
        itemName = decode_any(ds);

        if (itemName == NULL)
        {
            ds->dec->releaseObject(newObj);
            return NULL;
        }

        if (ds->lastType != JT_UTF8)
        {
            ds->dec->releaseObject(newObj);
            ds->dec->releaseObject(itemName);
            return SetError(ds, -1, "Key name of object must be 'string' when decoding 'object'");
        }

        SkipWhitespace(ds);

        if (*(ds->start++) != ':')
        {
            ds->dec->releaseObject(newObj);
            ds->dec->releaseObject(itemName);
            return SetError(ds, -1, "No ':' found when decoding object value");
        }

        SkipWhitespace(ds);

        itemValue = decode_any(ds);

        if (itemValue == NULL)
        {
            ds->dec->releaseObject(newObj);
            ds->dec->releaseObject(itemName);
            return NULL;
        }

        ds->dec->objectAddKey (newObj, itemName, itemValue);

        SkipWhitespace(ds);

        switch (*(ds->start++))
        {
            case '}':
                ds->lastType = JT_OBJECT;
                return newObj;

            case ',':
                break;

            default:
                ds->dec->releaseObject(newObj);
                return SetError(ds, -1, "Unexpected character in found when decoding object value");
        }
    }
}
/*
Decodes the next JSON value of any kind. Leading whitespace is skipped, then
the first significant character selects the specific decoder:
  '"' string, '-' or digit number, '[' array, '{' object,
  't' true, 'f' false, 'n' null.
Any other character (including the terminating NUL) is reported as
"Expected object or value" and NULL is returned.
*/
FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds)
{
    for (;;)
    {
        const char lead = *ds->start;

        // White space
        if (lead == ' ' || lead == '\t' || lead == '\r' || lead == '\n')
        {
            ds->start ++;
            continue;
        }

        if (lead == '-' || (lead >= '0' && lead <= '9'))
        {
            return decode_numeric (ds);
        }

        switch (lead)
        {
            case '\"': return decode_string (ds);
            case '[':  return decode_array (ds);
            case '{':  return decode_object (ds);
            case 't':  return decode_true (ds);
            case 'f':  return decode_false (ds);
            case 'n':  return decode_null (ds);
            default:   return SetError(ds, -1, "Expected object or value");
        }
    }
}
/*
Public entry point of the decoder.
Arguments:
dec - Callback table used to build the result; its errorStr / errorOffset
      fields are cleared here and filled in on failure
buffer - JSON text to decode
cbBuffer - Length of buffer in bytes
Returns:
The value produced by decode_any for the first JSON value in buffer, or NULL
on error.
NOTE: the scanning functions in this file detect the end of input by a NUL
byte; cbBuffer is only used to size the string scratch buffer.
*/
JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer)
{
    /*
    FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode escaping doesn't run into the wall each time */
    wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))];
    struct DecoderState ds;
    JSOBJ ret;

    dec->errorStr = NULL;
    dec->errorOffset = NULL;

    ds.dec = dec;
    ds.start = (char *) buffer;
    ds.end = ds.start + cbBuffer;

    // String scratch space starts out on the stack; decode_string moves it to
    // the heap (and sets escHeap) when a string needs more room.
    ds.escStart = escBuffer;
    ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t));
    ds.escHeap = 0;

    ret = decode_any (&ds);

    if (ds.escHeap)
    {
        dec->free(ds.escStart);
    }

    return ret;
}
/*
Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by ESN Social Software AB (www.esn.me).
4. Neither the name of the ESN Social Software AB nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Portions of code from:
MODP_ASCII - Ascii transformations (upper/lower, etc)
http://code.google.com/p/stringencoders/
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
*/
#include "ultrajson.h"
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/* Powers of ten indexed by exponent: g_pow10[n] == 10^n for n in 0..9 */
static const double g_pow10[] =
{
  1.0,
  10.0,
  100.0,
  1000.0,
  10000.0,
  100000.0,
  1000000.0,
  10000000.0,
  100000000.0,
  1000000000.0
};

/* Lower case hexadecimal digits indexed by nibble value */
static const char g_hexChars[] = "0123456789abcdef";

/*
Two character escape sequences stored at the even offsets 10..24. The offsets
are the values found in g_asciiOutputTable; the ten digits in front are only
filler so that those offsets line up. */
static const char g_escapeChars[] =
  "0123456789"  /*  0..9  filler         */
  "\\b"         /* 10     backspace      */
  "\\t"         /* 12     tab            */
  "\\n"         /* 14     line feed      */
  "\\f"         /* 16     form feed      */
  "\\r"         /* 18     carriage return */
  "\\\""        /* 20     double quote   */
  "\\\\"        /* 22     backslash      */
  "\\/";        /* 24     forward slash  */
/*
FIXME: While this is fine and dandy and works, it is a mess of magic values which probably only the author understands.
It needs a cleanup and more documentation. */
/*
Table for pure ascii output escaping all characters above 127 to \uXXXX */
/*
Classification of every possible input byte, consumed by
Buffer_EscapeStringValidated. Meaning of the stored values:

   0        NUL byte, treated as the end of the string
   1        byte is copied to the output unchanged
   2, 3, 4  first byte of a 2, 3 or 4 byte UTF-8 sequence, written as \uXXXX
   5, 6     first byte of a 5 or 6 byte sequence, rejected as unsupported
  10..22    (even values) offset of a two character escape in g_escapeChars
  30        control character, written as \u00XX

Note that 0x80..0xbf (UTF-8 continuation bytes) and 0xfe/0xff are classified
as 1, so such a byte is copied through unchanged when it is met on its own.
The commented out 24 at 0x2f is the g_escapeChars offset for escaping '/'. */
static const JSUINT8 g_asciiOutputTable[256] =
{
/* 0x00 */ 0, 30, 30, 30, 30, 30, 30, 30, 10, 12, 14, 30, 16, 18, 30, 30,
/* 0x10 */ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
/* 0x20 */ 1, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/*24*/,
/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1,
/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};
/*
Records an encoding failure on the encoder: obj is stored in enc->errorObj and
message in enc->errorMsg. The message pointer is stored as is, not copied. */
static void SetError (JSOBJ obj, JSONObjectEncoder *enc, const char *message)
{
  enc->errorObj = obj;
  enc->errorMsg = message;
}
/*
FIXME: Keep track of how big these get across several encoder calls and try to make an estimate
That way we won't run our head into the wall each call */
/*
Grows the encoder output buffer so that at least cbNeeded more bytes than the
current capacity are available. The capacity is doubled until it is large
enough.

If the current buffer was supplied by the caller (enc->heap == 0) a new block
is obtained with enc->malloc and the bytes written so far are copied into it;
the caller's buffer is not freed. Otherwise the block is resized with
enc->realloc. enc->start, enc->offset and enc->end are updated to describe the
new block.

On failure (size overflow or the allocator returning NULL) an error is recorded
with SetError and the encoder keeps pointing at the old, still valid buffer.
The requested space is NOT available in that case, so callers must check
enc->errorMsg before writing. */
void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded)
{
  const size_t maxSize = (size_t) -1;
  size_t curSize = enc->end - enc->start;
  size_t offset = enc->offset - enc->start;
  size_t minSize;
  size_t newSize;
  char *newStart;

  if (cbNeeded > maxSize - curSize)
  {
    SetError (NULL, enc, "Could not reserve memory block");
    return;
  }

  minSize = curSize + cbNeeded;

  /* Start from 1 when the buffer is empty: doubling 0 would never terminate */
  newSize = (curSize > 0) ? curSize : 1;

  do
  {
    if (newSize > maxSize / 2)
    {
      /* Doubling would wrap around, settle for exactly what is needed */
      newSize = minSize;
      break;
    }

    newSize *= 2;
  } while (newSize < minSize);

  if (enc->heap)
  {
    /* Keep enc->start until realloc succeeded, the old block stays valid on failure */
    newStart = (char *) enc->realloc (enc->start, newSize);

    if (newStart == NULL)
    {
      SetError (NULL, enc, "Could not reserve memory block");
      return;
    }
  }
  else
  {
    newStart = (char *) enc->malloc (newSize);

    if (newStart == NULL)
    {
      SetError (NULL, enc, "Could not reserve memory block");
      return;
    }

    memcpy (newStart, enc->start, offset);
    enc->heap = 1;
  }

  enc->start = newStart;
  enc->offset = newStart + offset;
  enc->end = newStart + newSize;
}
/*
Writes the low 16 bits of value as four lower case hexadecimal digits, most
significant digit first, to outputOffset[0..3]. No terminator is written and
no bounds are checked; the caller advances its own output pointer. */
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (char *outputOffset, unsigned short value)
{
  outputOffset[0] = g_hexChars[(value >> 12) & 0x0f];
  outputOffset[1] = g_hexChars[(value >> 8) & 0x0f];
  outputOffset[2] = g_hexChars[(value >> 4) & 0x0f];
  outputOffset[3] = g_hexChars[value & 0x0f];
}
/*
Copies the bytes in [io, end) to the encoder output (enc->offset), applying
JSON string escaping but no UTF-8 validation:

  - '"' and '\' are prefixed with a backslash
  - backspace, form feed, line feed, carriage return and tab become
    \b \f \n \r \t
  - every other byte below 0x20, including NUL, becomes \u00XX
  - all remaining bytes (including bytes above 127) are copied unchanged

The range is delimited by end only: the input does not have to be NUL
terminated and an embedded NUL no longer truncates the output.

No bounds checks are made on the output. The caller must have reserved up to
6 output bytes per input byte. enc->offset is advanced past the bytes written.
obj is unused. Always returns TRUE. */
int Buffer_EscapeStringUnvalidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end)
{
  char *of = (char *) enc->offset;

  (void) obj;

  while (io < end)
  {
    const unsigned char chr = (unsigned char) *io++;

    switch (chr)
    {
      case '\"': (*of++) = '\\'; (*of++) = '\"'; break;
      case '\\': (*of++) = '\\'; (*of++) = '\\'; break;
      //case '/': (*of++) = '\\'; (*of++) = '/'; break;
      case '\b': (*of++) = '\\'; (*of++) = 'b'; break;
      case '\f': (*of++) = '\\'; (*of++) = 'f'; break;
      case '\n': (*of++) = '\\'; (*of++) = 'n'; break;
      case '\r': (*of++) = '\\'; (*of++) = 'r'; break;
      case '\t': (*of++) = '\\'; (*of++) = 't'; break;

      default:
        if (chr < 0x20)
        {
          /* Control character without a short escape */
          *(of++) = '\\';
          *(of++) = 'u';
          *(of++) = '0';
          *(of++) = '0';
          *(of++) = g_hexChars[(chr & 0xf0) >> 4];
          *(of++) = g_hexChars[chr & 0x0f];
        }
        else
        {
          (*of++) = (char) chr;
        }
        break;
    }
  }

  enc->offset = of;
  return TRUE;
}
/*
Copies the UTF-8 bytes in [io, end) to the encoder output (enc->offset) as pure
ASCII JSON string content. Each input byte is classified with
g_asciiOutputTable:

  - plain bytes are copied unchanged
  - '"', '\', backspace, tab, line feed, form feed and carriage return get
    their two character escape from g_escapeChars
  - other control characters, including NUL, become \u00XX
  - 2, 3 and 4 byte UTF-8 sequences are decoded and written as \uXXXX, using a
    surrogate pair for code points of 0x10000 and above

The range is delimited by end only; the input does not have to be NUL
terminated. Sequences are decoded byte by byte, so the result does not depend
on host byte order or on the alignment of io.

Returns TRUE on success. Returns FALSE after recording an error with SetError
when a sequence is cut off by end, is an overlong encoding, or starts with a 5
or 6 byte lead byte. In both cases enc->offset is advanced past whatever was
written. Continuation bytes are not checked for the 10xxxxxx form.

No bounds checks are made on the output. The caller must have reserved up to
6 output bytes per input byte.

FIXME: The JSON spec allows escaping "/" but none of the other encoders do it
and we don't want to be left alone doing it, so we don't :)
*/
int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end)
{
  JSUTF32 ucs;
  char *of = (char *) enc->offset;

  while (io < end)
  {
    const unsigned char *in = (const unsigned char *) io;
    JSUINT8 utflen = g_asciiOutputTable[in[0]];

    switch (utflen)
    {
      case 1:
      {
        *(of++) = (*io++);
        continue;
      }

      case 2:
      {
        if (end - io < 2)
        {
          enc->offset = of;
          SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string");
          return FALSE;
        }

        ucs = ((JSUTF32) (in[0] & 0x1f) << 6) | (JSUTF32) (in[1] & 0x3f);

        if (ucs < 0x80)
        {
          enc->offset = of;
          SetError (obj, enc, "Overlong 2 byte UTF-8 sequence detected when encoding string");
          return FALSE;
        }

        io += 2;
        break;
      }

      case 3:
      {
        if (end - io < 3)
        {
          enc->offset = of;
          SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string");
          return FALSE;
        }

        ucs = ((JSUTF32) (in[0] & 0x0f) << 12)
            | ((JSUTF32) (in[1] & 0x3f) << 6)
            |  (JSUTF32) (in[2] & 0x3f);

        if (ucs < 0x800)
        {
          enc->offset = of;
          SetError (obj, enc, "Overlong 3 byte UTF-8 sequence detected when encoding string");
          return FALSE;
        }

        io += 3;
        break;
      }

      case 4:
      {
        if (end - io < 4)
        {
          enc->offset = of;
          SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string");
          return FALSE;
        }

        ucs = ((JSUTF32) (in[0] & 0x07) << 18)
            | ((JSUTF32) (in[1] & 0x3f) << 12)
            | ((JSUTF32) (in[2] & 0x3f) << 6)
            |  (JSUTF32) (in[3] & 0x3f);

        if (ucs < 0x10000)
        {
          enc->offset = of;
          SetError (obj, enc, "Overlong 4 byte UTF-8 sequence detected when encoding string");
          return FALSE;
        }

        io += 4;
        break;
      }

      case 0:
      case 30:
        // \u00XX encode (control characters, including an embedded NUL)
        *(of++) = '\\';
        *(of++) = 'u';
        *(of++) = '0';
        *(of++) = '0';
        *(of++) = g_hexChars[(in[0] & 0xf0) >> 4];
        *(of++) = g_hexChars[in[0] & 0x0f];
        io ++;
        continue;

      case 10:
      case 12:
      case 14:
      case 16:
      case 18:
      case 20:
      case 22:
      //case 24: (enable for / escaping)
        *(of++) = g_escapeChars[utflen + 0];
        *(of++) = g_escapeChars[utflen + 1];
        io ++;
        continue;

      case 5:
      case 6:
      default:
        /* default is not reachable with the values currently in the table */
        enc->offset = of;
        SetError (obj, enc, "Unsupported UTF-8 sequence length when encoding string");
        return FALSE;
    }

    /*
    If the character is a UTF8 sequence of length > 1 we end up here */
    if (ucs >= 0x10000)
    {
      ucs -= 0x10000;
      *(of++) = '\\';
      *(of++) = 'u';
      Buffer_AppendShortHexUnchecked(of, (unsigned short) ((ucs >> 10) + 0xd800));
      of += 4;

      *(of++) = '\\';
      *(of++) = 'u';
      Buffer_AppendShortHexUnchecked(of, (unsigned short) ((ucs & 0x3ff) + 0xdc00));
      of += 4;
    }
    else
    {
      *(of++) = '\\';
      *(of++) = 'u';
      Buffer_AppendShortHexUnchecked(of, (unsigned short) ucs);
      of += 4;
    }
  }

  enc->offset = of;
  return TRUE;
}
/*
Buffer_Reserve makes sure that __len bytes can be written at (__enc)->offset,
calling Buffer_Realloc when the remaining space is too small. It expands to a
single statement, so it is safe inside an unbraced if/else.

Buffer_AppendCharUnchecked stores __chr at (__enc)->offset and advances the
offset by one. No bounds check is made.

Both macros evaluate __enc more than once; do not pass an expression with side
effects. Neither definition ends in a line continuation, so the line that
follows a macro is never spliced into it. */
#define Buffer_Reserve(__enc, __len) \
  do \
  { \
    if ((__enc)->offset + (__len) > (__enc)->end) \
    { \
      Buffer_Realloc((__enc), (__len)); \
    } \
  } while (0)

#define Buffer_AppendCharUnchecked(__enc, __chr) \
  ((void) (*((__enc)->offset++) = (__chr)))

/*
Reverses the characters between begin and end in place. Both pointers are
inclusive: end addresses the last character, not one past it. */
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, char* end)
{
  while (begin < end)
  {
    char tmp = *begin;
    *begin = *end;
    *end = tmp;
    begin ++;
    end --;
  }
}
/*
Writes value as decimal text at enc->offset and advances enc->offset past it.
No terminator is written and no bounds are checked; the caller must have
reserved enough room for the digits and the sign. */
void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value)
{
  char* wstr;
  /* Take the magnitude in unsigned arithmetic: -value overflows (undefined
     behaviour) when value is the most negative JSINT32 */
  JSUINT32 uvalue = (value < 0) ? ((JSUINT32) 0 - (JSUINT32) value) : (JSUINT32) value;

  wstr = enc->offset;

  // Conversion. Number is reversed.
  do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10);
  if (value < 0) *wstr++ = '-';

  // Reverse string
  strreverse(enc->offset,wstr - 1);
  enc->offset = wstr;
}
/*
Writes value as decimal text at enc->offset and advances enc->offset past it.
No terminator is written and no bounds are checked; the caller must have
reserved enough room for the digits and the sign. */
void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value)
{
  char* wstr;
  /* Take the magnitude in unsigned arithmetic: -value overflows (undefined
     behaviour) when value is the most negative JSINT64 */
  JSUINT64 uvalue = (value < 0) ? ((JSUINT64) 0 - (JSUINT64) value) : (JSUINT64) value;

  wstr = enc->offset;

  // Conversion. Number is reversed.
  do *wstr++ = (char)(48 + (uvalue % 10ULL)); while(uvalue /= 10ULL);
  if (value < 0) *wstr++ = '-';

  // Reverse string
  strreverse(enc->offset,wstr - 1);
  enc->offset = wstr;
}
/*
Writes value as decimal text at enc->offset, using at most
enc->doublePrecision fractional digits, and advances enc->offset past it.

Infinity and NaN are rejected: an error is recorded with SetError and FALSE is
returned. Magnitudes above 0x7FFFFFFF are written by sprintf with "%e". All
other values are written as whole part, '.', fractional part with trailing
zeros removed ("1.0" when the fraction is zero). With a precision of 0 only
the rounded whole part is written.

No bounds are checked; the caller must have reserved enough room. Returns TRUE
on success. */
int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value)
{
  /* if input is larger than thres_max, revert to exponential */
  const double thres_max = (double)(0x7FFFFFFF);
  int count;
  double diff = 0.0;
  char* str = enc->offset;
  char* wstr = str;
  int whole;
  double tmp;
  uint32_t frac;
  int neg;
  double pow10;

  if (value == HUGE_VAL || value == -HUGE_VAL)
  {
    SetError (obj, enc, "Invalid Inf value when encoding double");
    return FALSE;
  }

  if (! (value == value))
  {
    SetError (obj, enc, "Invalid Nan value when encoding double");
    return FALSE;
  }

  /* we'll work in positive values and deal with the
  negative sign issue later */
  neg = 0;
  if (value < 0)
  {
    neg = 1;
    value = -value;
  }

  /* for very large numbers switch back to native sprintf for exponentials.
  anyone want to write code to replace this? */
  /*
  normal printf behavior is to print EVERY whole number digit
  which can be 100s of characters overflowing your buffers == bad

  This test has to come before the conversion to int below: converting a
  double that does not fit in an int is undefined behaviour.
  */
  if (value > thres_max)
  {
    enc->offset += sprintf(str, "%e", neg ? -value : value);
    return TRUE;
  }

  pow10 = g_pow10[enc->doublePrecision];

  whole = (int) value;
  tmp = (value - whole) * pow10;
  frac = (uint32_t)(tmp);
  diff = tmp - frac;

  if (diff > 0.5)
  {
    ++frac;
    /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */
    if (frac >= pow10)
    {
      frac = 0;
      ++whole;
    }
  }
  else
  if (diff == 0.5 && ((frac == 0) || (frac & 1)))
  {
    /* if halfway, round up if odd, OR
    if last digit is 0. That last part is strange */
    ++frac;

    /* Rounding up at the halfway point can roll over as well, e.g. 0.95 with
    prec 1 must become 1.0 and not 0.1. With prec 0 the whole part is rounded
    further down, so it is left alone here. */
    if (enc->doublePrecision > 0 && frac >= pow10)
    {
      frac = 0;
      ++whole;
    }
  }

  if (enc->doublePrecision == 0)
  {
    diff = value - whole;

    if (diff > 0.5)
    {
      /* greater than 0.5, round up, e.g. 1.6 -> 2 */
      ++whole;
    }
    else
    if (diff == 0.5 && (whole & 1))
    {
      /* exactly 0.5 and ODD, then round up */
      /* 1.5 -> 2, but 2.5 -> 2 */
      ++whole;
    }

    //vvvvvvvvvvvvvvvvvvv Diff from modp_dto2
  }
  else
  if (frac)
  {
    count = enc->doublePrecision;
    // now do fractional part, as an unsigned number
    // we know it is not 0 but we can have trailing zeros, these
    // should be removed
    while (!(frac % 10))
    {
      --count;
      frac /= 10;
    }
    //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2

    // now do fractional part, as an unsigned number
    do
    {
      --count;
      *wstr++ = (char)(48 + (frac % 10));
    } while (frac /= 10);

    // add extra 0s
    while (count-- > 0)
    {
      *wstr++ = '0';
    }

    // add decimal
    *wstr++ = '.';
  }
  else
  {
    *wstr++ = '0';
    *wstr++ = '.';
  }

  // do whole part
  // Take care of sign
  // Conversion. Number is reversed.
  do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10);

  if (neg)
  {
    *wstr++ = '-';
  }

  // The digits were produced least significant first, put them in order
  strreverse(str, wstr-1);
  enc->offset = wstr;
  return TRUE;
}
/*
FIXME:
Handle integration functions returning NULL here */
/*
FIXME:
Perhaps implement recursion detection */
/*
Writes the JSON representation of obj to the encoder output buffer. When name
is not NULL it is written first as an escaped, quoted member name followed by
':' (cbName is its length in bytes).

The type of obj is obtained through enc->beginTypeContext. Arrays and objects
are walked with the iter* callbacks and encode is called recursively for every
element, with enc->level incremented by the caller and decremented again when
the callee finishes normally.

Nothing is returned. Failures are reported through enc->errorMsg /
enc->errorObj. Once an error is recorded on the encoder, further calls return
without producing output. */
void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
{
  JSONTypeContext tc;
  size_t szlen;

  if (enc->level > enc->recursionMax)
  {
    SetError (obj, enc, "Maximum recursion level reached");
    return;
  }

  /*
  This reservation must hold

  length of _name as encoded worst case +
  maxLength of double to string OR maxLength of JSLONG to string

  The worst case expansion is 6 output bytes for 1 input byte: a control
  character is written as \u00XX. (4 bytes of UTF-8 become "\uXXXX\uXXXX",
  12 bytes, which is only 3 for 1.)
  */
  Buffer_Reserve(enc, 256 + (cbName * 6));

  /* Stop as soon as an error is recorded, the output is unusable anyway */
  if (enc->errorMsg)
  {
    return;
  }

  if (name)
  {
    Buffer_AppendCharUnchecked(enc, '\"');

    if (enc->forceASCII)
    {
      if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName))
      {
        return;
      }
    }
    else
    {
      if (!Buffer_EscapeStringUnvalidated(obj, enc, name, name + cbName))
      {
        return;
      }
    }

    Buffer_AppendCharUnchecked(enc, '\"');
    Buffer_AppendCharUnchecked (enc, ':');
#ifndef JSON_NO_EXTRA_WHITESPACE
    Buffer_AppendCharUnchecked (enc, ' ');
#endif
  }

  enc->beginTypeContext(obj, &tc);

  switch (tc.type)
  {
    case JT_INVALID:
      return;

    case JT_ARRAY:
    {
      int count = 0;
      JSOBJ iterObj;
      enc->iterBegin(obj, &tc);

      Buffer_AppendCharUnchecked (enc, '[');

      while (enc->iterNext(obj, &tc))
      {
        if (count > 0)
        {
          Buffer_AppendCharUnchecked (enc, ',');
#ifndef JSON_NO_EXTRA_WHITESPACE
          Buffer_AppendCharUnchecked (enc, ' ');
#endif
        }

        iterObj = enc->iterGetValue(obj, &tc);

        enc->level ++;
        encode (iterObj, enc, NULL, 0);
        count ++;
      }

      enc->iterEnd(obj, &tc);
      Buffer_AppendCharUnchecked (enc, ']');
      break;
    }

    case JT_OBJECT:
    {
      int count = 0;
      JSOBJ iterObj;
      char *objName;

      enc->iterBegin(obj, &tc);

      Buffer_AppendCharUnchecked (enc, '{');

      while (enc->iterNext(obj, &tc))
      {
        if (count > 0)
        {
          Buffer_AppendCharUnchecked (enc, ',');
#ifndef JSON_NO_EXTRA_WHITESPACE
          Buffer_AppendCharUnchecked (enc, ' ');
#endif
        }

        iterObj = enc->iterGetValue(obj, &tc);
        objName = enc->iterGetName(obj, &tc, &szlen);

        enc->level ++;
        encode (iterObj, enc, objName, szlen);
        count ++;
      }

      enc->iterEnd(obj, &tc);
      Buffer_AppendCharUnchecked (enc, '}');
      break;
    }

    case JT_LONG:
    {
      Buffer_AppendLongUnchecked (enc, enc->getLongValue(obj, &tc));
      break;
    }

    case JT_INT:
    {
      Buffer_AppendIntUnchecked (enc, enc->getIntValue(obj, &tc));
      break;
    }

    case JT_TRUE:
    {
      Buffer_AppendCharUnchecked (enc, 't');
      Buffer_AppendCharUnchecked (enc, 'r');
      Buffer_AppendCharUnchecked (enc, 'u');
      Buffer_AppendCharUnchecked (enc, 'e');
      break;
    }

    case JT_FALSE:
    {
      Buffer_AppendCharUnchecked (enc, 'f');
      Buffer_AppendCharUnchecked (enc, 'a');
      Buffer_AppendCharUnchecked (enc, 'l');
      Buffer_AppendCharUnchecked (enc, 's');
      Buffer_AppendCharUnchecked (enc, 'e');
      break;
    }

    case JT_NULL:
    {
      Buffer_AppendCharUnchecked (enc, 'n');
      Buffer_AppendCharUnchecked (enc, 'u');
      Buffer_AppendCharUnchecked (enc, 'l');
      Buffer_AppendCharUnchecked (enc, 'l');
      break;
    }

    case JT_DOUBLE:
    {
      if (!Buffer_AppendDoubleUnchecked (obj, enc, enc->getDoubleValue(obj, &tc)))
      {
        enc->endTypeContext(obj, &tc);
        enc->level --;
        return;
      }
      break;
    }

    case JT_UTF8:
    {
      const char *value = enc->getStringValue(obj, &tc, &szlen);

      /*
      Up to 6 output bytes per input byte (see above). The extra 256 bytes
      cover the quotes and whatever the enclosing array or object appends
      (separators, closing brackets) before the next reservation is made.
      */
      Buffer_Reserve(enc, 256 + (szlen * 6));

      if (enc->errorMsg)
      {
        enc->endTypeContext(obj, &tc);
        enc->level --;
        return;
      }

      Buffer_AppendCharUnchecked (enc, '\"');

      if (enc->forceASCII)
      {
        if (!Buffer_EscapeStringValidated(obj, enc, value, value + szlen))
        {
          enc->endTypeContext(obj, &tc);
          enc->level --;
          return;
        }
      }
      else
      {
        if (!Buffer_EscapeStringUnvalidated(obj, enc, value, value + szlen))
        {
          enc->endTypeContext(obj, &tc);
          enc->level --;
          return;
        }
      }

      Buffer_AppendCharUnchecked (enc, '\"');
      break;
    }

    default:
      break;
  }

  enc->endTypeContext(obj, &tc);
  enc->level --;
}
/*
Encodes obj as JSON text and returns a pointer to the NUL terminated result.

enc supplies the callbacks and options. Missing malloc/free/realloc callbacks
default to the C library functions, a recursionMax below 1 is replaced by
JSON_MAX_RECURSION_DEPTH and a doublePrecision outside
0..JSON_DOUBLE_MAX_DECIMALS is replaced by JSON_DOUBLE_MAX_DECIMALS.

_buffer/_cbBuffer describe an optional caller supplied output buffer. When
_buffer is NULL a 32768 byte buffer is allocated with enc->malloc. The result
may live in a heap block obtained through enc->malloc/enc->realloc even when a
buffer was supplied; enc->heap is set in that case and enc->start (the return
value) points at the block.

Encoding errors are reported through enc->errorMsg and enc->errorObj, which
are cleared on entry; the buffer pointer is still returned in that case. NULL
is returned only when the initial buffer could not be allocated. */
char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer)
{
  enc->malloc = enc->malloc ? enc->malloc : malloc;
  enc->free = enc->free ? enc->free : free;
  enc->realloc = enc->realloc ? enc->realloc : realloc;
  enc->errorMsg = NULL;
  enc->errorObj = NULL;
  enc->level = 0;

  if (enc->recursionMax < 1)
  {
    enc->recursionMax = JSON_MAX_RECURSION_DEPTH;
  }

  if (enc->doublePrecision < 0 ||
      enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS)
  {
    enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS;
  }

  if (_buffer == NULL)
  {
    _cbBuffer = 32768;
    enc->start = (char *) enc->malloc (_cbBuffer);

    if (enc->start == NULL)
    {
      /* Nothing to encode into, leave the encoder without a buffer */
      enc->offset = NULL;
      enc->end = NULL;
      enc->heap = 0;
      SetError (obj, enc, "Could not reserve memory block");
      return NULL;
    }

    enc->heap = 1;
  }
  else
  {
    enc->start = _buffer;
    enc->heap = 0;
  }

  enc->end = enc->start + _cbBuffer;
  enc->offset = enc->start;

  encode (obj, enc, NULL, 0);

  Buffer_Reserve(enc, 1);

  /* Never write past the end of the buffer if the reservation could not be satisfied */
  if (enc->offset < enc->end)
  {
    Buffer_AppendCharUnchecked(enc, '\0');
  }

  return enc->start;
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment