From 140756ad30361a0a44d5f9d7ec8db9e8684863fc Mon Sep 17 00:00:00 2001
From: Blagodarenko <blagodarenko@mail.ru>
Date: Sun, 20 Aug 2017 14:48:59 +0300
Subject: [PATCH] miner-burst-1.170820

* Optimization of the algorithm
- Removed support for burstdev pool (poolV2)
- "ShowMsg" parameter was deleted
- "ShowUpdates" parameter was deleted
- "SendBestOnly" parameter was deleted
- "UseFastRcv" parameter was deleted
+ Added statistics file stat.csv (height; baseTarget; deadline)
* bugs fixed
---
 miner.cpp                       |  484 ++++----
 miner.h                         |   17 +-
 miner.vcxproj                   |    2 +-
 mshabal256_avx2.c               |    2 +
 mshabal_avx1.c                  |   50 +-
 rapidjson/allocators.h          |   42 +-
 rapidjson/document.h            | 1081 +++++++++++++----
 rapidjson/encodedstream.h       |   58 +-
 rapidjson/encodings.h           |  233 ++--
 rapidjson/error/en.h            |   21 +-
 rapidjson/error/error.h         |   27 +-
 rapidjson/filereadstream.h      |   13 +-
 rapidjson/filewritestream.h     |   17 +-
 rapidjson/fwd.h                 |  151 +++
 rapidjson/internal/biginteger.h |   14 +-
 rapidjson/internal/diyfp.h      |   23 +-
 rapidjson/internal/dtoa.h       |   50 +-
 rapidjson/internal/ieee754.h    |    3 +-
 rapidjson/internal/regex.h      |  734 +++++++++++
 rapidjson/internal/stack.h      |   68 +-
 rapidjson/internal/strfunc.h    |   32 +-
 rapidjson/internal/strtod.h     |   39 +-
 rapidjson/internal/swap.h       |   46 +
 rapidjson/istreamwrapper.h      |  115 ++
 rapidjson/memorybuffer.h        |    2 +-
 rapidjson/memorystream.h        |   16 +-
 rapidjson/msinttypes/stdint.h   |    8 +-
 rapidjson/ostreamwrapper.h      |   81 ++
 rapidjson/pointer.h             | 1358 +++++++++++++++++++++
 rapidjson/prettywriter.h        |   98 +-
 rapidjson/rapidjson.h           |  314 +++--
 rapidjson/reader.h              | 1333 +++++++++++++++-----
 rapidjson/schema.h              | 2016 +++++++++++++++++++++++++++++++
 rapidjson/stream.h              |  179 +++
 rapidjson/stringbuffer.h        |   30 +-
 rapidjson/writer.h              |  468 +++++--
 36 files changed, 7999 insertions(+), 1226 deletions(-)
 create mode 100644 rapidjson/fwd.h
 create mode 100644 rapidjson/internal/regex.h
 create mode 100644 rapidjson/internal/swap.h
 create mode 100644 rapidjson/istreamwrapper.h
 create mode 100644 rapidjson/ostreamwrapper.h
 create mode 100644 rapidjson/pointer.h
 create mode 100644 rapidjson/schema.h
 create mode 100644 rapidjson/stream.h

diff --git a/miner.cpp b/miner.cpp
index cc24a53..c0360c6 100644
--- a/miner.cpp
+++ b/miner.cpp
@@ -25,19 +25,20 @@ void Log_init(void)
 		ss << "Logs\\" << cur_time.wYear << "-" << cur_time.wMonth << "-" << cur_time.wDay << "_" << cur_time.wHour << "_" << cur_time.wMinute << "_" << cur_time.wSecond << ".log";
 		std::string filename = ss.str();
 		
-		if (fopen_s(&fp_Log, filename.c_str(), "wt") != 0)
+		if ((fp_Log = _fsopen(filename.c_str(), "wt", _SH_DENYNO)) == NULL)
 		{
 			wattron(win_main, COLOR_PAIR(12));
 			wprintw(win_main, "LOG: file openinig error\n", 0);
 			wattroff(win_main, COLOR_PAIR(12));
 			use_log = false;
 		}
+		Log(version);
 	}
 }
 
 void Log(char const *const strLog)
 {
-	if (use_log && fp_Log)
+	if (use_log)
 	{
 		// если строка содержит интер, то добавить время  
 		if (strLog[0] == '\n')
@@ -53,7 +54,7 @@ void Log(char const *const strLog)
 void Log_server(char const *const strLog)
 {
 	size_t len_str = strlen(strLog);
-	if ((len_str> 0) && use_log && fp_Log)
+	if ((len_str> 0) && use_log)
 	{
 		char * Msg_log = (char*)HeapAlloc(hHeap, HEAP_ZERO_MEMORY, len_str * 2 + 1);
 		if (Msg_log == nullptr)	ShowMemErrorExit();
@@ -90,7 +91,7 @@ void Log_server(char const *const strLog)
 
 void Log_llu(unsigned long long const llu_num)
 {
-	if (use_log && fp_Log)
+	if (use_log)
 	{
 		fprintf_s(fp_Log, "%llu", llu_num);
 		fflush(fp_Log);
@@ -99,7 +100,7 @@ void Log_llu(unsigned long long const llu_num)
 
 void Log_u(size_t const u_num)
 {
-	if (use_log && fp_Log)
+	if (use_log)
 	{
 		fprintf_s(fp_Log, "%u", (unsigned)u_num);
 		fflush(fp_Log);
@@ -161,10 +162,7 @@ int load_config(char const *const filename)
 		{
 			Log("\nMode: ");
 			if(strcmp(document["Mode"].GetString(), "solo") == 0) miner_mode = 0;
-			else 
-				if(strcmp(document["Mode"].GetString(), "pool") == 0) miner_mode = 1;
-				else
-					if (strcmp(document["Mode"].GetString(), "poolV2") == 0) miner_mode = 2;
+			else miner_mode = 1;
 			Log_u(miner_mode);
 		}
 
@@ -198,14 +196,6 @@ int load_config(char const *const filename)
 		if(document.HasMember("UseHDDWakeUp") && (document["UseHDDWakeUp"].IsBool())) use_wakeup = document["UseHDDWakeUp"].GetBool();
 		Log_u(use_wakeup);
 
-		Log("\nShowMsg: ");
-		if(document.HasMember("ShowMsg") && (document["ShowMsg"].IsBool()))	show_msg = document["ShowMsg"].GetBool();
-		Log_u(show_msg);
-
-		Log("\nShowUpdates: ");
-		if(document.HasMember("ShowUpdates") && (document["ShowUpdates"].IsBool()))	show_updates = document["ShowUpdates"].GetBool();
-		Log_u(show_updates);
-
 		Log("\nSendInterval: "); 
 		if(document.HasMember("SendInterval") && (document["SendInterval"].IsUint())) send_interval = (size_t)document["SendInterval"].GetUint();
 		Log_u(send_interval);
@@ -214,10 +204,6 @@ int load_config(char const *const filename)
 		if(document.HasMember("UpdateInterval") && (document["UpdateInterval"].IsUint())) update_interval = (size_t)document["UpdateInterval"].GetUint();
 		Log_u(update_interval);
 
-		Log("\nUseFastRcv: ");
-		if(document.HasMember("UseFastRcv") && (document["UseFastRcv"].IsBool())) use_fast_rcv = document["UseFastRcv"].GetBool();
-		Log_u(use_fast_rcv);
-
 		Log("\nDebug: ");
 		if(document.HasMember("Debug") && (document["Debug"].IsBool()))	use_debug = document["Debug"].GetBool();
 		Log_u(use_debug);
@@ -264,10 +250,6 @@ int load_config(char const *const filename)
 		if (document.HasMember("ShowWinner") && (document["ShowWinner"].IsBool()))	show_winner = document["ShowWinner"].GetBool();
 		Log_u(show_winner);
 
-		Log("\nSendBestOnly: ");
-		if (document.HasMember("SendBestOnly") && (document["SendBestOnly"].IsBool())) send_best_only = document["SendBestOnly"].GetBool();
-		Log_u(send_best_only);
-
 		Log("\nTargetDeadline: ");
 		if (document.HasMember("TargetDeadline") && (document["TargetDeadline"].IsInt64()))	my_target_deadline = document["TargetDeadline"].GetUint64();
 		Log_llu(my_target_deadline);
@@ -487,49 +469,113 @@ size_t Get_index_acc(unsigned long long const key)
 	LeaveCriticalSection(&bestsLock);
 	return bests.size() - 1;
 }
+
+////////////////////////
 /*
 void gen_nonce(unsigned long long addr, unsigned long long start_nonce, unsigned long long count) {
 	#define PLOT_SIZE	(4096 * 64)
 	#define HASH_SIZE	32
 	#define HASH_CAP	4096
-	char * final = (char *)calloc(32, 1);
-	char * gendata = (char *)calloc(16 + PLOT_SIZE, 1);
+	#define SCOOP_SIZE	64
+
+	unsigned long long nonce1;
+	unsigned long long nonce2;
+	unsigned long long nonce3;
+	unsigned long long nonce4;
+	//char * final = (char *)calloc(32, 1);
+	char *final1 = new char[32];
+	char *final2 = new char[32];
+	char *final3 = new char[32];
+	char *final4 = new char[32];
+
+	char *gendata1 = new char[16 + PLOT_SIZE];
+	char *gendata2 = new char[16 + PLOT_SIZE];
+	char *gendata3 = new char[16 + PLOT_SIZE];
+	char *gendata4 = new char[16 + PLOT_SIZE];
+
 	char * cache = (char *)calloc(64 * count, 1);
 	char *xv = (char*)&addr;
-	sph_shabal_context x;
-	size_t i;
+	mshabal_context *mx = new mshabal_context[sizeof(mshabal_context)];
+
 	size_t len;
-	//gendata[PLOT_SIZE] = xv[7]; gendata[PLOT_SIZE+1] = xv[6]; gendata[PLOT_SIZE+2] = xv[5]; gendata[PLOT_SIZE+3] = xv[4];
-	//gendata[PLOT_SIZE+4] = xv[3]; gendata[PLOT_SIZE+5] = xv[2]; gendata[PLOT_SIZE+6] = xv[1]; gendata[PLOT_SIZE+7] = xv[0];
-	for (size_t i = 0; i < 8; i++) gendata[PLOT_SIZE+i] = xv[7-i];
-	 
-	for (unsigned long long z = start_nonce; z < (start_nonce + count); z++) {
-		xv = (char*)&z;
-		//gendata[PLOT_SIZE + 8] = xv[7]; gendata[PLOT_SIZE + 9] = xv[6]; gendata[PLOT_SIZE + 10] = xv[5]; gendata[PLOT_SIZE + 11] = xv[4];
-		//gendata[PLOT_SIZE + 12] = xv[3]; gendata[PLOT_SIZE + 13] = xv[2]; gendata[PLOT_SIZE + 14] = xv[1]; gendata[PLOT_SIZE + 15] = xv[0];
-		for (i = 8; i < 16; i++) gendata[PLOT_SIZE + i] = xv[15 - i];
-
-		for (i = PLOT_SIZE; i > 0; i -= HASH_SIZE) {
-			sph_shabal256_init(&x);
+	for (size_t i = 0; i < 8; i++)
+	{
+		gendata1[PLOT_SIZE + i] = xv[7 - i];
+		gendata2[PLOT_SIZE + i] = xv[7 - i];
+		gendata3[PLOT_SIZE + i] = xv[7 - i];
+		gendata4[PLOT_SIZE + i] = xv[7 - i];
+	}
+
+	for (unsigned long long z = start_nonce; z < (start_nonce + count); z+=4) {
+//		xv = (char*)&z;
+//		for (i = 8; i < 16; i++) gendata[PLOT_SIZE + i] = xv[15 - i];
+
+		nonce1 = z + 0;
+		nonce2 = z + 1;
+		nonce3 = z + 2;
+		nonce4 = z + 3;
+		char *xv1 = (char*)&nonce1;
+		char *xv2 = (char*)&nonce2;
+		char *xv3 = (char*)&nonce3;
+		char *xv4 = (char*)&nonce4;
+		for (size_t i = 8; i < 16; i++)
+		{
+			gendata1[PLOT_SIZE + i] = xv1[15 - i];
+			gendata2[PLOT_SIZE + i] = xv2[15 - i];
+			gendata3[PLOT_SIZE + i] = xv3[15 - i];
+			gendata4[PLOT_SIZE + i] = xv4[15 - i];
+		}
+
+		for (size_t i = PLOT_SIZE; i > 0; i -= HASH_SIZE)
+		{
+			avx1_mshabal_init(mx, 256);
 			len = PLOT_SIZE + 16 - i;
-			if (len > HASH_CAP)	len = HASH_CAP;
-			sph_shabal256(&x, (const unsigned char*)&gendata[i], len);
-			sph_shabal256_close(&x, &gendata[i - HASH_SIZE]);
+			if (len > HASH_CAP)   len = HASH_CAP;
+			avx1_mshabal(mx, &gendata1[i], &gendata2[i], &gendata3[i], &gendata4[i], len);
+			avx1_mshabal_close(mx, 0, 0, 0, 0, 0, &gendata1[i - HASH_SIZE], &gendata2[i - HASH_SIZE], &gendata3[i - HASH_SIZE], &gendata4[i - HASH_SIZE]);
+
+			// ceil because (4096*32*2-32)/64 == 4095,5 but scoop 4095 is not complete yet
+			//int scoopNumberReady = i / SCOOP_SIZE;
+			//if (i % SCOOP_SIZE)	++scoopNumberReady;
+			//if (scoopNumberReady == scoop) break;
+			if ((i / SCOOP_SIZE == scoop) && (i % SCOOP_SIZE == 0))  break;
+
 		}
 
-		sph_shabal256_init(&x);
-		sph_shabal256(&x, (const unsigned char*)gendata, 16 + PLOT_SIZE);
-		sph_shabal256_close(&x, final);
+
+		//avx1_mshabal_init(mx, 256);
+		//avx1_mshabal(mx, gendata1, gendata2, gendata3, gendata4, 16 + PLOT_SIZE);
+		//avx1_mshabal_close(mx, 0, 0, 0, 0, 0, final1, final2, final3, final4);
 
 		// XOR with final
-		//for(i = 0; i < PLOT_SIZE; i ++)	gendata[i] ^= (final[i % HASH_SIZE]);
-		//for (i = scoop * 64; i < ((scoop + 1) * 64); i++)	gendata[i] ^= (final[i % HASH_SIZE]);
-		gendata[scoop * 64] ^= (final[(scoop * 64) % HASH_SIZE]);
-		memmove(&cache[(z - start_nonce) * 64], &gendata[scoop * 64], 64);
+		for (size_t i = scoop * SCOOP_SIZE; i < scoop * SCOOP_SIZE + SCOOP_SIZE; i++)
+		{
+			gendata1[i] ^= (final1[i % 32]);
+			gendata2[i] ^= (final2[i % 32]);
+			gendata3[i] ^= (final3[i % 32]);
+			gendata4[i] ^= (final4[i % 32]);
+		}
+		gendata1[scoop * 64] ^= (final1[(scoop * 64) % HASH_SIZE]);
+		gendata2[scoop * 64] ^= (final2[(scoop * 64) % HASH_SIZE]);
+		gendata3[scoop * 64] ^= (final3[(scoop * 64) % HASH_SIZE]);
+		gendata4[scoop * 64] ^= (final4[(scoop * 64) % HASH_SIZE]);
+
+		memmove(&cache[(z - start_nonce) * 64], &gendata1[scoop * 64], 64);
+		memmove(&cache[(z - start_nonce + 1) * 64], &gendata2[scoop * 64], 64);
+		memmove(&cache[(z - start_nonce + 2) * 64], &gendata3[scoop * 64], 64);
+		memmove(&cache[(z - start_nonce + 3) * 64], &gendata4[scoop * 64], 64);
 	}
-	free(final);
-	free(gendata);
-	
+	//free(final);
+	//free(gendata);
+	delete[] gendata1;
+	delete[] gendata2;
+	delete[] gendata3;
+	delete[] gendata4;
+	delete[] final1;
+	delete[] final2;
+	delete[] final3;
+	delete[] final4;
+	delete[] mx;
 
 	//acc = Get_index_acc(key);
 	//procscoop_sph(n, size, cache, 0, std::string("generator"));
@@ -539,24 +585,25 @@ void gen_nonce(unsigned long long addr, unsigned long long start_nonce, unsigned
 
 void generator_i(size_t number)
 {
-	unsigned long long start_nonce = 100000000 * (number + 1);// * (local_num + 1);
-	unsigned long long size = 400;
+	unsigned long long start_nonce = 10000000000 * (number + 1);// * (local_num + 1);
+	unsigned long long size = 1000;
 	clock_t start_work_time;
-	//wprintw(win_main, "\ngenerator RUNING !");
+	wprintw(win_main, "\ngenerator RUNING !");
 	while (!stopThreads)
 	{
 		start_work_time = clock();
 		gen_nonce(bests[0].account_id, start_nonce, size);
 		
-		wprintw(win_main, "\n%llu\tnoces/min: %f", start_nonce, (float)((float)size * CLOCKS_PER_SEC * 60 / (float)(clock() - start_work_time)));
+		wprintw(win_main, "\n%llu\tnoces/min: %f   (scoop %llu)", start_nonce, (float)((float)size * CLOCKS_PER_SEC * 60 / (float)(clock() - start_work_time)), scoop);
 		//wrefresh(win_main);
 		
 		start_nonce = start_nonce + size;
 	};
 	return;
 }
-*/
 
+*/
+////////////////////////////
 
 void proxy_i(void)
 {
@@ -647,7 +694,7 @@ void proxy_i(void)
 				RtlSecureZeroMemory(tmp_buffer, buffer_size);
 				iResult = recv(ClientSocket, tmp_buffer, (int)(buffer_size - 1), 0);
 				strcat_s(buffer, buffer_size, tmp_buffer);
-			} while ((iResult > 0) && !use_fast_rcv);
+			} while (iResult > 0);
 
 			Log("\nProxy get info: ");  Log_server(buffer);
 			unsigned long long get_accountId = 0;
@@ -758,7 +805,7 @@ void proxy_i(void)
 						{
 							find[0] = 0;
 							wattron(win_main, COLOR_PAIR(15));
-							wprintw(win_main, "PROXY: %s\n", buffer, 0);
+							wprintw(win_main, "PROXY: %s\n", buffer, 0);//You can crash the miner when the proxy is enabled and you open the address in a browser.  wprintw(win_main, "PROXY: %s\n", "Error", 0);
 							wattroff(win_main, COLOR_PAIR(15));
 						}
 					}
@@ -799,8 +846,7 @@ void send_i(void)
 		for (auto iter = shares.begin(); iter != shares.end();)
 		{
 
-			if (send_best_only) //Гасим шару если она больше текущего targetDeadline, актуально для режима Proxy
-			{
+		// Drop the share if its deadline exceeds the current targetDeadline (relevant for Proxy mode)
 				if ((iter->best / baseTarget) > bests[Get_index_acc(iter->account_id)].targetDeadline)
 				{
 					if (use_debug)
@@ -815,7 +861,6 @@ void send_i(void)
 					LeaveCriticalSection(&sharesLock);
 					continue;
 				}
-			}
 
 			RtlSecureZeroMemory(&hints, sizeof(hints));
 			hints.ai_family = AF_INET;
@@ -867,24 +912,10 @@ void send_i(void)
 				{
 					unsigned long long total = total_size / 1024 / 1024 / 1024;
 					for (auto It = satellite_size.begin(); It != satellite_size.end(); ++It) total = total + It->second;
-					bytes = sprintf_s(buffer, buffer_size, "POST /burst?requestType=submitNonce&accountId=%llu&nonce=%llu&deadline=%llu HTTP/1.0\r\nHost: %s:%s\r\nX-Miner: Blago %s\r\nX-Capacity: %llu\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", iter->account_id, iter->nonce, iter->best, nodeaddr.c_str(), nodeport.c_str(), version, total, 0);
-				}
-				if (miner_mode == 2)
-				{
-					char* f1 = (char*)HeapAlloc(hHeap, HEAP_ZERO_MEMORY, MAX_PATH);
-					char* str_len = (char*)HeapAlloc(hHeap, HEAP_ZERO_MEMORY, MAX_PATH);
-					if ((f1 == nullptr) || (str_len == nullptr)) ShowMemErrorExit();
-
-					int len = sprintf_s(f1, MAX_PATH, "%llu:%llu:%llu", iter->account_id, iter->nonce, height);
-					_itoa_s(len, str_len, MAX_PATH - 1, 10);
-
-					bytes = sprintf_s(buffer, buffer_size, "POST /pool/submitWork HTTP/1.0\r\nHost: %s:%s\r\nContent-Type: text/plain;charset=UTF-8\r\nContent-Length: %i\r\n\r\n%s", nodeaddr.c_str(), nodeport.c_str(), len, f1);
-					HeapFree(hHeap, 0, f1);
-					HeapFree(hHeap, 0, str_len);
+					bytes = sprintf_s(buffer, buffer_size, "POST /burst?requestType=submitNonce&accountId=%llu&nonce=%llu&deadline=%llu HTTP/1.0\r\nHost: %s:%s\r\nX-Miner: Blago %s\r\nX-Capacity: %llu\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", iter->account_id, iter->nonce, iter->best, nodeaddr.c_str(), nodeport.c_str(), version, total);
 				}
 
 				// Sending to server
-
 				iResult = send(ConnectSocket, buffer, bytes, 0);
 				if (iResult == SOCKET_ERROR)
 				{
@@ -904,15 +935,12 @@ void send_i(void)
 					wprintw(win_main, "%s [%20llu] sent DL: %15llu %5llud %02llu:%02llu:%02llu\n", tbuffer, iter->account_id, dl, (dl) / (24 * 60 * 60), (dl % (24 * 60 * 60)) / (60 * 60), (dl % (60 * 60)) / 60, dl % 60, 0);
 					wattroff(win_main, COLOR_PAIR(9));
 
-					if (show_msg) wprintw(win_main, "send: %s\n", buffer, 0); // показываем послание
-					Log("\nSender: Sent: "); Log_server(buffer);
-
 					EnterCriticalSection(&sessionsLock);
 					//sessions.push_back({ ConnectSocket, iter->account_id, dl, iter->best, iter->nonce });
 					sessions.push_back({ ConnectSocket, dl, *iter });
 					LeaveCriticalSection(&sessionsLock);
 
-					if (send_best_only) bests[Get_index_acc(iter->account_id)].targetDeadline = dl;
+					bests[Get_index_acc(iter->account_id)].targetDeadline = dl;
 					EnterCriticalSection(&sharesLock);
 					iter = shares.erase(iter);
 					LeaveCriticalSection(&sharesLock);
@@ -944,7 +972,7 @@ void send_i(void)
 				do{
 					iResult = recv(ConnectSocket, &buffer[pos], (int)(buffer_size - pos - 1), 0);
 					if (iResult > 0) pos += (size_t)iResult;
-				} while ((iResult > 0) && !use_fast_rcv);
+				} while (iResult > 0);
 
 				if (iResult == SOCKET_ERROR)
 				{
@@ -961,11 +989,8 @@ void send_i(void)
 				}
 				else //что-то получили от сервера
 				{
-					if (show_msg) wprintw(win_main, "\nReceived: %s\n", buffer, 0);
-					Log("\nSender: Received: "); Log_server(buffer);
 					if (network_quality < 100) network_quality++;
 
-
 					//получили пустую строку, переотправляем дедлайн
 					if (buffer[0] == '\0')
 					{
@@ -1125,7 +1150,8 @@ void procscoop_m_4(unsigned long long const nonce, unsigned long long const n, c
 	char res2[32];
 	char res3[32];
 	unsigned posn;
-	mshabal_context x;
+	mshabal_context x, init_x;
+	avx1_mshabal_init(&init_x, 256);
 
 	for (unsigned long long v = 0; v < n; v += 4)
 	{
@@ -1133,9 +1159,8 @@ void procscoop_m_4(unsigned long long const nonce, unsigned long long const n, c
 		memcpy(&sig1[32], &cache[(v + 1) * 64], 64);
 		memcpy(&sig2[32], &cache[(v + 2) * 64], 64);
 		memcpy(&sig3[32], &cache[(v + 3) * 64], 64);
-
 		
-		avx1_mshabal_init(&x, 256);
+		memcpy(&x, &init_x, sizeof(init_x)); // optimization: avx1_mshabal_init(&x, 256);
 		avx1_mshabal(&x, (const unsigned char*)sig0, (const unsigned char*)sig1, (const unsigned char*)sig2, (const unsigned char*)sig3, 64 + 32);
 		avx1_mshabal_close(&x, 0, 0, 0, 0, 0, res0, res1, res2, res3);
 
@@ -1163,8 +1188,6 @@ void procscoop_m_4(unsigned long long const nonce, unsigned long long const n, c
 
 		if ((*wertung / baseTarget) <= bests[acc].targetDeadline)
 		{
-			if (send_best_only)
-			{
 				if (bests[acc].nonce == 0 || *wertung < bests[acc].best)
 				{
 					Log("\nfound deadline=");	Log_llu(*wertung / baseTarget); Log(" nonce=");	Log_llu(nonce + v + posn); Log(" for account: "); Log_llu(bests[acc].account_id); Log(" file: "); Log((char*)file_name.c_str());
@@ -1185,30 +1208,6 @@ void procscoop_m_4(unsigned long long const nonce, unsigned long long const n, c
 						wattroff(win_main, COLOR_PAIR(2));
 					}
 				}
-			}
-			else
-			{
-				if (bests[acc].nonce == 0 || *wertung < bests[acc].best)
-				{
-					Log("\nfound deadline=");	Log_llu(*wertung / baseTarget); Log(" nonce=");	Log_llu(nonce + v); Log(" for account: "); Log_llu(bests[acc].account_id); Log(" file: "); Log((char*)file_name.c_str());
-					EnterCriticalSection(&bestsLock);
-					bests[acc].best = *wertung;
-					bests[acc].nonce = nonce + v + posn;
-					bests[acc].DL = *wertung / baseTarget;
-					LeaveCriticalSection(&bestsLock);
-				}
-				EnterCriticalSection(&sharesLock);
-				shares.push_back({ file_name, bests[acc].account_id, *wertung, nonce + v + posn });
-				LeaveCriticalSection(&sharesLock);
-				if (use_debug)
-				{
-					char tbuffer[9];
-					_strtime_s(tbuffer);
-					wattron(win_main, COLOR_PAIR(2));
-					wprintw(win_main, "%s [%20llu] found DL:      %9llu\n", tbuffer, bests[acc].account_id, *wertung / baseTarget, 0);
-					wattroff(win_main, COLOR_PAIR(2));
-				}
-			}
 		}
 	}
 }
@@ -1223,10 +1222,17 @@ void procscoop_m256_8(unsigned long long const nonce, unsigned long long const n
 	char sig5[32 + 64];
 	char sig6[32 + 64];
 	char sig7[32 + 64];
+	char res0[32];
+	char res1[32];
+	char res2[32];
+	char res3[32];
+	char res4[32];
+	char res5[32];
+	char res6[32];
+	char res7[32];
 	cache = data;
 	unsigned long long v;
-	char tbuffer[9];
-
+	
 	memmove(sig0, signature, 32);
 	memmove(sig1, signature, 32);
 	memmove(sig2, signature, 32);
@@ -1236,6 +1242,9 @@ void procscoop_m256_8(unsigned long long const nonce, unsigned long long const n
 	memmove(sig6, signature, 32);
 	memmove(sig7, signature, 32);
 
+	mshabal256_context x, init_x;
+	mshabal256_init(&init_x, 256);
+
 	for (v = 0; v<n; v += 8) {
 		memmove(&sig0[32], &cache[(v + 0) * 64], 64);
 		memmove(&sig1[32], &cache[(v + 1) * 64], 64);
@@ -1245,17 +1254,8 @@ void procscoop_m256_8(unsigned long long const nonce, unsigned long long const n
 		memmove(&sig5[32], &cache[(v + 5) * 64], 64);
 		memmove(&sig6[32], &cache[(v + 6) * 64], 64);
 		memmove(&sig7[32], &cache[(v + 7) * 64], 64);
-		char res0[32];
-		char res1[32];
-		char res2[32];
-		char res3[32];
-		char res4[32];
-		char res5[32];
-		char res6[32];
-		char res7[32];
-
-		mshabal256_context x;
-		mshabal256_init(&x, 256);
+
+		memcpy(&x, &init_x, sizeof(init_x)); // optimization: mshabal256_init(&x, 256);
 		mshabal256(&x, (const unsigned char*)sig0, (const unsigned char*)sig1, (const unsigned char*)sig2, (const unsigned char*)sig3, (const unsigned char*)sig4, (const unsigned char*)sig5, (const unsigned char*)sig6, (const unsigned char*)sig7, 64 + 32);
 		mshabal256_close(&x, 0, 0, 0, 0, 0, 0, 0, 0, 0, res0, res1, res2, res3, res4, res5, res6, res7);
 
@@ -1306,8 +1306,6 @@ void procscoop_m256_8(unsigned long long const nonce, unsigned long long const n
 		
 		if ((*wertung / baseTarget) <= bests[acc].targetDeadline)
 		{
-			if (send_best_only)
-			{
 				if (bests[acc].nonce == 0 || *wertung < bests[acc].best)
 				{
 					Log("\nfound deadline=");	Log_llu(*wertung / baseTarget); Log(" nonce=");	Log_llu(nonce + v + posn); Log(" for account: "); Log_llu(bests[acc].account_id); Log(" file: "); Log((char*)file_name.c_str());
@@ -1328,29 +1326,6 @@ void procscoop_m256_8(unsigned long long const nonce, unsigned long long const n
 						wattroff(win_main, COLOR_PAIR(2));
 					}
 				}
-			}
-			else
-			{
-				if (bests[acc].nonce == 0 || *wertung < bests[acc].best)
-				{
-					Log("\nfound deadline=");	Log_llu(*wertung / baseTarget); Log(" nonce=");	Log_llu(nonce + v); Log(" for account: "); Log_llu(bests[acc].account_id); Log(" file: "); Log((char*)file_name.c_str());
-					EnterCriticalSection(&bestsLock);
-					bests[acc].best = *wertung;
-					bests[acc].nonce = nonce + v + posn;
-					bests[acc].DL = *wertung / baseTarget;
-					LeaveCriticalSection(&bestsLock);
-				}
-				EnterCriticalSection(&sharesLock);
-				shares.push_back({ file_name, bests[acc].account_id, *wertung, nonce + v + posn });
-				LeaveCriticalSection(&sharesLock);
-				if (use_debug)
-				{
-					_strtime_s(tbuffer);
-					wattron(win_main, COLOR_PAIR(2));
-					wprintw(win_main, "%s [%20llu] found DL:      %9llu\n", tbuffer, bests[acc].account_id, *wertung / baseTarget, 0);
-					wattroff(win_main, COLOR_PAIR(2));
-				}
-			}
 		}
 	}
 }
@@ -1361,12 +1336,14 @@ void procscoop_sph(const unsigned long long nonce, const unsigned long long n, c
 	cache = data;
 	char res[32];
 	memcpy_s(sig, sizeof(sig), signature, sizeof(char) * 32);
-	sph_shabal_context x;
+	
+	sph_shabal_context x, init_x;
+	sph_shabal256_init(&init_x);
 	for (unsigned long long v = 0; v < n; v++)
 	{
 		memcpy_s(&sig[32], sizeof(sig)-32, &cache[v * 64], sizeof(char)* 64);
 		
-		sph_shabal256_init(&x);
+		memcpy(&x, &init_x, sizeof(init_x)); // optimization: sph_shabal256_init(&x);
 		sph_shabal256(&x, (const unsigned char*)sig, 64 + 32);
 		sph_shabal256_close(&x, res);
 
@@ -1374,8 +1351,6 @@ void procscoop_sph(const unsigned long long nonce, const unsigned long long n, c
 
 		if ((*wertung / baseTarget) <= bests[acc].targetDeadline)
 		{
-			if (send_best_only)
-			{
 				if (bests[acc].nonce == 0 || *wertung < bests[acc].best)
 				{
 					Log("\nfound deadline=");	Log_llu(*wertung / baseTarget); Log(" nonce=");	Log_llu(nonce + v); Log(" for account: "); Log_llu(bests[acc].account_id); Log(" file: "); Log((char*)file_name.c_str());
@@ -1396,30 +1371,6 @@ void procscoop_sph(const unsigned long long nonce, const unsigned long long n, c
 						wattroff(win_main, COLOR_PAIR(2));
 					}
 				}
-			}
-			else
-			{
-				if (bests[acc].nonce == 0 || *wertung < bests[acc].best)
-				{
-					Log("\nfound deadline=");	Log_llu(*wertung / baseTarget); Log(" nonce=");	Log_llu(nonce + v); Log(" for account: "); Log_llu(bests[acc].account_id); Log(" file: "); Log((char*)file_name.c_str());
-					EnterCriticalSection(&bestsLock);
-					bests[acc].best = *wertung;
-					bests[acc].nonce = nonce + v;
-					bests[acc].DL = *wertung / baseTarget;
-					LeaveCriticalSection(&bestsLock);
-				}
-				EnterCriticalSection(&sharesLock);
-				shares.push_back({ file_name, bests[acc].account_id, *wertung, nonce + v });
-				LeaveCriticalSection(&sharesLock);
-				if (use_debug)
-				{
-					char tbuffer[9];
-					_strtime_s(tbuffer);
-					wattron(win_main, COLOR_PAIR(2));
-					wprintw(win_main, "%s [%20llu] found DL:      %9llu\n", tbuffer, bests[acc].account_id, *wertung / baseTarget, 0);
-					wattroff(win_main, COLOR_PAIR(2));
-				}
-			}
 		}
 	}
 }
@@ -1443,8 +1394,6 @@ void procscoop_asm(const unsigned long long nonce, const unsigned long long n, c
 
 		if ((*wertung / baseTarget) <= bests[acc].targetDeadline)
 		{
-			if (send_best_only)
-			{
 				if (bests[acc].nonce == 0 || *wertung < bests[acc].best)
 				{
 					Log("\nfound deadline=");	Log_llu(*wertung / baseTarget); Log(" nonce=");	Log_llu(nonce + v); Log(" for account: "); Log_llu(bests[acc].account_id); Log(" file: "); Log((char*)file_name.c_str());
@@ -1465,30 +1414,6 @@ void procscoop_asm(const unsigned long long nonce, const unsigned long long n, c
 						wattroff(win_main, COLOR_PAIR(2));
 					}
 				}
-			}
-			else
-			{
-				if (bests[acc].nonce == 0 || *wertung < bests[acc].best)
-				{
-					Log("\nfound deadline=");	Log_llu(*wertung / baseTarget); Log(" nonce=");	Log_llu(nonce + v); Log(" for account: "); Log_llu(bests[acc].account_id); Log(" file: "); Log((char*)file_name.c_str());
-					EnterCriticalSection(&bestsLock);
-					bests[acc].best = *wertung;
-					bests[acc].nonce = nonce + v;
-					bests[acc].DL = *wertung / baseTarget;
-					LeaveCriticalSection(&bestsLock);
-				}
-				EnterCriticalSection(&sharesLock);
-				shares.push_back({ file_name, bests[acc].account_id, *wertung, nonce + v });
-				LeaveCriticalSection(&sharesLock);
-				if (use_debug)
-				{
-					char tbuffer[9];
-					_strtime_s(tbuffer);
-					wattron(win_main, COLOR_PAIR(2));
-					wprintw(win_main, "%s [%20llu] found DL:      %9llu\n", tbuffer, bests[acc].account_id, *wertung / baseTarget, 0);
-					wattroff(win_main, COLOR_PAIR(2));
-				}
-			}
 		}
 	}
 }
@@ -1641,7 +1566,23 @@ void work_i(const size_t local_num) {
 				if (i + cache_size_local > stagger)
 				{
 					cache_size_local = stagger - i;  // остаток
-					//wprintw(win_main, "%llu\n", cache_size_local);
+					#ifdef __AVX2__
+					if (cache_size_local < 8)
+					{
+						wattron(win_main, COLOR_PAIR(12));
+						wprintw(win_main, "WARNING: %llu\n", cache_size_local);
+						wattroff(win_main, COLOR_PAIR(12));
+					}
+					#else
+						#ifdef __AVX__
+						if (cache_size_local < 4)
+						{
+						wattron(win_main, COLOR_PAIR(12));
+						wprintw(win_main, "WARNING: %llu\n", cache_size_local);
+						wattroff(win_main, COLOR_PAIR(12));
+						}
+						#endif
+					#endif
 				}
 				bytes = 0;
 				b = 0;
@@ -2030,9 +1971,7 @@ void pollLocal(void) {
 				Log("\n*! GMI: connect function failed with error: "); Log_u(WSAGetLastError());
 			}
 			else {
-				int bytes;
-				if (miner_mode == 2) bytes = sprintf_s(buffer, buffer_size, "GET /pool/getMiningInfo HTTP/1.0\r\nHost: %s:%s\r\nContent-Type: text/plain;charset=UTF-8\r\n\r\n", updateraddr.c_str(), updaterport.c_str());
-				else bytes = sprintf_s(buffer, buffer_size, "POST /burst?requestType=getMiningInfo HTTP/1.0\r\nHost: %s:%s\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", nodeaddr.c_str(), nodeport.c_str());
+				int bytes = sprintf_s(buffer, buffer_size, "POST /burst?requestType=getMiningInfo HTTP/1.0\r\nHost: %s:%s\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", nodeaddr.c_str(), nodeport.c_str());
 				iResult = send(UpdaterSocket, buffer, bytes, 0);
 				if (iResult == SOCKET_ERROR)
 				{
@@ -2040,17 +1979,13 @@ void pollLocal(void) {
 					Log("\n*! GMI: send request failed: "); Log_u(WSAGetLastError());
 				}
 				else{
-					if (show_updates) wprintw(win_main, "Sent: \n%s\n", buffer, 0);
-					Log("\n* GMI: Sent: "); Log_server(buffer);
-
-
 					RtlSecureZeroMemory(buffer, buffer_size);
 					size_t  pos = 0;
 					iResult = 0;
 					do{
 						iResult = recv(UpdaterSocket, &buffer[pos], (int)(buffer_size - pos - 1), 0);
 						if (iResult > 0) pos += (size_t)iResult;
-					} while ((iResult > 0) && !use_fast_rcv);
+					} while (iResult > 0);
 					if (iResult == SOCKET_ERROR)
 					{
 						if (network_quality > 0) network_quality--;
@@ -2059,8 +1994,7 @@ void pollLocal(void) {
 					else {
 						if (network_quality < 100) network_quality++;
 						Log("\n* GMI: Received: "); Log_server(buffer);
-						if (show_updates)  wprintw(win_main, "Received: %s\n", buffer, 0);
-
+						
 						// locate HTTP header
 						char *find = strstr(buffer, "\r\n\r\n");
 						if (find == nullptr)	Log("\n*! GMI: error message from pool");
@@ -2104,6 +2038,100 @@ void pollLocal(void) {
 	HeapFree(hHeap, 0, buffer);
 }
 
+
+// Polls the updater node once for mining info. Connects to
+// updateraddr:updaterport via WSAConnectByNameA (1 s timeout), sends a
+// getMiningInfo request and, if the reply parses as a JSON object, updates
+// baseTarget, height, str_signature/signature and targetDeadlineInfo.
+// network_quality drops by one on a connect/send/recv error and rises (max 100) otherwise.
+void pollLocal2(void) {
+	size_t const buffer_size = 1000;
+	char *buffer = (char*)HeapAlloc(hHeap, HEAP_ZERO_MEMORY, buffer_size);
+	if (buffer == nullptr) ShowMemErrorExit();
+	int iResult = 0;
+	SOCKET UpdaterSocket = INVALID_SOCKET;
+	SOCKADDR_STORAGE LocalAddr = { 0 };
+	SOCKADDR_STORAGE RemoteAddr = { 0 };
+	DWORD dwLocalAddr = sizeof(LocalAddr);
+	DWORD dwRemoteAddr = sizeof(RemoteAddr);
+	BOOL bSuccess;
+	UpdaterSocket = socket(AF_INET, SOCK_STREAM, 0); // NOTE(review): result is not checked here; an invalid socket is only reported through the connect failure below
+	timeval  timeout;
+	timeout.tv_sec = 1;
+	timeout.tv_usec = 0;
+
+	bSuccess = WSAConnectByNameA(UpdaterSocket, (LPCSTR)updateraddr.c_str(), (LPCSTR)updaterport.c_str(), &dwLocalAddr, (SOCKADDR*)&LocalAddr, &dwRemoteAddr, (SOCKADDR*)&RemoteAddr, &timeout, NULL);
+	if (!bSuccess) {
+		if (network_quality > 0) network_quality--;
+		Log("\n*! GMI: WsaConnectByName failed with error: "); Log_u(WSAGetLastError());
+		Log(updateraddr.c_str());
+	}
+	else {
+			setsockopt(UpdaterSocket, SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, NULL, 0); // per the WSAConnectByName documentation: needed before previously set socket options take effect
+			int bytes = sprintf_s(buffer, buffer_size, "POST /burst?requestType=getMiningInfo HTTP/1.0\r\nHost: %s:%s\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", nodeaddr.c_str(), nodeport.c_str()); // NOTE(review): Host header uses nodeaddr/nodeport while the socket targets updateraddr/updaterport - confirm this is intended
+				iResult = send(UpdaterSocket, buffer, bytes, 0);
+				if (iResult == SOCKET_ERROR)
+				{
+					if (network_quality > 0) network_quality--;
+					Log("\n*! GMI: send request failed: "); Log_u(WSAGetLastError());
+				}
+				else{
+					RtlSecureZeroMemory(buffer, buffer_size); // reuse the request buffer for the reply
+					size_t  pos = 0;
+					iResult = 0;
+					do{
+						iResult = recv(UpdaterSocket, &buffer[pos], (int)(buffer_size - pos - 1), 0); // "- 1" leaves the last zeroed byte untouched as string terminator
+						if (iResult > 0) pos += (size_t)iResult;
+					} while (iResult > 0); // stops when recv returns 0 or SOCKET_ERROR
+					if (iResult == SOCKET_ERROR)
+					{
+						if (network_quality > 0) network_quality--;
+						Log("\n*! GMI: get mining info failed:: "); Log_u(WSAGetLastError());
+					}
+					else {
+						if (network_quality < 100) network_quality++;
+						Log("\n* GMI: Received: "); Log_server(buffer);
+						// locate HTTP header
+						char *find = strstr(buffer, "\r\n\r\n");
+						if (find == nullptr)	Log("\n*! GMI: error message from pool");
+						else {
+							rapidjson::Document gmi;
+							if (gmi.Parse<0>(find).HasParseError()) Log("\n*! GMI: error parsing JSON message from pool");
+							else {
+								if (gmi.IsObject())
+								{
+									if (gmi.HasMember("baseTarget")) {
+										if (gmi["baseTarget"].IsString())	baseTarget = _strtoui64(gmi["baseTarget"].GetString(), 0, 10);
+										else
+											if (gmi["baseTarget"].IsInt64()) baseTarget = gmi["baseTarget"].GetInt64();
+									}
+									if (gmi.HasMember("height")) {
+										if (gmi["height"].IsString())	height = _strtoui64(gmi["height"].GetString(), 0, 10);
+										else
+											if (gmi["height"].IsInt64()) height = gmi["height"].GetInt64();
+									}
+									// GetString() is only valid on string values; ignore any other type the server may send
+									if (gmi.HasMember("generationSignature") && gmi["generationSignature"].IsString()) {
+										strcpy_s(str_signature, gmi["generationSignature"].GetString());
+										if (xstr2strr(signature, 33, gmi["generationSignature"].GetString()) == 0)	Log("\n*! GMI: Node response: Error decoding generationsignature\n");
+									}
+									if (gmi.HasMember("targetDeadline")) {
+										if (gmi["targetDeadline"].IsString())	targetDeadlineInfo = _strtoui64(gmi["targetDeadline"].GetString(), 0, 10);
+										else
+											if (gmi["targetDeadline"].IsInt64()) targetDeadlineInfo = gmi["targetDeadline"].GetInt64();
+									}
+								}
+							}
+						}
+					}
+				}
+	}
+	closesocket(UpdaterSocket);
+	HeapFree(hHeap, 0, buffer);
+}
+
+
+
 void updater_i(void) {
 	if (updateraddr.length() <= 3) {
 		Log("\nGMI: ERROR in UpdaterAddr");
@@ -2648,7 +2676,7 @@ int main(int argc, char **argv) {
 	double pcFreq = double(li.QuadPart);
 
 	std::thread proxy;
-	//std::vector<std::thread> generator;
+	std::vector<std::thread> generator;
 
 	InitializeCriticalSection(&sessionsLock);
 	InitializeCriticalSection(&bestsLock);
@@ -2656,6 +2684,7 @@ int main(int argc, char **argv) {
 
 	char tbuffer[9];
 	unsigned long long bytesRead = 0;
+	FILE * pFileStat;
 
 	shares.reserve(20);
 	bests.reserve(4);
@@ -3013,9 +3042,9 @@ int main(int argc, char **argv) {
 			wmove(win_progress, 1, 1);
 			wattron(win_progress, COLOR_PAIR(14));
 			if (deadline == 0)
-				wprintw(win_progress, "%3llu%% %6llu GB (%.2f MB/s). no deadline            Network quality: %3u%%", (bytesRead * 4096 * 100 / total_size), (bytesRead / (256 * 1024)), threads_speed, network_quality, 0);
+				wprintw(win_progress, "%3llu%% %6llu GB (%.2f MB/s). no deadline            Connection: %3u%%", (bytesRead * 4096 * 100 / total_size), (bytesRead / (256 * 1024)), threads_speed, network_quality, 0);
 			else
-				wprintw(win_progress, "%3llu%% %6llu GB (%.2f MB/s). Deadline =%10llu   Network quality: %3u%%", (bytesRead * 4096 * 100 / total_size), (bytesRead / (256 * 1024)), threads_speed, deadline, network_quality, 0);
+				wprintw(win_progress, "%3llu%% %6llu GB (%.2f MB/s). Deadline =%10llu   Connection: %3u%%", (bytesRead * 4096 * 100 / total_size), (bytesRead / (256 * 1024)), threads_speed, deadline, network_quality, 0);
 			wattroff(win_progress, COLOR_PAIR(14));
 
 			wrefresh(win_main);
@@ -3037,6 +3066,7 @@ int main(int argc, char **argv) {
 
 		Log("\nInterrupt Sender. ");
 		if (sender.joinable()) sender.join();
+		
 		/*
 		if (can_generate != 0){
 			Log("\nInterrupt Generator. ");
@@ -3046,6 +3076,16 @@ int main(int argc, char **argv) {
 			can_generate = 1;
 		}
 		*/
+
+		
+		fopen_s(&pFileStat, "stat.csv", "a+t");
+		if (pFileStat != nullptr)
+		{
+			fprintf(pFileStat, "%llu;%llu;%llu\n", old_height, old_baseTarget, deadline);
+			fclose(pFileStat);
+		}
+
+
 	}
 
 	if (pass != nullptr) HeapFree(hHeap, 0, pass);
diff --git a/miner.h b/miner.h
index 9869b6c..1af8480 100644
--- a/miner.h
+++ b/miner.h
@@ -19,6 +19,7 @@ using namespace rapidjson;
 
 #pragma comment(lib,"Ws2_32.lib")
 #include <ws2tcpip.h>
+#include <mswsock.h> // Needed for SO_UPDATE_CONNECT_CONTEXT
 
 #include "curses.h" 
 //#include "panel.h" 
@@ -52,12 +53,12 @@ HANDLE hHeap;
 
 bool exit_flag = false;
 #ifdef __AVX2__
-	char const *const version = "v1.170603_AVX2";
+	char const *const version = "v1.170820_AVX2";
 #else
 	#ifdef __AVX__
-		char const *const version = "v1.170603_AVX";
+		char const *const version = "v1.170820_AVX";
 	#else
-		char const *const version = "v1.170603";
+		char const *const version = "v1.170820";
 	#endif
 #endif 
 
@@ -91,22 +92,22 @@ volatile int stopThreads = 0;
 size_t miner_mode = 0;				// ����� �������. 0=����, 1=���
 size_t cache_size = 100000;			// ������ ���� ������ ������
 std::vector<std::string> paths_dir; // ����
-bool show_msg = false;				// �������� ������� � �������� � ����������
-bool show_updates = false;			// �������� ������� � �������� � ���������
+//bool show_msg = false;				// �������� ������� � �������� � ����������
+//bool show_updates = false;			// �������� ������� � �������� � ���������
 FILE * fp_Log = nullptr;			// ��������� �� ���-����
 size_t send_interval = 100;			// ����� �������� ����� ����������
 size_t update_interval = 1000;		// ����� �������� ����� ���������
 short win_size_x = 80;
 short win_size_y = 60;
-bool use_fast_rcv = false;
+//bool use_fast_rcv = false;
 bool use_debug = false;
 bool enable_proxy = false;
-bool send_best_only = true;
+//bool send_best_only = true;
 bool use_wakeup = false;
 bool use_log = true;				// ����� ���
 bool use_boost = false;				// ������������ ���������� ��������� ��� �������
 bool show_winner = false;			// ���������� ����������
-//short can_generate = 1;				// 0 - disable; 1 - can start generate; 2 - already run generate
+//short can_generate = 0;				// 0 - disabled; 1 - can start generating; 2 - generator already running
 
 
 SYSTEMTIME cur_time;				// ������� �����
diff --git a/miner.vcxproj b/miner.vcxproj
index 674d944..56e15b8 100644
--- a/miner.vcxproj
+++ b/miner.vcxproj
@@ -175,7 +175,7 @@
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
       <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
-      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
       <BrowseInformation>true</BrowseInformation>
       <AssemblerOutput>AssemblyAndSourceCode</AssemblerOutput>
       <OmitFramePointers>true</OmitFramePointers>
diff --git a/mshabal256_avx2.c b/mshabal256_avx2.c
index 82c66cf..27581ae 100644
--- a/mshabal256_avx2.c
+++ b/mshabal256_avx2.c
@@ -44,6 +44,7 @@ extern "C" {
     const unsigned char *buf6, const unsigned char *buf7,
     size_t num)
   {
+    _mm256_zeroupper();
     union {
       u32 words[64 * MSHABAL256_FACTOR];
       __m256i data[16];
@@ -235,6 +236,7 @@ extern "C" {
       _mm256_storeu_si256((__m256i *)sc->state + j + 28, C[j]);
     }
 
+	_mm256_zeroupper();
 #undef M
   }
 
diff --git a/mshabal_avx1.c b/mshabal_avx1.c
index f1d3b37..aafd899 100644
--- a/mshabal_avx1.c
+++ b/mshabal_avx1.c
@@ -222,7 +222,7 @@ extern "C" {
 		  _mm_storeu_si128((__m128i *)sc->state + j + 12, B[j]);
 		  _mm_storeu_si128((__m128i *)sc->state + j + 28, C[j]);
 	  }
-
+	 // _mm256_zeroupper();
 #undef M
   }
 
@@ -265,53 +265,6 @@ extern "C" {
 	  sc->out_size = out_size;
   }
 
- 
-
-  static const mshabal_u32 A_init_256[] = {
-	  C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
-	  C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
-	  C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
-  };
-
-  static const mshabal_u32 B_init_256[] = {
-	  C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
-	  C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
-	  C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
-	  C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
-  };
-
-  static const mshabal_u32 C_init_256[] = {
-	  C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
-	  C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
-	  C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
-	  C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
-  };
-
-
-  void  avx1_mshabal_init2(mshabal_context *cc, unsigned out_size)
-  {
-	  //for (unsigned u = 0; u < 176; u++)  cc->state[u] = 0;
-
-	  memset(cc->buf0, 0, sizeof cc->buf0);
-	  memset(cc->buf1, 0, sizeof cc->buf1);
-	  memset(cc->buf2, 0, sizeof cc->buf2);
-	  memset(cc->buf3, 0, sizeof cc->buf3);
-
-	  for (unsigned u = 0; u < 176; u = u + 12 + 16 + 16)
-	  {
-		  memcpy(cc->state + u, A_init_256, sizeof(u32) * 12);
-		  memcpy(cc->state + u + 12, B_init_256, sizeof(u32) * 16);
-		  memcpy(cc->state + u + 12 + 16, C_init_256, sizeof(u32) * 16);
-	  }
-
-	  cc->Wlow = 1;
-	  cc->Whigh = 0;
-	  cc->ptr = 0;
-	  //cc->Whigh = cc->Wlow = C32(0xFFFFFFFF);
-	  cc->out_size = out_size;
-  }
-  
-
 
   /* see shabal_small.h */
   void  avx1_mshabal(mshabal_context *sc, const void *data0, const void *data1,
@@ -435,7 +388,6 @@ extern "C" {
 		  out = (u32*)dst3;
 		  for (z = 0; z < out_size_w32; z++)  out[(size_t)z] = sc->state[off + (size_t)(z << 2) + 3];
 	  }
-	  _mm256_zeroupper();
   }
   //#pragma optimize("", on)
 
diff --git a/rapidjson/allocators.h b/rapidjson/allocators.h
index d74a671..655f4a3 100644
--- a/rapidjson/allocators.h
+++ b/rapidjson/allocators.h
@@ -179,9 +179,10 @@ class MemoryPoolAllocator {
 
         size = RAPIDJSON_ALIGN(size);
         if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity)
-            AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size);
+            if (!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size))
+                return NULL;
 
-        void *buffer = reinterpret_cast<char *>(chunkHead_ + 1) + chunkHead_->size;
+        void *buffer = reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size;
         chunkHead_->size += size;
         return buffer;
     }
@@ -194,14 +195,16 @@ class MemoryPoolAllocator {
         if (newSize == 0)
             return NULL;
 
+        originalSize = RAPIDJSON_ALIGN(originalSize);
+        newSize = RAPIDJSON_ALIGN(newSize);
+
         // Do not shrink if new size is smaller than original
         if (originalSize >= newSize)
             return originalPtr;
 
         // Simply expand it if it is the last allocation and there is sufficient space
-        if (originalPtr == (char *)(chunkHead_ + 1) + chunkHead_->size - originalSize) {
+        if (originalPtr == reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size - originalSize) {
             size_t increment = static_cast<size_t>(newSize - originalSize);
-            increment = RAPIDJSON_ALIGN(increment);
             if (chunkHead_->size + increment <= chunkHead_->capacity) {
                 chunkHead_->size += increment;
                 return originalPtr;
@@ -209,11 +212,13 @@ class MemoryPoolAllocator {
         }
 
         // Realloc process: allocate and copy memory, do not free original buffer.
-        void* newBuffer = Malloc(newSize);
-        RAPIDJSON_ASSERT(newBuffer != 0);   // Do not handle out-of-memory explicitly.
-        if (originalSize)
-            std::memcpy(newBuffer, originalPtr, originalSize);
-        return newBuffer;
+        if (void* newBuffer = Malloc(newSize)) {
+            if (originalSize)
+                std::memcpy(newBuffer, originalPtr, originalSize);
+            return newBuffer;
+        }
+        else
+            return NULL;
     }
 
     //! Frees a memory block (concept Allocator)
@@ -227,15 +232,20 @@ class MemoryPoolAllocator {
 
     //! Creates a new chunk.
     /*! \param capacity Capacity of the chunk in bytes.
+        \return true if success.
     */
-    void AddChunk(size_t capacity) {
+    bool AddChunk(size_t capacity) {
         if (!baseAllocator_)
-            ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator());
-        ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(baseAllocator_->Malloc(sizeof(ChunkHeader) + capacity));
-        chunk->capacity = capacity;
-        chunk->size = 0;
-        chunk->next = chunkHead_;
-        chunkHead_ =  chunk;
+            ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)();
+        if (ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) {
+            chunk->capacity = capacity;
+            chunk->size = 0;
+            chunk->next = chunkHead_;
+            chunkHead_ =  chunk;
+            return true;
+        }
+        else
+            return false;
     }
 
     static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity.
diff --git a/rapidjson/document.h b/rapidjson/document.h
index c6acbd9..3133a2f 100644
--- a/rapidjson/document.h
+++ b/rapidjson/document.h
@@ -20,40 +20,37 @@
 #include "reader.h"
 #include "internal/meta.h"
 #include "internal/strfunc.h"
+#include "memorystream.h"
+#include "encodedstream.h"
 #include <new>      // placement new
+#include <limits>
 
-#ifdef _MSC_VER
 RAPIDJSON_DIAG_PUSH
+#ifdef _MSC_VER
 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
-#elif defined(__GNUC__)
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(effc++)
+RAPIDJSON_DIAG_OFF(4244) // conversion from kXxxFlags to 'uint16_t', possible loss of data
+#ifdef _MINWINDEF_       // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max
+#ifndef NOMINMAX
+#pragma push_macro("min")
+#pragma push_macro("max")
+#undef min
+#undef max
+#endif
 #endif
-
-///////////////////////////////////////////////////////////////////////////////
-// RAPIDJSON_HAS_STDSTRING
-
-#ifndef RAPIDJSON_HAS_STDSTRING
-#ifdef RAPIDJSON_DOXYGEN_RUNNING
-#define RAPIDJSON_HAS_STDSTRING 1 // force generation of documentation
-#else
-#define RAPIDJSON_HAS_STDSTRING 0 // no std::string support by default
 #endif
-/*! \def RAPIDJSON_HAS_STDSTRING
-    \ingroup RAPIDJSON_CONFIG
-    \brief Enable RapidJSON support for \c std::string
-
-    By defining this preprocessor symbol to \c 1, several convenience functions for using
-    \ref rapidjson::GenericValue with \c std::string are enabled, especially
-    for construction and comparison.
 
-    \hideinitializer
-*/
-#endif // !defined(RAPIDJSON_HAS_STDSTRING)
+#ifdef __clang__
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
 
-#if RAPIDJSON_HAS_STDSTRING
-#include <string>
-#endif // RAPIDJSON_HAS_STDSTRING
+#ifdef __GNUC__
+RAPIDJSON_DIAG_OFF(effc++)
+#if __GNUC__ >= 6
+RAPIDJSON_DIAG_OFF(terminate) // ignore throwing RAPIDJSON_ASSERT in RAPIDJSON_NOEXCEPT functions
+#endif
+#endif // __GNUC__
 
 #ifndef RAPIDJSON_NOMEMBERITERATORCLASS
 #include <iterator> // std::iterator, std::random_access_iterator_tag
@@ -69,6 +66,9 @@ RAPIDJSON_NAMESPACE_BEGIN
 template <typename Encoding, typename Allocator>
 class GenericValue;
 
+template <typename Encoding, typename Allocator, typename StackAllocator>
+class GenericDocument;
+
 //! Name-value pair in a JSON object value.
 /*!
     This class was internal to GenericValue. It used to be a inner struct.
@@ -155,6 +155,7 @@ class GenericMemberIterator
             Otherwise, the copy constructor is implicitly defined.
     */
     GenericMemberIterator(const NonConstIterator & it) : ptr_(it.ptr_) {}
+    Iterator& operator=(const NonConstIterator & it) { ptr_ = it.ptr_; return *this; }
 
     //! @name stepping
     //@{
@@ -257,6 +258,7 @@ struct GenericStringRef {
     typedef CharType Ch; //!< character type of the string
 
     //! Create string reference from \c const character array
+#ifndef __clang__ // -Wdocumentation
     /*!
         This constructor implicitly creates a constant string reference from
         a \c const character array.  It has better performance than
@@ -279,11 +281,13 @@ struct GenericStringRef {
             In such cases, the referenced string should be \b copied to the
             GenericValue instead.
      */
+#endif
     template<SizeType N>
     GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT
         : s(str), length(N-1) {}
 
     //! Explicitly create string reference from \c const character pointer
+#ifndef __clang__ // -Wdocumentation
     /*!
         This constructor can be used to \b explicitly  create a reference to
         a constant string pointer.
@@ -302,18 +306,23 @@ struct GenericStringRef {
             In such cases, the referenced string should be \b copied to the
             GenericValue instead.
      */
+#endif
     explicit GenericStringRef(const CharType* str)
-        : s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != NULL); }
+        : s(str), length(NotNullStrLen(str)) {}
 
     //! Create constant string reference from pointer and length
+#ifndef __clang__ // -Wdocumentation
     /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
         \param len length of the string, excluding the trailing NULL terminator
 
         \post \ref s == str && \ref length == len
         \note Constant complexity.
      */
+#endif
     GenericStringRef(const CharType* str, SizeType len)
-        : s(str), length(len) { RAPIDJSON_ASSERT(s != NULL); }
+        : s(RAPIDJSON_LIKELY(str) ? str : emptyString), length(len) { RAPIDJSON_ASSERT(str != 0 || len == 0u); }
+
+    GenericStringRef(const GenericStringRef& rhs) : s(rhs.s), length(rhs.length) {}
 
     //! implicit conversion to plain CharType pointer
     operator const Ch *() const { return s; }
@@ -322,13 +331,24 @@ struct GenericStringRef {
     const SizeType length; //!< length of the string (excluding the trailing NULL terminator)
 
 private:
-    //! Disallow copy-assignment
-    GenericStringRef operator=(const GenericStringRef&);
+    SizeType NotNullStrLen(const CharType* str) {
+        RAPIDJSON_ASSERT(str != 0);
+        return internal::StrLen(str);
+    }
+
+    /// Empty string - used when passing in a NULL pointer
+    static const Ch emptyString[];
+
     //! Disallow construction from non-const array
     template<SizeType N>
     GenericStringRef(CharType (&str)[N]) /* = delete */;
+    //! Copy assignment operator not permitted - immutable type
+    GenericStringRef& operator=(const GenericStringRef& rhs) /* = delete */;
 };
 
+template<typename CharType>
+const CharType GenericStringRef<CharType>::emptyString[] = { CharType() };
+
 //! Mark a character pointer as constant string
 /*! Mark a plain character pointer as a "string literal".  This function
     can be used to avoid copying a character string to be referenced as a
@@ -343,7 +363,7 @@ struct GenericStringRef {
 */
 template<typename CharType>
 inline GenericStringRef<CharType> StringRef(const CharType* str) {
-    return GenericStringRef<CharType>(str, internal::StrLen(str));
+    return GenericStringRef<CharType>(str);
 }
 
 //! Mark a character pointer as constant string
@@ -401,6 +421,127 @@ template <typename T> struct IsGenericValue : IsGenericValueImpl<T>::Type {};
 
 } // namespace internal
 
+///////////////////////////////////////////////////////////////////////////////
+// TypeHelper
+
+namespace internal {
+
+template <typename ValueType, typename T>
+struct TypeHelper {};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, bool> {
+    static bool Is(const ValueType& v) { return v.IsBool(); }
+    static bool Get(const ValueType& v) { return v.GetBool(); }
+    static ValueType& Set(ValueType& v, bool data) { return v.SetBool(data); }
+    static ValueType& Set(ValueType& v, bool data, typename ValueType::AllocatorType&) { return v.SetBool(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, int> {
+    static bool Is(const ValueType& v) { return v.IsInt(); }
+    static int Get(const ValueType& v) { return v.GetInt(); }
+    static ValueType& Set(ValueType& v, int data) { return v.SetInt(data); }
+    static ValueType& Set(ValueType& v, int data, typename ValueType::AllocatorType&) { return v.SetInt(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, unsigned> {
+    static bool Is(const ValueType& v) { return v.IsUint(); }
+    static unsigned Get(const ValueType& v) { return v.GetUint(); }
+    static ValueType& Set(ValueType& v, unsigned data) { return v.SetUint(data); }
+    static ValueType& Set(ValueType& v, unsigned data, typename ValueType::AllocatorType&) { return v.SetUint(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, int64_t> {
+    static bool Is(const ValueType& v) { return v.IsInt64(); }
+    static int64_t Get(const ValueType& v) { return v.GetInt64(); }
+    static ValueType& Set(ValueType& v, int64_t data) { return v.SetInt64(data); }
+    static ValueType& Set(ValueType& v, int64_t data, typename ValueType::AllocatorType&) { return v.SetInt64(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, uint64_t> {
+    static bool Is(const ValueType& v) { return v.IsUint64(); }
+    static uint64_t Get(const ValueType& v) { return v.GetUint64(); }
+    static ValueType& Set(ValueType& v, uint64_t data) { return v.SetUint64(data); }
+    static ValueType& Set(ValueType& v, uint64_t data, typename ValueType::AllocatorType&) { return v.SetUint64(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, double> {
+    static bool Is(const ValueType& v) { return v.IsDouble(); }
+    static double Get(const ValueType& v) { return v.GetDouble(); }
+    static ValueType& Set(ValueType& v, double data) { return v.SetDouble(data); }
+    static ValueType& Set(ValueType& v, double data, typename ValueType::AllocatorType&) { return v.SetDouble(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, float> {
+    static bool Is(const ValueType& v) { return v.IsFloat(); }
+    static float Get(const ValueType& v) { return v.GetFloat(); }
+    static ValueType& Set(ValueType& v, float data) { return v.SetFloat(data); }
+    static ValueType& Set(ValueType& v, float data, typename ValueType::AllocatorType&) { return v.SetFloat(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, const typename ValueType::Ch*> {
+    typedef const typename ValueType::Ch* StringType;
+    static bool Is(const ValueType& v) { return v.IsString(); }
+    static StringType Get(const ValueType& v) { return v.GetString(); }
+    static ValueType& Set(ValueType& v, const StringType data) { return v.SetString(typename ValueType::StringRefType(data)); }
+    static ValueType& Set(ValueType& v, const StringType data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); }
+};
+
+#if RAPIDJSON_HAS_STDSTRING
+template<typename ValueType> 
+struct TypeHelper<ValueType, std::basic_string<typename ValueType::Ch> > {
+    typedef std::basic_string<typename ValueType::Ch> StringType;
+    static bool Is(const ValueType& v) { return v.IsString(); }
+    static StringType Get(const ValueType& v) { return StringType(v.GetString(), v.GetStringLength()); }
+    static ValueType& Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); }
+};
+#endif
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, typename ValueType::Array> {
+    typedef typename ValueType::Array ArrayType;
+    static bool Is(const ValueType& v) { return v.IsArray(); }
+    static ArrayType Get(ValueType& v) { return v.GetArray(); }
+    static ValueType& Set(ValueType& v, ArrayType data) { return v = data; }
+    static ValueType& Set(ValueType& v, ArrayType data, typename ValueType::AllocatorType&) { return v = data; }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, typename ValueType::ConstArray> {
+    typedef typename ValueType::ConstArray ArrayType;
+    static bool Is(const ValueType& v) { return v.IsArray(); }
+    static ArrayType Get(const ValueType& v) { return v.GetArray(); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, typename ValueType::Object> {
+    typedef typename ValueType::Object ObjectType;
+    static bool Is(const ValueType& v) { return v.IsObject(); }
+    static ObjectType Get(ValueType& v) { return v.GetObject(); }
+    static ValueType& Set(ValueType& v, ObjectType data) { return v = data; }
+    static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { return v = data; }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, typename ValueType::ConstObject> {
+    typedef typename ValueType::ConstObject ObjectType;
+    static bool Is(const ValueType& v) { return v.IsObject(); }
+    static ObjectType Get(const ValueType& v) { return v.GetObject(); }
+};
+
+} // namespace internal
+
+// Forward declarations
+template <bool, typename> class GenericArray;
+template <bool, typename> class GenericObject;
+
 ///////////////////////////////////////////////////////////////////////////////
 // GenericValue
 
@@ -428,17 +569,21 @@ class GenericValue {
     typedef GenericValue* ValueIterator;            //!< Value iterator for iterating in array.
     typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array.
     typedef GenericValue<Encoding, Allocator> ValueType;    //!< Value type of itself.
+    typedef GenericArray<false, ValueType> Array;
+    typedef GenericArray<true, ValueType> ConstArray;
+    typedef GenericObject<false, ValueType> Object;
+    typedef GenericObject<true, ValueType> ConstObject;
 
     //!@name Constructors and destructor.
     //@{
 
     //! Default constructor creates a null value.
-    GenericValue() RAPIDJSON_NOEXCEPT : data_(), flags_(kNullFlag) {}
+    GenericValue() RAPIDJSON_NOEXCEPT : data_() { data_.f.flags = kNullFlag; }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     //! Move constructor in C++11
-    GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_), flags_(rhs.flags_) {
-        rhs.flags_ = kNullFlag; // give up contents
+    GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_) {
+        rhs.data_.f.flags = kNullFlag; // give up contents
     }
 #endif
 
@@ -446,6 +591,16 @@ class GenericValue {
     //! Copy constructor is not permitted.
     GenericValue(const GenericValue& rhs);
 
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Moving from a GenericDocument is not permitted.
+    template <typename StackAllocator>
+    GenericValue(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs);
+
+    //! Move assignment from a GenericDocument is not permitted.
+    template <typename StackAllocator>
+    GenericValue& operator=(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs);
+#endif
+
 public:
 
     //! Constructor with JSON value type.
@@ -453,13 +608,13 @@ class GenericValue {
         \param type Type of the value.
         \note Default content for number is zero.
     */
-    explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_(), flags_() {
-        static const unsigned defaultFlags[7] = {
+    explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_() {
+        static const uint16_t defaultFlags[7] = {
             kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kShortStringFlag,
             kNumberAnyFlag
         };
         RAPIDJSON_ASSERT(type <= kNumberType);
-        flags_ = defaultFlags[type];
+        data_.f.flags = defaultFlags[type];
 
         // Use ShortString to store empty string.
         if (type == kStringType)
@@ -471,10 +626,50 @@ class GenericValue {
         \tparam SourceAllocator allocator of \c rhs
         \param rhs Value to copy from (read-only)
         \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator().
+        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer)
         \see CopyFrom()
     */
-    template< typename SourceAllocator >
-    GenericValue(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator & allocator);
+    template <typename SourceAllocator>
+    GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
+        switch (rhs.GetType()) {
+        case kObjectType: {
+                SizeType count = rhs.data_.o.size;
+                Member* lm = reinterpret_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
+                const typename GenericValue<Encoding,SourceAllocator>::Member* rm = rhs.GetMembersPointer();
+                for (SizeType i = 0; i < count; i++) {
+                    new (&lm[i].name) GenericValue(rm[i].name, allocator, copyConstStrings);
+                    new (&lm[i].value) GenericValue(rm[i].value, allocator, copyConstStrings);
+                }
+                data_.f.flags = kObjectFlag;
+                data_.o.size = data_.o.capacity = count;
+                SetMembersPointer(lm);
+            }
+            break;
+        case kArrayType: {
+                SizeType count = rhs.data_.a.size;
+                GenericValue* le = reinterpret_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+                const GenericValue<Encoding,SourceAllocator>* re = rhs.GetElementsPointer();
+                for (SizeType i = 0; i < count; i++)
+                    new (&le[i]) GenericValue(re[i], allocator, copyConstStrings);
+                data_.f.flags = kArrayFlag;
+                data_.a.size = data_.a.capacity = count;
+                SetElementsPointer(le);
+            }
+            break;
+        case kStringType:
+            if (rhs.data_.f.flags == kConstStringFlag && !copyConstStrings) {
+                data_.f.flags = rhs.data_.f.flags;
+                data_  = *reinterpret_cast<const Data*>(&rhs.data_);
+            }
+            else
+                SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator);
+            break;
+        default:
+            data_.f.flags = rhs.data_.f.flags;
+            data_  = *reinterpret_cast<const Data*>(&rhs.data_);
+            break;
+        }
+    }
 
     //! Constructor for boolean value.
     /*! \param b Boolean value
@@ -484,96 +679,125 @@ class GenericValue {
      */
 #ifndef RAPIDJSON_DOXYGEN_RUNNING // hide SFINAE from Doxygen
     template <typename T>
-    explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame<T,bool>))) RAPIDJSON_NOEXCEPT
+    explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame<bool, T>))) RAPIDJSON_NOEXCEPT  // See #472
 #else
     explicit GenericValue(bool b) RAPIDJSON_NOEXCEPT
 #endif
-        : data_(), flags_(b ? kTrueFlag : kFalseFlag) {
+        : data_() {
             // safe-guard against failing SFINAE
             RAPIDJSON_STATIC_ASSERT((internal::IsSame<bool,T>::Value));
+            data_.f.flags = b ? kTrueFlag : kFalseFlag;
     }
 
     //! Constructor for int value.
-    explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberIntFlag) {
+    explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_() {
         data_.n.i64 = i;
-        if (i >= 0)
-            flags_ |= kUintFlag | kUint64Flag;
+        data_.f.flags = (i >= 0) ? (kNumberIntFlag | kUintFlag | kUint64Flag) : kNumberIntFlag;
     }
 
     //! Constructor for unsigned value.
-    explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberUintFlag) {
+    explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_() {
         data_.n.u64 = u; 
-        if (!(u & 0x80000000))
-            flags_ |= kIntFlag | kInt64Flag;
+        data_.f.flags = (u & 0x80000000) ? kNumberUintFlag : (kNumberUintFlag | kIntFlag | kInt64Flag);
     }
 
     //! Constructor for int64_t value.
-    explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberInt64Flag) {
+    explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_() {
         data_.n.i64 = i64;
+        data_.f.flags = kNumberInt64Flag;
         if (i64 >= 0) {
-            flags_ |= kNumberUint64Flag;
+            data_.f.flags |= kNumberUint64Flag;
             if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
-                flags_ |= kUintFlag;
+                data_.f.flags |= kUintFlag;
             if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
-                flags_ |= kIntFlag;
+                data_.f.flags |= kIntFlag;
         }
         else if (i64 >= static_cast<int64_t>(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
-            flags_ |= kIntFlag;
+            data_.f.flags |= kIntFlag;
     }
 
     //! Constructor for uint64_t value.
-    explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberUint64Flag) {
+    explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_() {
         data_.n.u64 = u64;
+        data_.f.flags = kNumberUint64Flag;
         if (!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000)))
-            flags_ |= kInt64Flag;
+            data_.f.flags |= kInt64Flag;
         if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
-            flags_ |= kUintFlag;
+            data_.f.flags |= kUintFlag;
         if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
-            flags_ |= kIntFlag;
+            data_.f.flags |= kIntFlag;
     }
 
     //! Constructor for double value.
-    explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberDoubleFlag) { data_.n.d = d; }
+    explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = d; data_.f.flags = kNumberDoubleFlag; }
+
+    //! Constructor for float value.
+    explicit GenericValue(float f) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = static_cast<double>(f); data_.f.flags = kNumberDoubleFlag; }
 
     //! Constructor for constant string (i.e. do not make a copy of string)
-    GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_(), flags_() { SetStringRaw(StringRef(s, length)); }
+    GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(StringRef(s, length)); }
 
     //! Constructor for constant string (i.e. do not make a copy of string)
-    explicit GenericValue(StringRefType s) RAPIDJSON_NOEXCEPT : data_(), flags_() { SetStringRaw(s); }
+    explicit GenericValue(StringRefType s) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(s); }
 
     //! Constructor for copy-string (i.e. do make a copy of string)
-    GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_(), flags_() { SetStringRaw(StringRef(s, length), allocator); }
+    GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_() { SetStringRaw(StringRef(s, length), allocator); }
 
     //! Constructor for copy-string (i.e. do make a copy of string)
-    GenericValue(const Ch*s, Allocator& allocator) : data_(), flags_() { SetStringRaw(StringRef(s), allocator); }
+    GenericValue(const Ch*s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Constructor for copy-string from a string object (i.e. do make a copy of string)
     /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
      */
-    GenericValue(const std::basic_string<Ch>& s, Allocator& allocator) : data_(), flags_() { SetStringRaw(StringRef(s), allocator); }
+    GenericValue(const std::basic_string<Ch>& s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); }
 #endif
 
+    //! Constructor for Array.
+    /*!
+        \param a An array obtained by \c GetArray().
+        \note \c Array is always pass-by-value.
+        \note the source array is moved into this value and the source array becomes empty.
+    */
+    GenericValue(Array a) RAPIDJSON_NOEXCEPT : data_(a.value_.data_) {
+        a.value_.data_ = Data();
+        a.value_.data_.f.flags = kArrayFlag;
+    }
+
+    //! Constructor for Object.
+    /*!
+        \param o An object obtained by \c GetObject().
+        \note \c Object is always pass-by-value.
+        \note the source object is moved into this value and the source object becomes empty.
+    */
+    GenericValue(Object o) RAPIDJSON_NOEXCEPT : data_(o.value_.data_) {
+        o.value_.data_ = Data();
+        o.value_.data_.f.flags = kObjectFlag;
+    }
+
     //! Destructor.
     /*! Need to destruct elements of array, members of object, or copy-string.
     */
     ~GenericValue() {
         if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
-            switch(flags_) {
+            switch(data_.f.flags) {
             case kArrayFlag:
-                for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v)
-                    v->~GenericValue();
-                Allocator::Free(data_.a.elements);
+                {
+                    GenericValue* e = GetElementsPointer();
+                    for (GenericValue* v = e; v != e + data_.a.size; ++v)
+                        v->~GenericValue();
+                    Allocator::Free(e);
+                }
                 break;
 
             case kObjectFlag:
                 for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
                     m->~Member();
-                Allocator::Free(data_.o.members);
+                Allocator::Free(GetMembersPointer());
                 break;
 
             case kCopyStringFlag:
-                Allocator::Free(const_cast<Ch*>(data_.s.str));
+                Allocator::Free(const_cast<Ch*>(GetStringPointer()));
                 break;
 
             default:
@@ -638,12 +862,13 @@ class GenericValue {
         \tparam SourceAllocator Allocator type of \c rhs
         \param rhs Value to copy from (read-only)
         \param allocator Allocator to use for copying
+        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer)
      */
     template <typename SourceAllocator>
-    GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator& allocator) {
-        RAPIDJSON_ASSERT((void*)this != (void const*)&rhs);
+    GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
+        RAPIDJSON_ASSERT(static_cast<void*>(this) != static_cast<void const*>(&rhs));
         this->~GenericValue();
-        new (this) GenericValue(rhs, allocator);
+        new (this) GenericValue(rhs, allocator, copyConstStrings);
         return *this;
     }
 
@@ -660,6 +885,20 @@ class GenericValue {
         return *this;
     }
 
+    //! free-standing swap function helper
+    /*!
+        Helper function to enable support for common swap implementation pattern based on \c std::swap:
+        \code
+        void swap(MyClass& a, MyClass& b) {
+            using std::swap;
+            swap(a.value, b.value);
+            // ...
+        }
+        \endcode
+        \see Swap()
+     */
+    friend inline void swap(GenericValue& a, GenericValue& b) RAPIDJSON_NOEXCEPT { a.Swap(b); }
+
     //! Prepare Value for move semantics
     /*! \return *this */
     GenericValue& Move() RAPIDJSON_NOEXCEPT { return *this; }
@@ -709,7 +948,7 @@ class GenericValue {
             else
                 return data_.n.u64 == rhs.data_.n.u64;
 
-        default: // kTrueType, kFalseType, kNullType
+        default:
             return true;
         }
     }
@@ -757,20 +996,58 @@ class GenericValue {
     //!@name Type
     //@{
 
-    Type GetType()  const { return static_cast<Type>(flags_ & kTypeMask); }
-    bool IsNull()   const { return flags_ == kNullFlag; }
-    bool IsFalse()  const { return flags_ == kFalseFlag; }
-    bool IsTrue()   const { return flags_ == kTrueFlag; }
-    bool IsBool()   const { return (flags_ & kBoolFlag) != 0; }
-    bool IsObject() const { return flags_ == kObjectFlag; }
-    bool IsArray()  const { return flags_ == kArrayFlag; }
-    bool IsNumber() const { return (flags_ & kNumberFlag) != 0; }
-    bool IsInt()    const { return (flags_ & kIntFlag) != 0; }
-    bool IsUint()   const { return (flags_ & kUintFlag) != 0; }
-    bool IsInt64()  const { return (flags_ & kInt64Flag) != 0; }
-    bool IsUint64() const { return (flags_ & kUint64Flag) != 0; }
-    bool IsDouble() const { return (flags_ & kDoubleFlag) != 0; }
-    bool IsString() const { return (flags_ & kStringFlag) != 0; }
+    Type GetType()  const { return static_cast<Type>(data_.f.flags & kTypeMask); }
+    bool IsNull()   const { return data_.f.flags == kNullFlag; }
+    bool IsFalse()  const { return data_.f.flags == kFalseFlag; }
+    bool IsTrue()   const { return data_.f.flags == kTrueFlag; }
+    bool IsBool()   const { return (data_.f.flags & kBoolFlag) != 0; }
+    bool IsObject() const { return data_.f.flags == kObjectFlag; }
+    bool IsArray()  const { return data_.f.flags == kArrayFlag; }
+    bool IsNumber() const { return (data_.f.flags & kNumberFlag) != 0; }
+    bool IsInt()    const { return (data_.f.flags & kIntFlag) != 0; }
+    bool IsUint()   const { return (data_.f.flags & kUintFlag) != 0; }
+    bool IsInt64()  const { return (data_.f.flags & kInt64Flag) != 0; }
+    bool IsUint64() const { return (data_.f.flags & kUint64Flag) != 0; }
+    bool IsDouble() const { return (data_.f.flags & kDoubleFlag) != 0; }
+    bool IsString() const { return (data_.f.flags & kStringFlag) != 0; }
+
+    // Checks whether a number can be losslessly converted to a double.
+    bool IsLosslessDouble() const {
+        if (!IsNumber()) return false;
+        if (IsUint64()) {
+            uint64_t u = GetUint64();
+            volatile double d = static_cast<double>(u);
+            return (d >= 0.0)
+                && (d < static_cast<double>(std::numeric_limits<uint64_t>::max()))
+                && (u == static_cast<uint64_t>(d));
+        }
+        if (IsInt64()) {
+            int64_t i = GetInt64();
+            volatile double d = static_cast<double>(i);
+            return (d >= static_cast<double>(std::numeric_limits<int64_t>::min()))
+                && (d < static_cast<double>(std::numeric_limits<int64_t>::max()))
+                && (i == static_cast<int64_t>(d));
+        }
+        return true; // double, int, uint are always lossless
+    }
+
+    // Checks whether a number is a float (possibly lossy).
+    bool IsFloat() const  {
+        if ((data_.f.flags & kDoubleFlag) == 0)
+            return false;
+        double d = GetDouble();
+        return d >= -3.4028234e38 && d <= 3.4028234e38;
+    }
+    // Checks whether a number can be losslessly converted to a float.
+    bool IsLosslessFloat() const {
+        if (!IsNumber()) return false;
+        double a = GetDouble();
+        if (a < static_cast<double>(-std::numeric_limits<float>::max())
+                || a > static_cast<double>(std::numeric_limits<float>::max()))
+            return false;
+        double b = static_cast<double>(static_cast<float>(a));
+        return a >= b && a <= b;    // Prevent -Wfloat-equal
+    }
 
     //@}
 
@@ -784,7 +1061,7 @@ class GenericValue {
     //!@name Bool
     //@{
 
-    bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return flags_ == kTrueFlag; }
+    bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return data_.f.flags == kTrueFlag; }
     //!< Set boolean value
     /*! \post IsBool() == true */
     GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; }
@@ -837,8 +1114,14 @@ class GenericValue {
             return member->value;
         else {
             RAPIDJSON_ASSERT(false);    // see above note
-            static GenericValue NullValue;
-            return NullValue;
+
+            // This will generate -Wexit-time-destructors in clang
+            // static GenericValue NullValue;
+            // return NullValue;
+
+            // Use static buffer and placement-new to prevent destruction
+            static char buffer[sizeof(GenericValue)];
+            return *new (buffer) GenericValue();
         }
     }
     template <typename SourceAllocator>
@@ -852,16 +1135,16 @@ class GenericValue {
 
     //! Const member iterator
     /*! \pre IsObject() == true */
-    ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(data_.o.members); }
+    ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer()); }
     //! Const \em past-the-end member iterator
     /*! \pre IsObject() == true */
-    ConstMemberIterator MemberEnd() const   { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(data_.o.members + data_.o.size); }
+    ConstMemberIterator MemberEnd() const   { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer() + data_.o.size); }
     //! Member iterator
     /*! \pre IsObject() == true */
-    MemberIterator MemberBegin()            { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(data_.o.members); }
+    MemberIterator MemberBegin()            { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer()); }
     //! \em Past-the-end member iterator
     /*! \pre IsObject() == true */
-    MemberIterator MemberEnd()              { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(data_.o.members + data_.o.size); }
+    MemberIterator MemberEnd()              { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer() + data_.o.size); }
 
     //! Check whether a member exists in the object.
     /*!
@@ -949,8 +1232,8 @@ class GenericValue {
         \return Iterator to member, if it exists.
             Otherwise returns \ref MemberEnd().
     */
-    MemberIterator FindMember(const std::basic_string<Ch>& name) { return FindMember(StringRef(name)); }
-    ConstMemberIterator FindMember(const std::basic_string<Ch>& name) const { return FindMember(StringRef(name)); }
+    MemberIterator FindMember(const std::basic_string<Ch>& name) { return FindMember(GenericValue(StringRef(name))); }
+    ConstMemberIterator FindMember(const std::basic_string<Ch>& name) const { return FindMember(GenericValue(StringRef(name))); }
 #endif
 
     //! Add a member (name-value pair) to the object.
@@ -967,20 +1250,21 @@ class GenericValue {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(name.IsString());
 
-        Object& o = data_.o;
+        ObjectData& o = data_.o;
         if (o.size >= o.capacity) {
             if (o.capacity == 0) {
                 o.capacity = kDefaultObjectCapacity;
-                o.members = reinterpret_cast<Member*>(allocator.Malloc(o.capacity * sizeof(Member)));
+                SetMembersPointer(reinterpret_cast<Member*>(allocator.Malloc(o.capacity * sizeof(Member))));
             }
             else {
                 SizeType oldCapacity = o.capacity;
                 o.capacity += (oldCapacity + 1) / 2; // grow by factor 1.5
-                o.members = reinterpret_cast<Member*>(allocator.Realloc(o.members, oldCapacity * sizeof(Member), o.capacity * sizeof(Member)));
+                SetMembersPointer(reinterpret_cast<Member*>(allocator.Realloc(GetMembersPointer(), oldCapacity * sizeof(Member), o.capacity * sizeof(Member))));
             }
         }
-        o.members[o.size].name.RawAssign(name);
-        o.members[o.size].value.RawAssign(value);
+        Member* members = GetMembersPointer();
+        members[o.size].name.RawAssign(name);
+        members[o.size].value.RawAssign(value);
         o.size++;
         return *this;
     }
@@ -1159,18 +1443,14 @@ class GenericValue {
     MemberIterator RemoveMember(MemberIterator m) {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(data_.o.size > 0);
-        RAPIDJSON_ASSERT(data_.o.members != 0);
+        RAPIDJSON_ASSERT(GetMembersPointer() != 0);
         RAPIDJSON_ASSERT(m >= MemberBegin() && m < MemberEnd());
 
-        MemberIterator last(data_.o.members + (data_.o.size - 1));
-        if (data_.o.size > 1 && m != last) {
-            // Move the last one to this place
-            *m = *last;
-        }
-        else {
-            // Only one left, just destroy
-            m->~Member();
-        }
+        MemberIterator last(GetMembersPointer() + (data_.o.size - 1));
+        if (data_.o.size > 1 && m != last)
+            *m = *last; // Move the last one to this place
+        else
+            m->~Member(); // Only one left, just destroy
         --data_.o.size;
         return m;
     }
@@ -1200,7 +1480,7 @@ class GenericValue {
     MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(data_.o.size > 0);
-        RAPIDJSON_ASSERT(data_.o.members != 0);
+        RAPIDJSON_ASSERT(GetMembersPointer() != 0);
         RAPIDJSON_ASSERT(first >= MemberBegin());
         RAPIDJSON_ASSERT(first <= last);
         RAPIDJSON_ASSERT(last <= MemberEnd());
@@ -1208,11 +1488,39 @@ class GenericValue {
         MemberIterator pos = MemberBegin() + (first - MemberBegin());
         for (MemberIterator itr = pos; itr != last; ++itr)
             itr->~Member();
-        std::memmove(&*pos, &*last, (MemberEnd() - last) * sizeof(Member));
-        data_.o.size -= (last - first);
+        std::memmove(&*pos, &*last, static_cast<size_t>(MemberEnd() - last) * sizeof(Member));
+        data_.o.size -= static_cast<SizeType>(last - first);
         return pos;
     }
 
+    //! Erase a member in object by its name.
+    /*! \param name Name of member to be removed.
+        \return Whether the member existed.
+        \note Linear time complexity.
+    */
+    bool EraseMember(const Ch* name) {
+        GenericValue n(StringRef(name));
+        return EraseMember(n);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool EraseMember(const std::basic_string<Ch>& name) { return EraseMember(GenericValue(StringRef(name))); }
+#endif
+
+    template <typename SourceAllocator>
+    bool EraseMember(const GenericValue<Encoding, SourceAllocator>& name) {
+        MemberIterator m = FindMember(name);
+        if (m != MemberEnd()) {
+            EraseMember(m);
+            return true;
+        }
+        else
+            return false;
+    }
+
+    Object GetObject() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); }
+    ConstObject GetObject() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); }
+
     //@}
 
     //!@name Array
@@ -1220,7 +1528,7 @@ class GenericValue {
 
     //! Set this value as an empty array.
     /*! \post IsArray == true */
-    GenericValue& SetArray() {  this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; }
+    GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; }
 
     //! Get the number of elements in array.
     SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; }
@@ -1237,8 +1545,9 @@ class GenericValue {
     */
     void Clear() {
         RAPIDJSON_ASSERT(IsArray()); 
-        for (SizeType i = 0; i < data_.a.size; ++i)
-            data_.a.elements[i].~GenericValue();
+        GenericValue* e = GetElementsPointer();
+        for (GenericValue* v = e; v != e + data_.a.size; ++v)
+            v->~GenericValue();
         data_.a.size = 0;
     }
 
@@ -1250,16 +1559,16 @@ class GenericValue {
     GenericValue& operator[](SizeType index) {
         RAPIDJSON_ASSERT(IsArray());
         RAPIDJSON_ASSERT(index < data_.a.size);
-        return data_.a.elements[index];
+        return GetElementsPointer()[index];
     }
     const GenericValue& operator[](SizeType index) const { return const_cast<GenericValue&>(*this)[index]; }
 
     //! Element iterator
     /*! \pre IsArray() == true */
-    ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return data_.a.elements; }
+    ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer(); }
     //! \em Past-the-end element iterator
     /*! \pre IsArray() == true */
-    ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return data_.a.elements + data_.a.size; }
+    ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer() + data_.a.size; }
     //! Constant element iterator
     /*! \pre IsArray() == true */
     ConstValueIterator Begin() const { return const_cast<GenericValue&>(*this).Begin(); }
@@ -1276,7 +1585,7 @@ class GenericValue {
     GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) {
         RAPIDJSON_ASSERT(IsArray());
         if (newCapacity > data_.a.capacity) {
-            data_.a.elements = (GenericValue*)allocator.Realloc(data_.a.elements, data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue));
+            SetElementsPointer(reinterpret_cast<GenericValue*>(allocator.Realloc(GetElementsPointer(), data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue))));
             data_.a.capacity = newCapacity;
         }
         return *this;
@@ -1296,7 +1605,7 @@ class GenericValue {
         RAPIDJSON_ASSERT(IsArray());
         if (data_.a.size >= data_.a.capacity)
             Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : (data_.a.capacity + (data_.a.capacity + 1) / 2), allocator);
-        data_.a.elements[data_.a.size++].RawAssign(value);
+        GetElementsPointer()[data_.a.size++].RawAssign(value);
         return *this;
     }
 
@@ -1350,7 +1659,7 @@ class GenericValue {
     GenericValue& PopBack() {
         RAPIDJSON_ASSERT(IsArray());
         RAPIDJSON_ASSERT(!Empty());
-        data_.a.elements[--data_.a.size].~GenericValue();
+        GetElementsPointer()[--data_.a.size].~GenericValue();
         return *this;
     }
 
@@ -1376,35 +1685,48 @@ class GenericValue {
     ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) {
         RAPIDJSON_ASSERT(IsArray());
         RAPIDJSON_ASSERT(data_.a.size > 0);
-        RAPIDJSON_ASSERT(data_.a.elements != 0);
+        RAPIDJSON_ASSERT(GetElementsPointer() != 0);
         RAPIDJSON_ASSERT(first >= Begin());
         RAPIDJSON_ASSERT(first <= last);
         RAPIDJSON_ASSERT(last <= End());
         ValueIterator pos = Begin() + (first - Begin());
         for (ValueIterator itr = pos; itr != last; ++itr)
             itr->~GenericValue();       
-        std::memmove(pos, last, (End() - last) * sizeof(GenericValue));
-        data_.a.size -= (last - first);
+        std::memmove(pos, last, static_cast<size_t>(End() - last) * sizeof(GenericValue));
+        data_.a.size -= static_cast<SizeType>(last - first);
         return pos;
     }
 
+    Array GetArray() { RAPIDJSON_ASSERT(IsArray()); return Array(*this); }
+    ConstArray GetArray() const { RAPIDJSON_ASSERT(IsArray()); return ConstArray(*this); }
+
     //@}
 
     //!@name Number
     //@{
 
-    int GetInt() const          { RAPIDJSON_ASSERT(flags_ & kIntFlag);   return data_.n.i.i;   }
-    unsigned GetUint() const    { RAPIDJSON_ASSERT(flags_ & kUintFlag);  return data_.n.u.u;   }
-    int64_t GetInt64() const    { RAPIDJSON_ASSERT(flags_ & kInt64Flag); return data_.n.i64; }
-    uint64_t GetUint64() const  { RAPIDJSON_ASSERT(flags_ & kUint64Flag); return data_.n.u64; }
+    int GetInt() const          { RAPIDJSON_ASSERT(data_.f.flags & kIntFlag);   return data_.n.i.i;   }
+    unsigned GetUint() const    { RAPIDJSON_ASSERT(data_.f.flags & kUintFlag);  return data_.n.u.u;   }
+    int64_t GetInt64() const    { RAPIDJSON_ASSERT(data_.f.flags & kInt64Flag); return data_.n.i64; }
+    uint64_t GetUint64() const  { RAPIDJSON_ASSERT(data_.f.flags & kUint64Flag); return data_.n.u64; }
 
+    //! Get the value as double type.
+    /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessDouble() to check whether the conversion is lossless.
+    */
     double GetDouble() const {
         RAPIDJSON_ASSERT(IsNumber());
-        if ((flags_ & kDoubleFlag) != 0)                return data_.n.d;   // exact type, no conversion.
-        if ((flags_ & kIntFlag) != 0)                   return data_.n.i.i; // int -> double
-        if ((flags_ & kUintFlag) != 0)                  return data_.n.u.u; // unsigned -> double
-        if ((flags_ & kInt64Flag) != 0)                 return (double)data_.n.i64; // int64_t -> double (may lose precision)
-        RAPIDJSON_ASSERT((flags_ & kUint64Flag) != 0);  return (double)data_.n.u64; // uint64_t -> double (may lose precision)
+        if ((data_.f.flags & kDoubleFlag) != 0)                return data_.n.d;   // exact type, no conversion.
+        if ((data_.f.flags & kIntFlag) != 0)                   return data_.n.i.i; // int -> double
+        if ((data_.f.flags & kUintFlag) != 0)                  return data_.n.u.u; // unsigned -> double
+        if ((data_.f.flags & kInt64Flag) != 0)                 return static_cast<double>(data_.n.i64); // int64_t -> double (may lose precision)
+        RAPIDJSON_ASSERT((data_.f.flags & kUint64Flag) != 0);  return static_cast<double>(data_.n.u64); // uint64_t -> double (may lose precision)
+    }
+
+    //! Get the value as float type.
+    /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessFloat() to check whether the conversion is lossless.
+    */
+    float GetFloat() const {
+        return static_cast<float>(GetDouble());
     }
 
     GenericValue& SetInt(int i)             { this->~GenericValue(); new (this) GenericValue(i);    return *this; }
@@ -1412,18 +1734,19 @@ class GenericValue {
     GenericValue& SetInt64(int64_t i64)     { this->~GenericValue(); new (this) GenericValue(i64);  return *this; }
     GenericValue& SetUint64(uint64_t u64)   { this->~GenericValue(); new (this) GenericValue(u64);  return *this; }
     GenericValue& SetDouble(double d)       { this->~GenericValue(); new (this) GenericValue(d);    return *this; }
+    GenericValue& SetFloat(float f)         { this->~GenericValue(); new (this) GenericValue(static_cast<double>(f)); return *this; }
 
     //@}
 
     //!@name String
     //@{
 
-    const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return ((flags_ & kInlineStrFlag) ? data_.ss.str : data_.s.str); }
+    const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return (data_.f.flags & kInlineStrFlag) ? data_.ss.str : GetStringPointer(); }
 
     //! Get the length of string.
     /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength().
     */
-    SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return ((flags_ & kInlineStrFlag) ? (data_.ss.GetLength()) : data_.s.length); }
+    SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return ((data_.f.flags & kInlineStrFlag) ? (data_.ss.GetLength()) : data_.s.length); }
 
     //! Set this value as a string without copying source string.
     /*! This version has better performance with supplied length, and also support string containing null character.
@@ -1450,7 +1773,7 @@ class GenericValue {
         \return The value itself for fluent API.
         \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
     */
-    GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { this->~GenericValue(); SetStringRaw(StringRef(s, length), allocator); return *this; }
+    GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { return SetString(StringRef(s, length), allocator); }
 
     //! Set this value as a string by copying from source string.
     /*! \param s source string. 
@@ -1458,7 +1781,15 @@ class GenericValue {
         \return The value itself for fluent API.
         \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
     */
-    GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(s, internal::StrLen(s), allocator); }
+    GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+
+    //! Set this value as a string by copying from source string.
+    /*! \param s source string reference
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s.s && strcmp(GetString(),s.s) == 0 && GetStringLength() == s.length
+    */
+    GenericValue& SetString(StringRefType s, Allocator& allocator) { this->~GenericValue(); SetStringRaw(s, allocator); return *this; }
 
 #if RAPIDJSON_HAS_STDSTRING
     //! Set this value as a string by copying from source string.
@@ -1468,11 +1799,35 @@ class GenericValue {
         \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data() == 0 && GetStringLength() == s.size()
         \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
     */
-    GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) { return SetString(s.data(), SizeType(s.size()), allocator); }
+    GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
 #endif
 
     //@}
 
+    //!@name Templated Is/Get/Set
+    //@{
+
+    //! Templated version for checking whether this value is type T.
+    /*!
+        \tparam T Either \c bool, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c float, \c const \c char*, \c std::basic_string<Ch>
+    */
+    template <typename T>
+    bool Is() const { return internal::TypeHelper<ValueType, T>::Is(*this); }
+
+    template <typename T>
+    T Get() const { return internal::TypeHelper<ValueType, T>::Get(*this); }
+
+    template <typename T>
+    T Get() { return internal::TypeHelper<ValueType, T>::Get(*this); }
+
+    template<typename T>
+    ValueType& Set(const T& data) { return internal::TypeHelper<ValueType, T>::Set(*this, data); }
+
+    template<typename T>
+    ValueType& Set(const T& data, AllocatorType& allocator) { return internal::TypeHelper<ValueType, T>::Set(*this, data, allocator); }
+
+    //@}
+
     //! Generate events of this value to a Handler.
     /*! This function adopts the GoF visitor pattern.
         Typical usage is to output this JSON value as JSON text via Writer, which is a Handler.
@@ -1488,35 +1843,35 @@ class GenericValue {
         case kTrueType:     return handler.Bool(true);
 
         case kObjectType:
-            if (!handler.StartObject())
+            if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
                 return false;
             for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) {
                 RAPIDJSON_ASSERT(m->name.IsString()); // User may change the type of name by MemberIterator.
-                if (!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.flags_ & kCopyFlag) != 0))
+                if (RAPIDJSON_UNLIKELY(!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.data_.f.flags & kCopyFlag) != 0)))
                     return false;
-                if (!m->value.Accept(handler))
+                if (RAPIDJSON_UNLIKELY(!m->value.Accept(handler)))
                     return false;
             }
             return handler.EndObject(data_.o.size);
 
         case kArrayType:
-            if (!handler.StartArray())
+            if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
                 return false;
-            for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v)
-                if (!v->Accept(handler))
+            for (const GenericValue* v = Begin(); v != End(); ++v)
+                if (RAPIDJSON_UNLIKELY(!v->Accept(handler)))
                     return false;
             return handler.EndArray(data_.a.size);
     
         case kStringType:
-            return handler.String(GetString(), GetStringLength(), (flags_ & kCopyFlag) != 0);
+            return handler.String(GetString(), GetStringLength(), (data_.f.flags & kCopyFlag) != 0);
     
         default:
             RAPIDJSON_ASSERT(GetType() == kNumberType);
-            if (IsInt())            return handler.Int(data_.n.i.i);
+            if (IsDouble())         return handler.Double(data_.n.d);
+            else if (IsInt())       return handler.Int(data_.n.i.i);
             else if (IsUint())      return handler.Uint(data_.n.u.u);
             else if (IsInt64())     return handler.Int64(data_.n.i64);
-            else if (IsUint64())    return handler.Uint64(data_.n.u64);
-            else                    return handler.Double(data_.n.d);
+            else                    return handler.Uint64(data_.n.u64);
         }
     }
 
@@ -1525,16 +1880,16 @@ class GenericValue {
     template <typename, typename, typename> friend class GenericDocument;
 
     enum {
-        kBoolFlag = 0x100,
-        kNumberFlag = 0x200,
-        kIntFlag = 0x400,
-        kUintFlag = 0x800,
-        kInt64Flag = 0x1000,
-        kUint64Flag = 0x2000,
-        kDoubleFlag = 0x4000,
-        kStringFlag = 0x100000,
-        kCopyFlag = 0x200000,
-        kInlineStrFlag = 0x400000,
+        kBoolFlag       = 0x0008,
+        kNumberFlag     = 0x0010,
+        kIntFlag        = 0x0020,
+        kUintFlag       = 0x0040,
+        kInt64Flag      = 0x0080,
+        kUint64Flag     = 0x0100,
+        kDoubleFlag     = 0x0200,
+        kStringFlag     = 0x0400,
+        kCopyFlag       = 0x0800,
+        kInlineStrFlag  = 0x1000,
 
         // Initial flags of different types.
         kNullFlag = kNullType,
@@ -1552,16 +1907,27 @@ class GenericValue {
         kObjectFlag = kObjectType,
         kArrayFlag = kArrayType,
 
-        kTypeMask = 0xFF    // bitwise-and with mask of 0xFF can be optimized by compiler
+        kTypeMask = 0x07
     };
 
     static const SizeType kDefaultArrayCapacity = 16;
     static const SizeType kDefaultObjectCapacity = 16;
 
+    struct Flag {
+#if RAPIDJSON_48BITPOINTER_OPTIMIZATION
+        char payload[sizeof(SizeType) * 2 + 6];     // 2 x SizeType + lower 48-bit pointer
+#elif RAPIDJSON_64BIT
+        char payload[sizeof(SizeType) * 2 + sizeof(void*) + 6]; // 6 padding bytes
+#else
+        char payload[sizeof(SizeType) * 2 + sizeof(void*) + 2]; // 2 padding bytes
+#endif
+        uint16_t flags;
+    };
+
     struct String {
-        const Ch* str;
         SizeType length;
-        unsigned hashcode;  //!< reserved
+        SizeType hashcode;  //!< reserved
+        const Ch* str;
     };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
 
     // implementation detail: ShortString can represent zero-terminated strings up to MaxSize chars
@@ -1570,15 +1936,15 @@ class GenericValue {
     // to store has the maximal length of MaxSize then str[LenPos] will be 0 and therefore act as
     // the string terminator as well. For getting the string length back from that value just use
     // "MaxSize - str[LenPos]".
-    // This allows to store 11-chars strings in 32-bit mode and 15-chars strings in 64-bit mode
-    // inline (for `UTF8`-encoded strings).
+    // This allows to store 13-chars strings in 32-bit mode, 21-chars strings in 64-bit mode,
+    // 13-chars strings for RAPIDJSON_48BITPOINTER_OPTIMIZATION=1 inline (for `UTF8`-encoded strings).
     struct ShortString {
-        enum { MaxChars = sizeof(String) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize };
+        enum { MaxChars = sizeof(static_cast<Flag*>(0)->payload) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize };
         Ch str[MaxChars];
 
-        inline static bool Usable(SizeType len) { return            (MaxSize >= len); }
-        inline void     SetLength(SizeType len) { str[LenPos] = (Ch)(MaxSize -  len); }
-        inline SizeType GetLength() const       { return  (SizeType)(MaxSize -  str[LenPos]); }
+        inline static bool Usable(SizeType len) { return                       (MaxSize >= len); }
+        inline void     SetLength(SizeType len) { str[LenPos] = static_cast<Ch>(MaxSize -  len); }
+        inline SizeType GetLength() const       { return  static_cast<SizeType>(MaxSize -  str[LenPos]); }
     };  // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
 
     // By using proper binary layout, retrieval of different integer types do not need conversions.
@@ -1607,69 +1973,79 @@ class GenericValue {
         double d;
     };  // 8 bytes
 
-    struct Object {
-        Member* members;
+    struct ObjectData {
         SizeType size;
         SizeType capacity;
+        Member* members;
     };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
 
-    struct Array {
-        GenericValue* elements;
+    struct ArrayData {
         SizeType size;
         SizeType capacity;
+        GenericValue* elements;
     };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
 
     union Data {
         String s;
         ShortString ss;
         Number n;
-        Object o;
-        Array a;
-    };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+        ObjectData o;
+        ArrayData a;
+        Flag f;
+    };  // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with RAPIDJSON_48BITPOINTER_OPTIMIZATION
+
+    RAPIDJSON_FORCEINLINE const Ch* GetStringPointer() const { return RAPIDJSON_GETPOINTER(Ch, data_.s.str); }
+    RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str) { return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str); }
+    RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer() const { return RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements); }
+    RAPIDJSON_FORCEINLINE GenericValue* SetElementsPointer(GenericValue* elements) { return RAPIDJSON_SETPOINTER(GenericValue, data_.a.elements, elements); }
+    RAPIDJSON_FORCEINLINE Member* GetMembersPointer() const { return RAPIDJSON_GETPOINTER(Member, data_.o.members); }
+    RAPIDJSON_FORCEINLINE Member* SetMembersPointer(Member* members) { return RAPIDJSON_SETPOINTER(Member, data_.o.members, members); }
 
     // Initialize this value as array with initial data, without calling destructor.
     void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) {
-        flags_ = kArrayFlag;
+        data_.f.flags = kArrayFlag;
         if (count) {
-            data_.a.elements = (GenericValue*)allocator.Malloc(count * sizeof(GenericValue));
-            std::memcpy(data_.a.elements, values, count * sizeof(GenericValue));
+            GenericValue* e = static_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+            SetElementsPointer(e);
+            std::memcpy(e, values, count * sizeof(GenericValue));
         }
         else
-            data_.a.elements = NULL;
+            SetElementsPointer(0);
         data_.a.size = data_.a.capacity = count;
     }
 
     //! Initialize this value as object with initial data, without calling destructor.
     void SetObjectRaw(Member* members, SizeType count, Allocator& allocator) {
-        flags_ = kObjectFlag;
+        data_.f.flags = kObjectFlag;
         if (count) {
-            data_.o.members = (Member*)allocator.Malloc(count * sizeof(Member));
-            std::memcpy(data_.o.members, members, count * sizeof(Member));
+            Member* m = static_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
+            SetMembersPointer(m);
+            std::memcpy(m, members, count * sizeof(Member));
         }
         else
-            data_.o.members = NULL;
+            SetMembersPointer(0);
         data_.o.size = data_.o.capacity = count;
     }
 
     //! Initialize this value as constant string, without calling destructor.
     void SetStringRaw(StringRefType s) RAPIDJSON_NOEXCEPT {
-        flags_ = kConstStringFlag;
-        data_.s.str = s;
+        data_.f.flags = kConstStringFlag;
+        SetStringPointer(s);
         data_.s.length = s.length;
     }
 
     //! Initialize this value as copy string with initial data, without calling destructor.
     void SetStringRaw(StringRefType s, Allocator& allocator) {
-        Ch* str = NULL;
-        if(ShortString::Usable(s.length)) {
-            flags_ = kShortStringFlag;
+        Ch* str = 0;
+        if (ShortString::Usable(s.length)) {
+            data_.f.flags = kShortStringFlag;
             data_.ss.SetLength(s.length);
             str = data_.ss.str;
         } else {
-            flags_ = kCopyStringFlag;
+            data_.f.flags = kCopyStringFlag;
             data_.s.length = s.length;
-            str = (Ch *)allocator.Malloc((s.length + 1) * sizeof(Ch));
-            data_.s.str = str;
+            str = static_cast<Ch *>(allocator.Malloc((s.length + 1) * sizeof(Ch)));
+            SetStringPointer(str);
         }
         std::memcpy(str, s, s.length * sizeof(Ch));
         str[s.length] = '\0';
@@ -1678,8 +2054,8 @@ class GenericValue {
     //! Assignment without calling destructor
     void RawAssign(GenericValue& rhs) RAPIDJSON_NOEXCEPT {
         data_ = rhs.data_;
-        flags_ = rhs.flags_;
-        rhs.flags_ = kNullFlag;
+        // data_.f.flags = rhs.data_.f.flags;
+        rhs.data_.f.flags = kNullFlag;
     }
 
     template <typename SourceAllocator>
@@ -1699,7 +2075,6 @@ class GenericValue {
     }
 
     Data data_;
-    unsigned flags_;
 };
 
 //! GenericValue with UTF8 encoding
@@ -1724,7 +2099,22 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
     typedef Allocator AllocatorType;                        //!< Allocator type from template parameter.
 
     //! Constructor
-    /*! \param allocator        Optional allocator for allocating memory.
+    /*! Creates an empty document of specified type.
+        \param type             Mandatory type of object to create.
+        \param allocator        Optional allocator for allocating memory.
+        \param stackCapacity    Optional initial capacity of stack in bytes.
+        \param stackAllocator   Optional allocator for allocating memory for stack.
+    */
+    explicit GenericDocument(Type type, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) :
+        GenericValue<Encoding, Allocator>(type),  allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
+    {
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+    }
+
+    //! Constructor
+    /*! Creates an empty document whose type is Null.
+        \param allocator        Optional allocator for allocating memory.
         \param stackCapacity    Optional initial capacity of stack in bytes.
         \param stackAllocator   Optional allocator for allocating memory for stack.
     */
@@ -1732,13 +2122,13 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
         allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
     {
         if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
     }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
     //! Move constructor in C++11
     GenericDocument(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT
-        : ValueType(std::move(rhs)),
+        : ValueType(std::forward<ValueType>(rhs)), // explicit cast to avoid prohibited move from Document
           allocator_(rhs.allocator_),
           ownAllocator_(rhs.ownAllocator_),
           stack_(std::move(rhs.stack_)),
@@ -1778,6 +2168,54 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
     }
 #endif
 
+    //! Exchange the contents of this document with those of another.
+    /*!
+        \param rhs Another document.
+        \note Constant complexity.
+        \see GenericValue::Swap
+    */
+    GenericDocument& Swap(GenericDocument& rhs) RAPIDJSON_NOEXCEPT {
+        ValueType::Swap(rhs);
+        stack_.Swap(rhs.stack_);
+        internal::Swap(allocator_, rhs.allocator_);
+        internal::Swap(ownAllocator_, rhs.ownAllocator_);
+        internal::Swap(parseResult_, rhs.parseResult_);
+        return *this;
+    }
+
+    // Allow Swap with ValueType.
+    // Refer to Effective C++ 3rd Edition/Item 33: Avoid hiding inherited names.
+    using ValueType::Swap;
+
+    //! free-standing swap function helper
+    /*!
+        Helper function to enable support for common swap implementation pattern based on \c std::swap:
+        \code
+        void swap(MyClass& a, MyClass& b) {
+            using std::swap;
+            swap(a.doc, b.doc);
+            // ...
+        }
+        \endcode
+        \see Swap()
+     */
+    friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT { a.Swap(b); }
+
+    //! Populate this document by a generator which produces SAX events.
+    /*! \tparam Generator A functor with <tt>bool f(Handler)</tt> prototype.
+        \param g Generator functor which sends SAX events to the parameter.
+        \return The document itself for fluent API.
+    */
+    template <typename Generator>
+    GenericDocument& Populate(Generator& g) {
+        ClearStackOnExit scope(*this);
+        if (g(*this)) {
+            RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
+            ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document
+        }
+        return *this;
+    }
+
     //!@name Parse from stream
     //!@{
 
@@ -1790,13 +2228,13 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
     */
     template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
     GenericDocument& ParseStream(InputStream& is) {
-        ValueType::SetNull(); // Remove existing root if exist
-        GenericReader<SourceEncoding, Encoding, StackAllocator> reader(&stack_.GetAllocator());
+        GenericReader<SourceEncoding, Encoding, StackAllocator> reader(
+            stack_.HasAllocator() ? &stack_.GetAllocator() : 0);
         ClearStackOnExit scope(*this);
         parseResult_ = reader.template Parse<parseFlags>(is, *this);
         if (parseResult_) {
             RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
-            this->RawAssign(*stack_.template Pop<ValueType>(1));    // Add this-> to prevent issue 13.
+            ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document
         }
         return *this;
     }
@@ -1855,7 +2293,7 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
         \param str Read-only zero-terminated string to be parsed.
     */
     template <unsigned parseFlags, typename SourceEncoding>
-    GenericDocument& Parse(const Ch* str) {
+    GenericDocument& Parse(const typename SourceEncoding::Ch* str) {
         RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
         GenericStringStream<SourceEncoding> s(str);
         return ParseStream<parseFlags, SourceEncoding>(s);
@@ -1876,6 +2314,42 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
     GenericDocument& Parse(const Ch* str) {
         return Parse<kParseDefaultFlags>(str);
     }
+
+    template <unsigned parseFlags, typename SourceEncoding>
+    GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) {
+        RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
+        MemoryStream ms(reinterpret_cast<const char*>(str), length * sizeof(typename SourceEncoding::Ch));
+        EncodedInputStream<SourceEncoding, MemoryStream> is(ms);
+        ParseStream<parseFlags, SourceEncoding>(is);
+        return *this;
+    }
+
+    template <unsigned parseFlags>
+    GenericDocument& Parse(const Ch* str, size_t length) {
+        return Parse<parseFlags, Encoding>(str, length);
+    }
+    
+    GenericDocument& Parse(const Ch* str, size_t length) {
+        return Parse<kParseDefaultFlags>(str, length);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    template <unsigned parseFlags, typename SourceEncoding>
+    GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str) {
+        // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t)
+        return Parse<parseFlags, SourceEncoding>(str.c_str());
+    }
+
+    template <unsigned parseFlags>
+    GenericDocument& Parse(const std::basic_string<Ch>& str) {
+        return Parse<parseFlags, Encoding>(str.c_str());
+    }
+
+    GenericDocument& Parse(const std::basic_string<Ch>& str) {
+        return Parse<kParseDefaultFlags>(str);
+    }
+#endif // RAPIDJSON_HAS_STDSTRING    
+
     //!@}
 
     //!@name Handling parse errors
@@ -1890,10 +2364,26 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
     //! Get the position of last parsing error in input, 0 otherwise.
     size_t GetErrorOffset() const { return parseResult_.Offset(); }
 
+    //! Implicit conversion to get the last parse result
+#ifndef __clang__ // hide the block below from clang's -Wdocumentation
+    /*! \return \ref ParseResult of the last parse operation
+
+        \code
+          Document doc;
+          ParseResult ok = doc.Parse(json);
+          if (!ok)
+            printf( "JSON parse error: %s (%u)\n", GetParseError_En(ok.Code()), ok.Offset());
+        \endcode
+     */
+#endif
+    operator ParseResult() const { return parseResult_; }
     //!@}
 
     //! Get the allocator of this document.
-    Allocator& GetAllocator() { return *allocator_; }
+    Allocator& GetAllocator() {
+        RAPIDJSON_ASSERT(allocator_);
+        return *allocator_;
+    }
 
     //! Get the capacity of stack in bytes.
     size_t GetStackCapacity() const { return stack_.GetCapacity(); }
@@ -1910,9 +2400,10 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
     };
 
     // callers of the following private Handler functions
-    template <typename,typename,typename> friend class GenericReader; // for parsing
+    // template <typename,typename,typename> friend class GenericReader; // for parsing
     template <typename, typename> friend class GenericValue; // for deep copying
 
+public:
     // Implementation of Handler
     bool Null() { new (stack_.template Push<ValueType>()) ValueType(); return true; }
     bool Bool(bool b) { new (stack_.template Push<ValueType>()) ValueType(b); return true; }
@@ -1922,6 +2413,14 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
     bool Uint64(uint64_t i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
     bool Double(double d) { new (stack_.template Push<ValueType>()) ValueType(d); return true; }
 
+    bool RawNumber(const Ch* str, SizeType length, bool copy) { 
+        if (copy) 
+            new (stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator());
+        else
+            new (stack_.template Push<ValueType>()) ValueType(str, length);
+        return true;
+    }
+
     bool String(const Ch* str, SizeType length, bool copy) { 
         if (copy) 
             new (stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator());
@@ -1936,7 +2435,7 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
 
     bool EndObject(SizeType memberCount) {
         typename ValueType::Member* members = stack_.template Pop<typename ValueType::Member>(memberCount);
-        stack_.template Top<ValueType>()->SetObjectRaw(members, (SizeType)memberCount, GetAllocator());
+        stack_.template Top<ValueType>()->SetObjectRaw(members, memberCount, GetAllocator());
         return true;
     }
 
@@ -1977,38 +2476,152 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
 //! GenericDocument with UTF8 encoding
 typedef GenericDocument<UTF8<> > Document;
 
-// defined here due to the dependency on GenericDocument
-template <typename Encoding, typename Allocator>
-template <typename SourceAllocator>
-inline
-GenericValue<Encoding,Allocator>::GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator)
-{
-    switch (rhs.GetType()) {
-    case kObjectType:
-    case kArrayType: { // perform deep copy via SAX Handler
-            GenericDocument<Encoding,Allocator> d(&allocator);
-            rhs.Accept(d);
-            RawAssign(*d.stack_.template Pop<GenericValue>(1));
-        }
-        break;
-    case kStringType:
-        if (rhs.flags_ == kConstStringFlag) {
-            flags_ = rhs.flags_;
-            data_  = *reinterpret_cast<const Data*>(&rhs.data_);
-        } else {
-            SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator);
-        }
-        break;
-    default: // kNumberType, kTrueType, kFalseType, kNullType
-        flags_ = rhs.flags_;
-        data_  = *reinterpret_cast<const Data*>(&rhs.data_);
-    }
-}
+//! Helper class for accessing Value of array type.
+/*!
+    Instance of this helper class is obtained by \c GenericValue::GetArray().
+    In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
+*/
+template <bool Const, typename ValueT>
+class GenericArray {
+public:
+    typedef GenericArray<true, ValueT> ConstArray;
+    typedef GenericArray<false, ValueT> Array;
+    typedef ValueT PlainType;
+    typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
+    typedef ValueType* ValueIterator;  // This may be const or non-const iterator
+    typedef const ValueT* ConstValueIterator;
+    typedef typename ValueType::AllocatorType AllocatorType;
+    typedef typename ValueType::StringRefType StringRefType;
+
+    template <typename, typename>
+    friend class GenericValue;
+
+    GenericArray(const GenericArray& rhs) : value_(rhs.value_) {}
+    GenericArray& operator=(const GenericArray& rhs) { value_ = rhs.value_; return *this; }
+    ~GenericArray() {}
+
+    SizeType Size() const { return value_.Size(); }
+    SizeType Capacity() const { return value_.Capacity(); }
+    bool Empty() const { return value_.Empty(); }
+    void Clear() const { value_.Clear(); }
+    ValueType& operator[](SizeType index) const {  return value_[index]; }
+    ValueIterator Begin() const { return value_.Begin(); }
+    ValueIterator End() const { return value_.End(); }
+    GenericArray Reserve(SizeType newCapacity, AllocatorType &allocator) const { value_.Reserve(newCapacity, allocator); return *this; }
+    GenericArray PushBack(ValueType& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericArray PushBack(ValueType&& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericArray PushBack(StringRefType value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (const GenericArray&)) PushBack(T value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+    GenericArray PopBack() const { value_.PopBack(); return *this; }
+    ValueIterator Erase(ConstValueIterator pos) const { return value_.Erase(pos); }
+    ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) const { return value_.Erase(first, last); }
+
+#if RAPIDJSON_HAS_CXX11_RANGE_FOR
+    ValueIterator begin() const { return value_.Begin(); }
+    ValueIterator end() const { return value_.End(); }
+#endif
 
-RAPIDJSON_NAMESPACE_END
+private:
+    GenericArray();
+    GenericArray(ValueType& value) : value_(value) {}
+    ValueType& value_;
+};
 
-#if defined(_MSC_VER) || defined(__GNUC__)
-RAPIDJSON_DIAG_POP
+//! Helper class for accessing Value of object type.
+/*!
+    Instance of this helper class is obtained by \c GenericValue::GetObject().
+    In addition to all APIs for object type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
+*/
+template <bool Const, typename ValueT>
+class GenericObject {
+public:
+    typedef GenericObject<true, ValueT> ConstObject;
+    typedef GenericObject<false, ValueT> Object;
+    typedef ValueT PlainType;
+    typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
+    typedef GenericMemberIterator<Const, typename ValueT::EncodingType, typename ValueT::AllocatorType> MemberIterator;  // This may be const or non-const iterator
+    typedef GenericMemberIterator<true, typename ValueT::EncodingType, typename ValueT::AllocatorType> ConstMemberIterator;
+    typedef typename ValueType::AllocatorType AllocatorType;
+    typedef typename ValueType::StringRefType StringRefType;
+    typedef typename ValueType::EncodingType EncodingType;
+    typedef typename ValueType::Ch Ch;
+
+    template <typename, typename>
+    friend class GenericValue;
+
+    GenericObject(const GenericObject& rhs) : value_(rhs.value_) {}
+    GenericObject& operator=(const GenericObject& rhs) { value_ = rhs.value_; return *this; }
+    ~GenericObject() {}
+
+    SizeType MemberCount() const { return value_.MemberCount(); }
+    bool ObjectEmpty() const { return value_.ObjectEmpty(); }
+    template <typename T> ValueType& operator[](T* name) const { return value_[name]; }
+    template <typename SourceAllocator> ValueType& operator[](const GenericValue<EncodingType, SourceAllocator>& name) const { return value_[name]; }
+#if RAPIDJSON_HAS_STDSTRING
+    ValueType& operator[](const std::basic_string<Ch>& name) const { return value_[name]; }
+#endif
+    MemberIterator MemberBegin() const { return value_.MemberBegin(); }
+    MemberIterator MemberEnd() const { return value_.MemberEnd(); }
+    bool HasMember(const Ch* name) const { return value_.HasMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+    bool HasMember(const std::basic_string<Ch>& name) const { return value_.HasMember(name); }
+#endif
+    template <typename SourceAllocator> bool HasMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.HasMember(name); }
+    MemberIterator FindMember(const Ch* name) const { return value_.FindMember(name); }
+    template <typename SourceAllocator> MemberIterator FindMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.FindMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+    MemberIterator FindMember(const std::basic_string<Ch>& name) const { return value_.FindMember(name); }
+#endif
+    GenericObject AddMember(ValueType& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(ValueType& name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#if RAPIDJSON_HAS_STDSTRING
+    GenericObject AddMember(ValueType& name, std::basic_string<Ch>& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
 #endif
+    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericObject)) AddMember(ValueType& name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } // returns the handle by value, like the sibling AddMember overloads
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericObject AddMember(ValueType&& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(ValueType&& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(ValueType& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(StringRefType name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericObject)) AddMember(StringRefType name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    void RemoveAllMembers() { value_.RemoveAllMembers(); }
+    bool RemoveMember(const Ch* name) const { return value_.RemoveMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+    bool RemoveMember(const std::basic_string<Ch>& name) const { return value_.RemoveMember(name); }
+#endif
+    template <typename SourceAllocator> bool RemoveMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.RemoveMember(name); }
+    MemberIterator RemoveMember(MemberIterator m) const { return value_.RemoveMember(m); }
+    MemberIterator EraseMember(ConstMemberIterator pos) const { return value_.EraseMember(pos); }
+    MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) const { return value_.EraseMember(first, last); }
+    bool EraseMember(const Ch* name) const { return value_.EraseMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+    bool EraseMember(const std::basic_string<Ch>& name) const { return EraseMember(ValueType(StringRef(name))); }
+#endif
+    template <typename SourceAllocator> bool EraseMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.EraseMember(name); }
+
+#if RAPIDJSON_HAS_CXX11_RANGE_FOR
+    MemberIterator begin() const { return value_.MemberBegin(); }
+    MemberIterator end() const { return value_.MemberEnd(); }
+#endif
+
+private:
+    GenericObject();
+    GenericObject(ValueType& value) : value_(value) {}
+    ValueType& value_;
+};
+
+RAPIDJSON_NAMESPACE_END
+#ifdef _MINWINDEF_       // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max
+#ifndef NOMINMAX
+#pragma pop_macro("min")
+#pragma pop_macro("max")
+#endif
+#endif
+RAPIDJSON_DIAG_POP
 
 #endif // RAPIDJSON_DOCUMENT_H_
diff --git a/rapidjson/encodedstream.h b/rapidjson/encodedstream.h
index 7c8863f..223601c 100644
--- a/rapidjson/encodedstream.h
+++ b/rapidjson/encodedstream.h
@@ -15,13 +15,19 @@
 #ifndef RAPIDJSON_ENCODEDSTREAM_H_
 #define RAPIDJSON_ENCODEDSTREAM_H_
 
-#include "rapidjson.h"
+#include "stream.h"
+#include "memorystream.h"
 
 #ifdef __GNUC__
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
 #endif
 
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
 RAPIDJSON_NAMESPACE_BEGIN
 
 //! Input byte stream wrapper with a statically bound encoding.
@@ -57,10 +63,38 @@ class EncodedInputStream {
     Ch current_;
 };
 
+//! Specialization for a UTF-8 MemoryStream: forwards reads to the wrapped stream after skipping leading UTF-8 BOM bytes (EF BB BF).
+template <>
+class EncodedInputStream<UTF8<>, MemoryStream> {
+public:
+    typedef UTF8<>::Ch Ch;
+
+    EncodedInputStream(MemoryStream& is) : is_(is) { // is_ is a reference, so the Take() calls below act on the caller's stream
+        if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take(); // BOM byte 1
+        if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take(); // BOM byte 2; NOTE(review): tested even when byte 1 did not match
+        if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take(); // BOM byte 3; likewise tested independently of the previous two
+    }
+    Ch Peek() const { return is_.Peek(); }
+    Ch Take() { return is_.Take(); }
+    size_t Tell() const { return is_.Tell(); }
+
+    // Write-side members are not implemented: they do nothing and return 0.
+    void Put(Ch) {}
+    void Flush() {} // no-op
+    Ch* PutBegin() { return 0; }
+    size_t PutEnd(Ch*) { return 0; }
+
+    MemoryStream& is_; // wrapped input stream (public data member)
+
+private:
+    EncodedInputStream(const EncodedInputStream&); // declared without a body here; presumably to disable copying
+    EncodedInputStream& operator=(const EncodedInputStream&); // declared without a body here; presumably to disable assignment
+};
+
 //! Output byte stream wrapper with statically bound encoding.
 /*!
     \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
-    \tparam InputByteStream Type of input byte stream. For example, FileWriteStream.
+    \tparam OutputByteStream Type of output byte stream. For example, FileWriteStream.
 */
 template <typename Encoding, typename OutputByteStream>
 class EncodedOutputStream {
@@ -77,8 +111,8 @@ class EncodedOutputStream {
     void Flush() { os_.Flush(); }
 
     // Not implemented
-    Ch Peek() const { RAPIDJSON_ASSERT(false); }
-    Ch Take() { RAPIDJSON_ASSERT(false);  }
+    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
+    Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
     size_t Tell() const { RAPIDJSON_ASSERT(false);  return 0; }
     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
@@ -142,11 +176,11 @@ class AutoUTFInputStream {
         // FF FE        UTF-16LE
         // EF BB BF     UTF-8
 
-        const unsigned char* c = (const unsigned char *)is_->Peek4();
+        const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
         if (!c)
             return;
 
-        unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24);
+        unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
         hasBOM_ = false;
         if (bom == 0xFFFE0000)                  { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
         else if (bom == 0x0000FEFF)             { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
@@ -166,7 +200,7 @@ class AutoUTFInputStream {
         // xx xx xx xx  UTF-8
 
         if (!hasBOM_) {
-            unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
+            int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
             switch (pattern) {
             case 0x08: type_ = kUTF32BE; break;
             case 0x0A: type_ = kUTF16BE; break;
@@ -193,7 +227,7 @@ class AutoUTFInputStream {
 //! Output stream wrapper with dynamically bound encoding and automatic encoding detection.
 /*!
     \tparam CharType Type of character for writing.
-    \tparam InputByteStream type of output byte stream to be wrapped.
+    \tparam OutputByteStream type of output byte stream to be wrapped.
 */
 template <typename CharType, typename OutputByteStream>
 class AutoUTFOutputStream {
@@ -227,8 +261,8 @@ class AutoUTFOutputStream {
     void Flush() { os_->Flush(); } 
 
     // Not implemented
-    Ch Peek() const { RAPIDJSON_ASSERT(false); }
-    Ch Take() { RAPIDJSON_ASSERT(false); }
+    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
+    Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
     size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
@@ -254,6 +288,10 @@ class AutoUTFOutputStream {
 
 RAPIDJSON_NAMESPACE_END
 
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
 #ifdef __GNUC__
 RAPIDJSON_DIAG_POP
 #endif
diff --git a/rapidjson/encodings.h b/rapidjson/encodings.h
index 90b46ed..0df1c34 100644
--- a/rapidjson/encodings.h
+++ b/rapidjson/encodings.h
@@ -120,19 +120,45 @@ struct UTF8 {
         }
     }
 
+    template<typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { // writes codepoint as 1 to 4 UTF-8 code units through PutUnsafe()
+        if (codepoint <= 0x7F) // one unit: the code point itself
+            PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
+        else if (codepoint <= 0x7FF) { // two units: 0xC0 lead + one 0x80 trail carrying 6 bits
+            PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
+        }
+        else if (codepoint <= 0xFFFF) { // three units: 0xE0 lead + two 0x80 trails
+            PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
+        }
+        else { // four units: 0xF0 lead + three 0x80 trails
+            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); // values above 0x10FFFF are rejected by assertion only
+            PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
+        }
+    }
+
     template <typename InputStream>
     static bool Decode(InputStream& is, unsigned* codepoint) {
-#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
-#define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
+#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
+#define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
 #define TAIL() COPY(); TRANS(0x70)
-        Ch c = is.Take();
+        typename InputStream::Ch c = is.Take();
         if (!(c & 0x80)) {
-            *codepoint = (unsigned char)c;
+            *codepoint = static_cast<unsigned char>(c);
             return true;
         }
 
-        unsigned char type = GetRange((unsigned char)c);
-        *codepoint = (0xFF >> type) & (unsigned char)c;
+        unsigned char type = GetRange(static_cast<unsigned char>(c));
+        if (type >= 32) {
+            *codepoint = 0;
+        } else {
+            *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
+        }
         bool result = true;
         switch (type) {
         case 2: TAIL(); return result;
@@ -152,7 +178,7 @@ struct UTF8 {
     template <typename InputStream, typename OutputStream>
     static bool Validate(InputStream& is, OutputStream& os) {
 #define COPY() os.Put(c = is.Take())
-#define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
+#define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
 #define TAIL() COPY(); TRANS(0x70)
         Ch c;
         COPY();
@@ -160,7 +186,7 @@ struct UTF8 {
             return true;
 
         bool result = true;
-        switch (GetRange((unsigned char)c)) {
+        switch (GetRange(static_cast<unsigned char>(c))) {
         case 2: TAIL(); return result;
         case 3: TAIL(); TAIL(); return result;
         case 4: COPY(); TRANS(0x50); TAIL(); return result;
@@ -196,12 +222,12 @@ struct UTF8 {
     template <typename InputByteStream>
     static CharType TakeBOM(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-        Ch c = Take(is);
-        if ((unsigned char)c != 0xEFu) return c;
+        typename InputByteStream::Ch c = Take(is);
+        if (static_cast<unsigned char>(c) != 0xEFu) return c;
         c = is.Take();
-        if ((unsigned char)c != 0xBBu) return c;
+        if (static_cast<unsigned char>(c) != 0xBBu) return c;
         c = is.Take();
-        if ((unsigned char)c != 0xBFu) return c;
+        if (static_cast<unsigned char>(c) != 0xBFu) return c;
         c = is.Take();
         return c;
     }
@@ -209,13 +235,15 @@ struct UTF8 {
     template <typename InputByteStream>
     static Ch Take(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-        return is.Take();
+        return static_cast<Ch>(is.Take());
     }
 
     template <typename OutputByteStream>
     static void PutBOM(OutputByteStream& os) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
     }
 
     template <typename OutputByteStream>
@@ -255,22 +283,38 @@ struct UTF16 {
             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
             unsigned v = codepoint - 0x10000;
             os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
-            os.Put((v & 0x3FF) | 0xDC00);
+            os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
+        }
+    }
+
+
+    template<typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { // writes codepoint as one or two UTF-16 code units through PutUnsafe()
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); // output code unit must be at least 2 bytes wide
+        if (codepoint <= 0xFFFF) {
+            RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // a code point inside the surrogate range D800..DFFF is not accepted
+            PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
+        }
+        else {
+            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+            unsigned v = codepoint - 0x10000; // offset that is split 10 bits / 10 bits across the pair
+            PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); // first unit: upper 10 bits
+            PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00)); // second unit: lower 10 bits
         }
     }
 
     template <typename InputStream>
     static bool Decode(InputStream& is, unsigned* codepoint) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
-        Ch c = is.Take();
+        typename InputStream::Ch c = is.Take();
         if (c < 0xD800 || c > 0xDFFF) {
-            *codepoint = c;
+            *codepoint = static_cast<unsigned>(c);
             return true;
         }
         else if (c <= 0xDBFF) {
-            *codepoint = (c & 0x3FF) << 10;
+            *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
             c = is.Take();
-            *codepoint |= (c & 0x3FF);
+            *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
             *codepoint += 0x10000;
             return c >= 0xDC00 && c <= 0xDFFF;
         }
@@ -281,8 +325,8 @@ struct UTF16 {
     static bool Validate(InputStream& is, OutputStream& os) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
-        Ch c;
-        os.Put(c = is.Take());
+        typename InputStream::Ch c;
+        os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
         if (c < 0xD800 || c > 0xDFFF)
             return true;
         else if (c <= 0xDBFF) {
@@ -300,28 +344,29 @@ struct UTF16LE : UTF16<CharType> {
     static CharType TakeBOM(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         CharType c = Take(is);
-        return (unsigned short)c == 0xFEFFu ? Take(is) : c;
+        return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
     }
 
     template <typename InputByteStream>
     static CharType Take(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-        CharType c = (unsigned char)is.Take();
-        c |= (unsigned char)is.Take() << 8;
-        return c;
+        unsigned c = static_cast<uint8_t>(is.Take());
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
+        return static_cast<CharType>(c);
     }
 
     template <typename OutputByteStream>
     static void PutBOM(OutputByteStream& os) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put(0xFFu); os.Put(0xFEu);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
     }
 
     template <typename OutputByteStream>
     static void Put(OutputByteStream& os, CharType c) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put(c & 0xFFu);
-        os.Put((c >> 8) & 0xFFu);
+        os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
     }
 };
 
@@ -332,28 +377,29 @@ struct UTF16BE : UTF16<CharType> {
     static CharType TakeBOM(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         CharType c = Take(is);
-        return (unsigned short)c == 0xFEFFu ? Take(is) : c;
+        return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
     }
 
     template <typename InputByteStream>
     static CharType Take(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-        CharType c = (unsigned char)is.Take() << 8;
-        c |= (unsigned char)is.Take();
-        return c;
+        unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
+        c |= static_cast<uint8_t>(is.Take());
+        return static_cast<CharType>(c);
     }
 
     template <typename OutputByteStream>
     static void PutBOM(OutputByteStream& os) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put(0xFEu); os.Put(0xFFu);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
     }
 
     template <typename OutputByteStream>
     static void Put(OutputByteStream& os, CharType c) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put((c >> 8) & 0xFFu);
-        os.Put(c & 0xFFu);
+        os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
     }
 };
 
@@ -382,6 +428,13 @@ struct UTF32 {
         os.Put(codepoint);
     }
 
+    template<typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { // writes codepoint as a single code unit through PutUnsafe()
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); // output code unit must be at least 4 bytes wide
+        RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); // range is checked by assertion only
+        PutUnsafe(os, codepoint);
+    }
+
     template <typename InputStream>
     static bool Decode(InputStream& is, unsigned* codepoint) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
@@ -406,32 +459,35 @@ struct UTF32LE : UTF32<CharType> {
     static CharType TakeBOM(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         CharType c = Take(is);
-        return (unsigned)c == 0x0000FEFFu ? Take(is) : c;
+        return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
     }
 
     template <typename InputByteStream>
     static CharType Take(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-        CharType c = (unsigned char)is.Take();
-        c |= (unsigned char)is.Take() << 8;
-        c |= (unsigned char)is.Take() << 16;
-        c |= (unsigned char)is.Take() << 24;
-        return c;
+        unsigned c = static_cast<uint8_t>(is.Take());
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
+        return static_cast<CharType>(c);
     }
 
     template <typename OutputByteStream>
     static void PutBOM(OutputByteStream& os) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
     }
 
     template <typename OutputByteStream>
     static void Put(OutputByteStream& os, CharType c) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put(c & 0xFFu);
-        os.Put((c >> 8) & 0xFFu);
-        os.Put((c >> 16) & 0xFFu);
-        os.Put((c >> 24) & 0xFFu);
+        os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
     }
 };
 
@@ -442,32 +498,35 @@ struct UTF32BE : UTF32<CharType> {
     static CharType TakeBOM(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
         CharType c = Take(is);
-        return (unsigned)c == 0x0000FEFFu ? Take(is) : c; 
+        return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; 
     }
 
     template <typename InputByteStream>
     static CharType Take(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-        CharType c = (unsigned char)is.Take() << 24;
-        c |= (unsigned char)is.Take() << 16;
-        c |= (unsigned char)is.Take() << 8;
-        c |= (unsigned char)is.Take();
-        return c;
+        unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
+        return static_cast<CharType>(c);
     }
 
     template <typename OutputByteStream>
     static void PutBOM(OutputByteStream& os) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
     }
 
     template <typename OutputByteStream>
     static void Put(OutputByteStream& os, CharType c) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
-        os.Put((c >> 24) & 0xFFu);
-        os.Put((c >> 16) & 0xFFu);
-        os.Put((c >> 8) & 0xFFu);
-        os.Put(c & 0xFFu);
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
     }
 };
 
@@ -491,31 +550,37 @@ struct ASCII {
         os.Put(static_cast<Ch>(codepoint & 0xFF));
     }
 
+    template<typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { // writes codepoint as one Ch through PutUnsafe()
+        RAPIDJSON_ASSERT(codepoint <= 0x7F); // only 7-bit values are accepted; checked by assertion only
+        PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
+    }
+
     template <typename InputStream>
     static bool Decode(InputStream& is, unsigned* codepoint) {
-        unsigned char c = static_cast<unsigned char>(is.Take());
+        uint8_t c = static_cast<uint8_t>(is.Take());
         *codepoint = c;
         return c <= 0X7F;
     }
 
     template <typename InputStream, typename OutputStream>
     static bool Validate(InputStream& is, OutputStream& os) {
-        unsigned char c = is.Take();
-        os.Put(c);
+        uint8_t c = static_cast<uint8_t>(is.Take());
+        os.Put(static_cast<typename OutputStream::Ch>(c));
         return c <= 0x7F;
     }
 
     template <typename InputByteStream>
     static CharType TakeBOM(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-        Ch c = Take(is);
-        return c;
+        uint8_t c = static_cast<uint8_t>(Take(is));
+        return static_cast<Ch>(c);
     }
 
     template <typename InputByteStream>
     static Ch Take(InputByteStream& is) {
         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
-        return is.Take();
+        return static_cast<Ch>(is.Take());
     }
 
     template <typename OutputByteStream>
@@ -555,21 +620,28 @@ struct AutoUTF {
 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
 
     template<typename OutputStream>
-    RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) {
+    static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) {
         typedef void (*EncodeFunc)(OutputStream&, unsigned);
         static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
         (*f[os.GetType()])(os, codepoint);
     }
 
+    template<typename OutputStream>
+    static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint) { // dispatches to the EncodeUnsafe of the encoding selected by os.GetType()
+        typedef void (*EncodeFunc)(OutputStream&, unsigned);
+        static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; // table order follows the macro: UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE
+        (*f[os.GetType()])(os, codepoint); // os.GetType() is used directly as the table index
+    }
+
     template <typename InputStream>
-    RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
+    static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint) {
         typedef bool (*DecodeFunc)(InputStream&, unsigned*);
         static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
         return (*f[is.GetType()])(is, codepoint);
     }
 
     template <typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
         typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
         static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
         return (*f[is.GetType()])(is, os);
@@ -586,7 +658,7 @@ template<typename SourceEncoding, typename TargetEncoding>
 struct Transcoder {
     //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
         unsigned codepoint;
         if (!SourceEncoding::Decode(is, &codepoint))
             return false;
@@ -594,31 +666,50 @@ struct Transcoder {
         return true;
     }
 
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) { // decodes one code point with SourceEncoding and writes it with TargetEncoding::EncodeUnsafe()
+        unsigned codepoint;
+        if (!SourceEncoding::Decode(is, &codepoint))
+            return false; // decoding failed: nothing is written to os
+        TargetEncoding::EncodeUnsafe(os, codepoint);
+        return true;
+    }
+
     //! Validate one Unicode codepoint from an encoded stream.
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
         return Transcode(is, os);   // Since source/target encoding is different, must transcode.
     }
 };
 
+// Forward declaration: PutUnsafe() is called by Transcoder<Encoding, Encoding>::TranscodeUnsafe() below.
+template<typename Stream>
+inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
+
 //! Specialization of Transcoder with same source and target encoding.
 template<typename Encoding>
 struct Transcoder<Encoding, Encoding> {
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
         os.Put(is.Take());  // Just copy one code unit. This semantic is different from primary template class.
         return true;
     }
     
     template<typename InputStream, typename OutputStream>
-    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
+    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
+        PutUnsafe(os, is.Take());  // Just copy one code unit. This semantic is different from primary template class.
+        return true;
+    }
+    
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
         return Encoding::Validate(is, os);  // source/target encoding are the same
     }
 };
 
 RAPIDJSON_NAMESPACE_END
 
-#if defined(__GNUC__) || defined(_MSV_VER)
+#if defined(__GNUC__) || defined(_MSC_VER)
 RAPIDJSON_DIAG_POP
 #endif
 
diff --git a/rapidjson/error/en.h b/rapidjson/error/en.h
index d5f9caa..2db838b 100644
--- a/rapidjson/error/en.h
+++ b/rapidjson/error/en.h
@@ -12,11 +12,17 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 // specific language governing permissions and limitations under the License.
 
-#ifndef RAPIDJSON_ERROR_EN_H__
-#define RAPIDJSON_ERROR_EN_H__
+#ifndef RAPIDJSON_ERROR_EN_H_
+#define RAPIDJSON_ERROR_EN_H_
 
 #include "error.h"
 
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(covered-switch-default)
+#endif
+
 RAPIDJSON_NAMESPACE_BEGIN
 
 //! Maps error code of parsing into error message.
@@ -32,7 +38,7 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
         case kParseErrorNone:                           return RAPIDJSON_ERROR_STRING("No error.");
 
         case kParseErrorDocumentEmpty:                  return RAPIDJSON_ERROR_STRING("The document is empty.");
-        case kParseErrorDocumentRootNotSingular:        return RAPIDJSON_ERROR_STRING("The document root must not follow by other values.");
+        case kParseErrorDocumentRootNotSingular:        return RAPIDJSON_ERROR_STRING("The document root must not be followed by other values.");
     
         case kParseErrorValueInvalid:                   return RAPIDJSON_ERROR_STRING("Invalid value.");
     
@@ -55,11 +61,14 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
         case kParseErrorTermination:                    return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
         case kParseErrorUnspecificSyntaxError:          return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
 
-        default:
-            return RAPIDJSON_ERROR_STRING("Unknown error.");
+        default:                                        return RAPIDJSON_ERROR_STRING("Unknown error.");
     }
 }
 
 RAPIDJSON_NAMESPACE_END
 
-#endif // RAPIDJSON_ERROR_EN_H__
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_ERROR_EN_H_
diff --git a/rapidjson/error/error.h b/rapidjson/error/error.h
index f9094fb..9311d2f 100644
--- a/rapidjson/error/error.h
+++ b/rapidjson/error/error.h
@@ -12,11 +12,16 @@
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 // specific language governing permissions and limitations under the License.
 
-#ifndef RAPIDJSON_ERROR_ERROR_H__
-#define RAPIDJSON_ERROR_ERROR_H__
+#ifndef RAPIDJSON_ERROR_ERROR_H_
+#define RAPIDJSON_ERROR_ERROR_H_
 
 #include "../rapidjson.h"
 
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
 /*! \file error.h */
 
 /*! \defgroup RAPIDJSON_ERRORS RapidJSON error handling */
@@ -99,7 +104,9 @@ enum ParseErrorCode {
     \see GenericReader::Parse, GenericDocument::Parse
 */
 struct ParseResult {
-
+    //! Unspecified boolean type: a pointer to a const member function, returned by operator BooleanType() (non-null means no error).
+    typedef bool (ParseResult::*BooleanType)() const;
+public:
     //! Default constructor, no error.
     ParseResult() : code_(kParseErrorNone), offset_(0) {}
     //! Constructor to set an error.
@@ -110,8 +117,8 @@ struct ParseResult {
     //! Get the error offset, if \ref IsError(), 0 otherwise.
     size_t Offset() const { return offset_; }
 
-    //! Conversion to \c bool, returns \c true, iff !\ref IsError().
-    operator bool() const { return !IsError(); }
+    //! Conversion to an unspecified boolean type (usable in conditions), yields a non-null value iff !\ref IsError().
+    operator BooleanType() const { return !IsError() ? &ParseResult::IsError : NULL; }
     //! Whether the result is an error.
     bool IsError() const { return code_ != kParseErrorNone; }
 
@@ -119,6 +126,10 @@ struct ParseResult {
     bool operator==(ParseErrorCode code) const { return code_ == code; }
     friend bool operator==(ParseErrorCode code, const ParseResult & err) { return code == err.code_; }
 
+    bool operator!=(const ParseResult& that) const { return !(*this == that); } // negation of operator== on the same operands
+    bool operator!=(ParseErrorCode code) const { return !(*this == code); } // true when code_ differs from code
+    friend bool operator!=(ParseErrorCode code, const ParseResult & err) { return err != code; } // mirrored operand order; forwards to the member overload
+
     //! Reset error code.
     void Clear() { Set(kParseErrorNone); }
     //! Update error code and offset.
@@ -143,4 +154,8 @@ typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetParseErrorFunc)(ParseErrorCode);
 
 RAPIDJSON_NAMESPACE_END
 
-#endif // RAPIDJSON_ERROR_ERROR_H__
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_ERROR_ERROR_H_
diff --git a/rapidjson/filereadstream.h b/rapidjson/filereadstream.h
index 3913eb7..b56ea13 100644
--- a/rapidjson/filereadstream.h
+++ b/rapidjson/filereadstream.h
@@ -15,9 +15,16 @@
 #ifndef RAPIDJSON_FILEREADSTREAM_H_
 #define RAPIDJSON_FILEREADSTREAM_H_
 
-#include "rapidjson.h"
+#include "stream.h"
 #include <cstdio>
 
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(unreachable-code)
+RAPIDJSON_DIAG_OFF(missing-noreturn)
+#endif
+
 RAPIDJSON_NAMESPACE_BEGIN
 
 //! File byte stream for input using fread().
@@ -85,4 +92,8 @@ class FileReadStream {
 
 RAPIDJSON_NAMESPACE_END
 
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
 #endif // RAPIDJSON_FILESTREAM_H_
diff --git a/rapidjson/filewritestream.h b/rapidjson/filewritestream.h
index dfb9cbd..6378dd6 100644
--- a/rapidjson/filewritestream.h
+++ b/rapidjson/filewritestream.h
@@ -15,9 +15,14 @@
 #ifndef RAPIDJSON_FILEWRITESTREAM_H_
 #define RAPIDJSON_FILEWRITESTREAM_H_
 
-#include "rapidjson.h"
+#include "stream.h"
 #include <cstdio>
 
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(unreachable-code)
+#endif
+
 RAPIDJSON_NAMESPACE_BEGIN
 
 //! Wrapper of C file stream for input using fread().
@@ -57,7 +62,11 @@ class FileWriteStream {
 
     void Flush() {
         if (current_ != buffer_) {
-            fwrite(buffer_, 1, static_cast<size_t>(current_ - buffer_), fp_);
+            size_t result = fwrite(buffer_, 1, static_cast<size_t>(current_ - buffer_), fp_);
+            if (result < static_cast<size_t>(current_ - buffer_)) {
+                // failure deliberately ignored at this time
+                // added to avoid warn_unused_result build errors
+            }
             current_ = buffer_;
         }
     }
@@ -88,4 +97,8 @@ inline void PutN(FileWriteStream& stream, char c, size_t n) {
 
 RAPIDJSON_NAMESPACE_END
 
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
 #endif // RAPIDJSON_FILESTREAM_H_
diff --git a/rapidjson/fwd.h b/rapidjson/fwd.h
new file mode 100644
index 0000000..e8104e8
--- /dev/null
+++ b/rapidjson/fwd.h
@@ -0,0 +1,151 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_FWD_H_
+#define RAPIDJSON_FWD_H_
+
+#include "rapidjson.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+// encodings.h
+
+template<typename CharType> struct UTF8;
+template<typename CharType> struct UTF16;
+template<typename CharType> struct UTF16BE;
+template<typename CharType> struct UTF16LE;
+template<typename CharType> struct UTF32;
+template<typename CharType> struct UTF32BE;
+template<typename CharType> struct UTF32LE;
+template<typename CharType> struct ASCII;
+template<typename CharType> struct AutoUTF;
+
+template<typename SourceEncoding, typename TargetEncoding>
+struct Transcoder;
+
+// allocators.h
+
+class CrtAllocator;
+
+template <typename BaseAllocator>
+class MemoryPoolAllocator;
+
+// stream.h
+
+template <typename Encoding>
+struct GenericStringStream;
+
+typedef GenericStringStream<UTF8<char> > StringStream;
+
+template <typename Encoding>
+struct GenericInsituStringStream;
+
+typedef GenericInsituStringStream<UTF8<char> > InsituStringStream;
+
+// stringbuffer.h
+
+template <typename Encoding, typename Allocator>
+class GenericStringBuffer;
+
+typedef GenericStringBuffer<UTF8<char>, CrtAllocator> StringBuffer;
+
+// filereadstream.h
+
+class FileReadStream;
+
+// filewritestream.h
+
+class FileWriteStream;
+
+// memorybuffer.h
+
+template <typename Allocator>
+struct GenericMemoryBuffer;
+
+typedef GenericMemoryBuffer<CrtAllocator> MemoryBuffer;
+
+// memorystream.h
+
+struct MemoryStream;
+
+// reader.h
+
+template<typename Encoding, typename Derived>
+struct BaseReaderHandler;
+
+template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator>
+class GenericReader;
+
+typedef GenericReader<UTF8<char>, UTF8<char>, CrtAllocator> Reader;
+
+// writer.h
+
+template<typename OutputStream, typename SourceEncoding, typename TargetEncoding, typename StackAllocator, unsigned writeFlags>
+class Writer;
+
+// prettywriter.h
+
+template<typename OutputStream, typename SourceEncoding, typename TargetEncoding, typename StackAllocator, unsigned writeFlags>
+class PrettyWriter;
+
+// document.h
+
+template <typename Encoding, typename Allocator> 
+struct GenericMember;
+
+template <bool Const, typename Encoding, typename Allocator>
+class GenericMemberIterator;
+
+template<typename CharType>
+struct GenericStringRef;
+
+template <typename Encoding, typename Allocator> 
+class GenericValue;
+
+typedef GenericValue<UTF8<char>, MemoryPoolAllocator<CrtAllocator> > Value;
+
+template <typename Encoding, typename Allocator, typename StackAllocator>
+class GenericDocument;
+
+typedef GenericDocument<UTF8<char>, MemoryPoolAllocator<CrtAllocator>, CrtAllocator> Document;
+
+// pointer.h
+
+template <typename ValueType, typename Allocator>
+class GenericPointer;
+
+typedef GenericPointer<Value, CrtAllocator> Pointer;
+
+// schema.h
+
+template <typename SchemaDocumentType>
+class IGenericRemoteSchemaDocumentProvider;
+
+template <typename ValueT, typename Allocator>
+class GenericSchemaDocument;
+
+typedef GenericSchemaDocument<Value, CrtAllocator> SchemaDocument;
+typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocumentProvider;
+
+template <
+    typename SchemaDocumentType,
+    typename OutputHandler,
+    typename StateAllocator>
+class GenericSchemaValidator;
+
+typedef GenericSchemaValidator<SchemaDocument, BaseReaderHandler<UTF8<char>, void>, CrtAllocator> SchemaValidator;
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_FWD_H_
diff --git a/rapidjson/internal/biginteger.h b/rapidjson/internal/biginteger.h
index 99a30ac..9d3e88c 100644
--- a/rapidjson/internal/biginteger.h
+++ b/rapidjson/internal/biginteger.h
@@ -19,6 +19,7 @@
 
 #if defined(_MSC_VER) && defined(_M_AMD64)
 #include <intrin.h> // for _umul128
+#pragma intrinsic(_umul128)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -50,7 +51,16 @@ class BigInteger {
         if (length > 0)
             AppendDecimal64(decimals + i, decimals + i + length);
     }
-
+    
+    //! Copy assignment: copies rhs.count_ and that many digits; self-assignment is a no-op.
+    BigInteger& operator=(const BigInteger &rhs) {
+        if (this == &rhs)
+            return *this;
+        count_ = rhs.count_;
+        std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type));
+        return *this;
+    }
+    
     BigInteger& operator=(uint64_t u) {
         digits_[0] = u;            
         count_ = 1;
@@ -230,7 +240,7 @@ class BigInteger {
         uint64_t r = 0;
         for (const char* p = begin; p != end; ++p) {
             RAPIDJSON_ASSERT(*p >= '0' && *p <= '9');
-            r = r * 10 + (*p - '0');
+            r = r * 10u + static_cast<unsigned>(*p - '0');
         }
         return r;
     }
diff --git a/rapidjson/internal/diyfp.h b/rapidjson/internal/diyfp.h
index be5903a..29abf80 100644
--- a/rapidjson/internal/diyfp.h
+++ b/rapidjson/internal/diyfp.h
@@ -21,9 +21,10 @@
 
 #include "../rapidjson.h"
 
-#if defined(_MSC_VER) && defined(_M_AMD64)
+#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
 #include <intrin.h>
 #pragma intrinsic(_BitScanReverse64)
+#pragma intrinsic(_umul128)
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -34,8 +35,13 @@ RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
 #endif
 
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
 struct DiyFp {
-    DiyFp() {}
+    DiyFp() : f(), e() {}
 
     DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
 
@@ -228,19 +234,24 @@ inline DiyFp GetCachedPower(int e, int* K) {
     unsigned index = static_cast<unsigned>((k >> 3) + 1);
     *K = -(-348 + static_cast<int>(index << 3));    // decimal exponent no need lookup table
 
-	return GetCachedPowerByIndex((size_t)index);
+    return GetCachedPowerByIndex(index);
 }
 
 inline DiyFp GetCachedPower10(int exp, int *outExp) {
-     unsigned index = (exp + 348) / 8;
-     *outExp = -348 + index * 8;
-	 return GetCachedPowerByIndex((size_t)index);
+     unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u;
+     *outExp = -348 + static_cast<int>(index) * 8;
+     return GetCachedPowerByIndex(index);
  }
 
 #ifdef __GNUC__
 RAPIDJSON_DIAG_POP
 #endif
 
+#ifdef __clang__
+// Balances the clang RAPIDJSON_DIAG_PUSH placed before struct DiyFp; no warning may be disabled after the pop.
+RAPIDJSON_DIAG_POP
+#endif
+
 } // namespace internal
 RAPIDJSON_NAMESPACE_END
 
diff --git a/rapidjson/internal/dtoa.h b/rapidjson/internal/dtoa.h
index 2d8d2e4..bf2e9b2 100644
--- a/rapidjson/internal/dtoa.h
+++ b/rapidjson/internal/dtoa.h
@@ -29,6 +29,7 @@ namespace internal {
 #ifdef __GNUC__
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
+RAPIDJSON_DIAG_OFF(array-bounds) // some gcc versions generate wrong warnings https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
 #endif
 
 inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
@@ -40,7 +41,7 @@ inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uin
     }
 }
 
-inline unsigned CountDecimalDigit32(uint32_t n) {
+inline int CountDecimalDigit32(uint32_t n) {
     // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
     if (n < 10) return 1;
     if (n < 100) return 2;
@@ -101,7 +102,8 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff
         kappa--;
         if (p2 < delta) {
             *K += kappa;
-            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]);
+            int index = -kappa;
+            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[index] : 0));
             return;
         }
     }
@@ -145,10 +147,10 @@ inline char* WriteExponent(int K, char* buffer) {
     return buffer;
 }
 
-inline char* Prettify(char* buffer, int length, int k) {
+inline char* Prettify(char* buffer, int length, int k, int maxDecimalPlaces) {
     const int kk = length + k;  // 10^(kk-1) <= v < 10^kk
 
-    if (length <= kk && kk <= 21) {
+    if (0 <= k && kk <= 21) {
         // 1234e7 -> 12340000000
         for (int i = length; i < kk; i++)
             buffer[i] = '0';
@@ -158,19 +160,44 @@ inline char* Prettify(char* buffer, int length, int k) {
     }
     else if (0 < kk && kk <= 21) {
         // 1234e-2 -> 12.34
-        std::memmove(&buffer[kk + 1], &buffer[kk], length - kk);
+        std::memmove(&buffer[kk + 1], &buffer[kk], static_cast<size_t>(length - kk));
         buffer[kk] = '.';
-        return &buffer[length + 1];
+        if (0 > k + maxDecimalPlaces) {
+            // When maxDecimalPlaces = 2, 1.2345 -> 1.23, 1.102 -> 1.1
+            // Remove extra trailing zeros (at least one) after truncation.
+            for (int i = kk + maxDecimalPlaces; i > kk + 1; i--)
+                if (buffer[i] != '0')
+                    return &buffer[i + 1];
+            return &buffer[kk + 2]; // Reserve one zero
+        }
+        else
+            return &buffer[length + 1];
     }
     else if (-6 < kk && kk <= 0) {
         // 1234e-6 -> 0.001234
         const int offset = 2 - kk;
-        std::memmove(&buffer[offset], &buffer[0], length);
+        std::memmove(&buffer[offset], &buffer[0], static_cast<size_t>(length));
         buffer[0] = '0';
         buffer[1] = '.';
         for (int i = 2; i < offset; i++)
             buffer[i] = '0';
-        return &buffer[length + offset];
+        if (length - kk > maxDecimalPlaces) {
+            // When maxDecimalPlaces = 2, 0.123 -> 0.12, 0.102 -> 0.1
+            // Remove extra trailing zeros (at least one) after truncation.
+            for (int i = maxDecimalPlaces + 1; i > 2; i--)
+                if (buffer[i] != '0')
+                    return &buffer[i + 1];
+            return &buffer[3]; // Reserve one zero
+        }
+        else
+            return &buffer[length + offset];
+    }
+    else if (kk < -maxDecimalPlaces) {
+        // Truncate to zero
+        buffer[0] = '0';
+        buffer[1] = '.';
+        buffer[2] = '0';
+        return &buffer[3];
     }
     else if (length == 1) {
         // 1e30
@@ -179,14 +206,15 @@ inline char* Prettify(char* buffer, int length, int k) {
     }
     else {
         // 1234e30 -> 1.234e33
-        std::memmove(&buffer[2], &buffer[1], length - 1);
+        std::memmove(&buffer[2], &buffer[1], static_cast<size_t>(length - 1));
         buffer[1] = '.';
         buffer[length + 1] = 'e';
         return WriteExponent(kk - 1, &buffer[0 + length + 2]);
     }
 }
 
-inline char* dtoa(double value, char* buffer) {
+inline char* dtoa(double value, char* buffer, int maxDecimalPlaces = 324) {
+    RAPIDJSON_ASSERT(maxDecimalPlaces >= 1);
     Double d(value);
     if (d.IsZero()) {
         if (d.Sign())
@@ -203,7 +231,7 @@ inline char* dtoa(double value, char* buffer) {
         }
         int length, K;
         Grisu2(value, buffer, &length, &K);
-        return Prettify(buffer, length, K);
+        return Prettify(buffer, length, K, maxDecimalPlaces);
     }
 }
 
diff --git a/rapidjson/internal/ieee754.h b/rapidjson/internal/ieee754.h
index e3f0336..c2684ba 100644
--- a/rapidjson/internal/ieee754.h
+++ b/rapidjson/internal/ieee754.h
@@ -40,6 +40,7 @@ class Double {
 
     bool IsNan() const { return (u_ & kExponentMask) == kExponentMask && Significand() != 0; }
     bool IsInf() const { return (u_ & kExponentMask) == kExponentMask && Significand() == 0; }
+    bool IsNanOrInf() const { return (u_ & kExponentMask) == kExponentMask; }
     bool IsNormal() const { return (u_ & kExponentMask) != 0 || Significand() == 0; }
     bool IsZero() const { return (u_ & (kExponentMask | kSignificandMask)) == 0; }
 
@@ -47,7 +48,7 @@ class Double {
     int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; }
     uint64_t ToBias() const { return (u_ & kSignMask) ? ~u_ + 1 : u_ | kSignMask; }
 
-    static unsigned EffectiveSignificandSize(int order) {
+    static int EffectiveSignificandSize(int order) {
         if (order >= -1021)
             return 53;
         else if (order <= -1074)
diff --git a/rapidjson/internal/regex.h b/rapidjson/internal/regex.h
new file mode 100644
index 0000000..e1a2faa
--- /dev/null
+++ b/rapidjson/internal/regex.h
@@ -0,0 +1,734 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_REGEX_H_
+#define RAPIDJSON_INTERNAL_REGEX_H_
+
+#include "../allocators.h"
+#include "../stream.h"
+#include "stack.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(implicit-fallthrough)
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#if __GNUC__ >= 7
+RAPIDJSON_DIAG_OFF(implicit-fallthrough)
+#endif
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
+#endif
+
+#ifndef RAPIDJSON_REGEX_VERBOSE
+#define RAPIDJSON_REGEX_VERBOSE 0
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+///////////////////////////////////////////////////////////////////////////////
+// DecodedStream
+
+//! One-codepoint look-ahead reader that decodes a SourceStream with Encoding::Decode().
+template <typename SourceStream, typename Encoding>
+class DecodedStream {
+public:
+    DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
+    unsigned Peek() { return codepoint_; }   //!< Current codepoint; 0 after a decoded '\0' or a failed decode.
+    unsigned Take() {                        //!< Returns the current codepoint and advances unless it is 0.
+        const unsigned current = codepoint_;
+        if (current != 0)
+            Decode();
+        return current;
+    }
+
+private:
+    void Decode() {  // A failed decode is reported as the terminating 0.
+        const bool decoded = Encoding::Decode(ss_, &codepoint_);
+        if (!decoded) codepoint_ = 0;
+    }
+    SourceStream& ss_;
+    unsigned codepoint_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericRegex
+
+static const SizeType kRegexInvalidState = ~SizeType(0);  //!< Represents an invalid index in GenericRegex::State::out, out1
+static const SizeType kRegexInvalidRange = ~SizeType(0);
+
+template <typename Encoding, typename Allocator>
+class GenericRegexSearch;
+
+//! Regular expression engine supporting a subset of the ECMAScript grammar.
+/*!
+    Supported regular expression syntax:
+    - \c ab     Concatenation
+    - \c a|b    Alternation
+    - \c a?     Zero or one
+    - \c a*     Zero or more
+    - \c a+     One or more
+    - \c a{3}   Exactly 3 times
+    - \c a{3,}  At least 3 times
+    - \c a{3,5} 3 to 5 times
+    - \c (ab)   Grouping
+    - \c ^a     At the beginning
+    - \c a$     At the end
+    - \c .      Any character
+    - \c [abc]  Character classes
+    - \c [a-c]  Character class range
+    - \c [a-z0-9_] Character class combination
+    - \c [^abc] Negated character classes
+    - \c [^a-c] Negated character class range
+    - \c [\b]   Backspace (U+0008)
+    - \c \\| \\\\ ...  Escape characters
+    - \c \\f Form feed (U+000C)
+    - \c \\n Line feed (U+000A)
+    - \c \\r Carriage return (U+000D)
+    - \c \\t Tab (U+0009)
+    - \c \\v Vertical tab (U+000B)
+
+    \note This is a Thompson NFA engine, implemented with reference to 
+        Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).", 
+        https://swtch.com/~rsc/regexp/regexp1.html 
+*/
+template <typename Encoding, typename Allocator = CrtAllocator>
+class GenericRegex {
+public:
+    typedef Encoding EncodingType;
+    typedef typename Encoding::Ch Ch;
+    template <typename, typename> friend class GenericRegexSearch;
+
+    GenericRegex(const Ch* source, Allocator* allocator = 0) : // Compiles the pattern in source; root_ starts as kRegexInvalidState and is only changed by Parse()
+        states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), 
+        anchorBegin_(), anchorEnd_()
+    {
+        GenericStringStream<Encoding> ss(source);
+        DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);   // Parse() consumes decoded codepoints, not raw Ch units
+        Parse(ds);
+    }
+
+    ~GenericRegex() {}
+
+    bool IsValid() const {   // True once Parse() has reduced the whole pattern to a single fragment and stored its start in root_
+        return root_ != kRegexInvalidState;
+    }
+
+private:
+    enum Operator {   // Declaration order is significant: Parse() compares values with '<' against kAlternation when it sees '|'
+        kZeroOrOne,
+        kZeroOrMore,
+        kOneOrMore,
+        kConcatenation,
+        kAlternation,
+        kLeftParenthesis
+    };
+
+    static const unsigned kAnyCharacterClass = 0xFFFFFFFF;   //!< For '.'
+    static const unsigned kRangeCharacterClass = 0xFFFFFFFE;   //!< For '[...]'; the state's rangeStart holds the index of its first Range
+    static const unsigned kRangeNegationFlag = 0x80000000;   //!< OR-ed into Range::start of the first range of a '[^...]' class
+
+    struct Range {
+        unsigned start; //!< First codepoint; on the first range of a negated class it also carries kRangeNegationFlag
+        unsigned end;   //!< Codepoint written after '-' (e.g. 'c' in a-c); equals start for a single character
+        SizeType next;  //!< Index of the next Range of the same class, or kRegexInvalidRange
+    };
+
+    struct State {
+        SizeType out;     //!< Next state; kRegexInvalidState for the matching state and for links not patched yet
+        SizeType out1;    //!< Second branch of a split state; kRegexInvalidState otherwise
+        SizeType rangeStart;   //!< First Range index when codepoint == kRangeCharacterClass; kRegexInvalidRange otherwise
+        unsigned codepoint;    //!< Codepoint to match, kAnyCharacterClass or kRangeCharacterClass; 0 for split and matching states
+    };
+
+    struct Frag {
+        Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
+        SizeType start;   //!< Entry state of the fragment
+        SizeType out; //!< Head of the list of dangling output states, chained through State::out
+        SizeType minIndex;   //!< Lowest state index of the fragment; CloneTopOperand() copies [minIndex, stateCount_)
+    };
+
+    State& GetState(SizeType index) {   // Views states_ as a State array; NOTE(review): the reference presumably does not survive a later Push on states_
+        RAPIDJSON_ASSERT(index < stateCount_);
+        return states_.template Bottom<State>()[index];
+    }
+
+    const State& GetState(SizeType index) const {   // Read-only overload; asserts index < stateCount_
+        RAPIDJSON_ASSERT(index < stateCount_);
+        return states_.template Bottom<State>()[index];
+    }
+
+    Range& GetRange(SizeType index) {   // Views ranges_ as a Range array; asserts index < rangeCount_
+        RAPIDJSON_ASSERT(index < rangeCount_);
+        return ranges_.template Bottom<Range>()[index];
+    }
+
+    const Range& GetRange(SizeType index) const {   // Read-only overload; asserts index < rangeCount_
+        RAPIDJSON_ASSERT(index < rangeCount_);
+        return ranges_.template Bottom<Range>()[index];
+    }
+
+    template <typename InputStream>
+    void Parse(DecodedStream<InputStream, Encoding>& ds) {   // Builds the NFA from ds; root_ is assigned only at the very end, so every early return leaves it untouched
+        Allocator allocator;
+        Stack<Allocator> operandStack(&allocator, 256);     // Frag
+        Stack<Allocator> operatorStack(&allocator, 256);    // Operator
+        Stack<Allocator> atomCountStack(&allocator, 256);   // unsigned (Atom per parenthesis)
+
+        *atomCountStack.template Push<unsigned>() = 0;
+
+        unsigned codepoint;
+        while (ds.Peek() != 0) {
+            switch (codepoint = ds.Take()) {
+                case '^':
+                    anchorBegin_ = true;   // NOTE(review): set for an unescaped '^' at any position, not only at the start
+                    break;
+
+                case '$':
+                    anchorEnd_ = true;   // NOTE(review): likewise set for an unescaped '$' at any position
+                    break;
+
+                case '|':
+                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)   // reduce operators declared before kAlternation first
+                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                            return;
+                    *operatorStack.template Push<Operator>() = kAlternation;
+                    *atomCountStack.template Top<unsigned>() = 0;   // the next atom starts a new alternative, so no implicit concatenation
+                    break;
+
+                case '(':
+                    *operatorStack.template Push<Operator>() = kLeftParenthesis;
+                    *atomCountStack.template Push<unsigned>() = 0;   // the group gets its own atom counter
+                    break;
+
+                case ')':
+                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
+                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                            return;
+                    if (operatorStack.Empty())   // ')' without a matching '('
+                        return;
+                    operatorStack.template Pop<Operator>(1);
+                    atomCountStack.template Pop<unsigned>(1);
+                    ImplicitConcatenation(atomCountStack, operatorStack);   // the closed group counts as one atom of the enclosing level
+                    break;
+
+                case '?':
+                    if (!Eval(operandStack, kZeroOrOne))
+                        return;
+                    break;
+
+                case '*':
+                    if (!Eval(operandStack, kZeroOrMore))
+                        return;
+                    break;
+
+                case '+':
+                    if (!Eval(operandStack, kOneOrMore))
+                        return;
+                    break;
+
+                case '{':
+                    {
+                        unsigned n, m;
+                        if (!ParseUnsigned(ds, &n))
+                            return;
+
+                        if (ds.Peek() == ',') {
+                            ds.Take();
+                            if (ds.Peek() == '}')
+                                m = kInfinityQuantifier;   // a{n,}
+                            else if (!ParseUnsigned(ds, &m) || m < n)
+                                return;
+                        }
+                        else
+                            m = n;   // a{n}
+
+                        if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
+                            return;
+                        ds.Take();
+                    }
+                    break;
+
+                case '.':
+                    PushOperand(operandStack, kAnyCharacterClass);
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+                    break;
+
+                case '[':
+                    {
+                        SizeType range;
+                        if (!ParseRange(ds, &range))
+                            return;
+                        SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
+                        GetState(s).rangeStart = range;   // attach the parsed Range list to the new state
+                        *operandStack.template Push<Frag>() = Frag(s, s, s);
+                    }
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+                    break;
+
+                case '\\': // Escape character
+                    if (!CharacterEscape(ds, &codepoint))
+                        return; // Unsupported escape character
+                    // fall through to default
+
+                default: // Pattern character
+                    PushOperand(operandStack, codepoint);
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+            }
+        }
+
+        while (!operatorStack.Empty())   // end of pattern: reduce everything still pending
+            if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                return;
+
+        // Link the operand to matching state.
+        if (operandStack.GetSize() == sizeof(Frag)) {   // exactly one fragment must remain, otherwise root_ stays invalid
+            Frag* e = operandStack.template Pop<Frag>(1);
+            Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));   // the matching state has no outgoing links
+            root_ = e->start;
+
+#if RAPIDJSON_REGEX_VERBOSE
+            printf("root: %d\n", root_);
+            for (SizeType i = 0; i < stateCount_ ; i++) {
+                State& s = GetState(i);
+                printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
+            }
+            printf("\n");
+#endif
+        }
+    }
+
+    SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {   // Appends a state and returns its index
+        State& fresh = *states_.template Push<State>();
+        fresh.rangeStart = kRegexInvalidRange;   // no character class attached yet
+        fresh.codepoint = codepoint;
+        fresh.out1 = out1;
+        fresh.out = out;
+        return stateCount_++;
+    }
+
+    void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {   // Pushes a one-state fragment for codepoint
+        const SizeType single = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
+        *operandStack.template Push<Frag>() = Frag(single, single, single);   // the state is its own start, dangling output and minIndex
+    }
+
+    void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
+        unsigned& atoms = *atomCountStack.template Top<unsigned>();   // atoms seen so far at the current nesting level
+        if (atoms != 0) *operatorStack.template Push<Operator>() = kConcatenation;   // join the new atom with the preceding one
+        ++atoms;
+    }
+
+    SizeType Append(SizeType l1, SizeType l2) {   // Links list l2 behind the tail of list l1 (chained via State::out); returns l1
+        SizeType tail = l1;
+        for (SizeType next = GetState(tail).out; next != kRegexInvalidState; next = GetState(tail).out)
+            tail = next;
+        GetState(tail).out = l2;
+        return l1;
+    }
+
+    void Patch(SizeType l, SizeType s) {   // Points every link of the dangling list l at state s
+        for (State* cur; l != kRegexInvalidState; cur->out = s) {
+            cur = &GetState(l);
+            l = cur->out;   // step to the rest of the list before the link is overwritten
+        }
+    }
+
+    bool Eval(Stack<Allocator>& operandStack, Operator op) {   // Applies op to the fragment(s) on top of operandStack; false means the pattern is malformed
+        switch (op) {
+            case kConcatenation:
+                if (operandStack.GetSize() >= sizeof(Frag) * 2) {   // "()a" queues a concatenation with a single operand
+                    Frag e2 = *operandStack.template Pop<Frag>(1);
+                    Frag e1 = *operandStack.template Pop<Frag>(1);
+                    Patch(e1.out, e2.start);   // e1's dangling links continue into e2
+                    *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
+                    return true;
+                }
+                return false;
+
+            case kAlternation:
+                if (operandStack.GetSize() >= sizeof(Frag) * 2) {
+                    Frag e2 = *operandStack.template Pop<Frag>(1);
+                    Frag e1 = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(e1.start, e2.start, 0);   // split state branching into both alternatives
+                    *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
+                    return true;
+                }
+                return false;
+
+            case kZeroOrOne:
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);   // split: out skips e (left dangling), out1 enters e
+                    *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
+                    return true;
+                }
+                return false;
+
+            case kZeroOrMore:
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                    Patch(e.out, s);   // loop back from e to the split state
+                    *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
+                    return true;
+                }
+                return false;
+
+            default: 
+                if (op != kOneOrMore) return false;   // e.g. kLeftParenthesis left over from an unclosed '('
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                    Patch(e.out, s);
+                    *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);   // unlike kZeroOrMore, entry is e itself
+                    return true;
+                }
+                return false;
+        }
+    }
+
+    bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {   // Expands {n,m} on the top operand; false if it cannot be applied
+        RAPIDJSON_ASSERT(n <= m);
+        if (operandStack.GetSize() < sizeof(Frag)) return false;   // quantifier without operand (e.g. "{3}"): reject like '?', '*' and '+' do
+
+        if (n == 0) {
+            if (m == 0)                             // a{0} is not supported
+                return false;
+            else if (m == kInfinityQuantifier)
+                Eval(operandStack, kZeroOrMore);    // a{0,} -> a*
+            else {
+                Eval(operandStack, kZeroOrOne);         // a{0,5} -> a?
+                for (unsigned i = 0; i < m - 1; i++)
+                    CloneTopOperand(operandStack);      // a{0,5} -> a? a? a? a? a?
+                for (unsigned i = 0; i < m - 1; i++)
+                    Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
+            }
+            return true;
+        }
+
+        for (unsigned i = 0; i < n - 1; i++)        // a{3} -> a a a
+            CloneTopOperand(operandStack);
+
+        if (m == kInfinityQuantifier)
+            Eval(operandStack, kOneOrMore);         // a{3,} -> a a a+
+        else if (m > n) {
+            CloneTopOperand(operandStack);          // a{3,5} -> a a a a
+            Eval(operandStack, kZeroOrOne);         // a{3,5} -> a a a a?
+            for (unsigned i = n; i < m - 1; i++)
+                CloneTopOperand(operandStack);      // a{3,5} -> a a a a? a?
+            for (unsigned i = n; i < m; i++)
+                Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
+        }
+
+        for (unsigned i = 0; i < n - 1; i++)
+            Eval(operandStack, kConcatenation);     // a{3} -> aaa, a{3,} -> aaa+, a{3,5} -> aaaa?a?
+
+        return true;
+    }
+
+    static SizeType Min(SizeType a, SizeType b) { return (b <= a) ? b : a; }   // smaller of the two indices
+
+    void CloneTopOperand(Stack<Allocator>& operandStack) {   // Duplicates the states of the top fragment and pushes the copy as a new operand
+        const Frag src = *operandStack.template Top<Frag>(); // Copied by value so the Frag push below cannot invalidate it
+        SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src.minIndex, stateCount_)
+        State* s = states_.template Push<State>(count);
+        memcpy(s, &GetState(src.minIndex), count * sizeof(State));   // the source address is taken after the Push above
+        for (SizeType j = 0; j < count; j++) {   // shift copied links by count so they point at the copies; rangeStart is left as copied
+            if (s[j].out != kRegexInvalidState)
+                s[j].out += count;
+            if (s[j].out1 != kRegexInvalidState)
+                s[j].out1 += count;
+        }
+        *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
+        stateCount_ += count;
+    }
+
+    template <typename InputStream>
+    bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {   // Reads a run of decimal digits into *u; false if there is no digit or the value exceeds 2^32 - 1
+        unsigned r = 0;
+        if (ds.Peek() < '0' || ds.Peek() > '9')
+            return false;
+        while (ds.Peek() >= '0' && ds.Peek() <= '9') {
+            if (r > 429496729 || (r == 429496729 && ds.Peek() > '5')) // 2^32 - 1 = 4294967295; above 429496729 any further digit overflows
+                return false; // overflow
+            r = r * 10 + (ds.Take() - '0');
+        }
+        *u = r;
+        return true;
+    }
+
+    template <typename InputStream>
+    bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {   // Parses a class body after '['; on success *range is the index of its first Range
+        bool isBegin = true;
+        bool negate = false;
+        int step = 0;   // 0: next codepoint starts a range, 1: a '-' may follow, 2: next codepoint ends the range
+        SizeType start = kRegexInvalidRange;
+        SizeType current = kRegexInvalidRange;
+        unsigned codepoint;
+        while ((codepoint = ds.Take()) != 0) {
+            if (isBegin) {
+                isBegin = false;
+                if (codepoint == '^') {   // '^' negates only as the very first character
+                    negate = true;
+                    continue;
+                }
+            }
+
+            switch (codepoint) {
+            case ']':
+                if (start == kRegexInvalidRange)
+                    return false;   // Error: nothing inside []
+                if (step == 2) { // Add trailing '-'
+                    SizeType r = NewRange('-');
+                    RAPIDJSON_ASSERT(current != kRegexInvalidRange);
+                    GetRange(current).next = r;
+                }
+                if (negate)
+                    GetRange(start).start |= kRangeNegationFlag;   // negation is recorded on the first range only
+                *range = start;
+                return true;
+
+            case '\\':
+                if (ds.Peek() == 'b') {
+                    ds.Take();
+                    codepoint = 0x0008; // Escape backspace character
+                }
+                else if (!CharacterEscape(ds, &codepoint))
+                    return false;
+                // fall through to default
+
+            default:
+                switch (step) {
+                case 1:
+                    if (codepoint == '-') {   // possible start of "a-c"; decided by the next codepoint
+                        step++;
+                        break;
+                    }
+                    // fall through to step 0 for other characters
+
+                case 0:
+                    {
+                        SizeType r = NewRange(codepoint);   // single-codepoint range, appended to the list
+                        if (current != kRegexInvalidRange)
+                            GetRange(current).next = r;
+                        if (start == kRegexInvalidRange)
+                            start = r;
+                        current = r;
+                    }
+                    step = 1;
+                    break;
+
+                default:
+                    RAPIDJSON_ASSERT(step == 2);
+                    GetRange(current).end = codepoint;   // completes "a-c" on the most recent range
+                    step = 0;
+                }
+            }
+        }
+        return false;   // input ended before the closing ']'
+    }
+    
+    SizeType NewRange(unsigned codepoint) { // Appends the single-codepoint range [codepoint, codepoint] and returns its index.
+        Range& added = *ranges_.template Push<Range>();
+        added.next = kRegexInvalidRange; // not linked to a following range yet
+        added.start = added.end = codepoint;
+        return rangeCount_++;
+    }
+
+    template <typename InputStream>
+    bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) { // Consumes one codepoint from ds and maps it to the escaped value; returns false if it is not a supported escape.
+        unsigned codepoint;
+        switch (codepoint = ds.Take()) {
+            case '^':
+            case '$':
+            case '|':
+            case '(':
+            case ')':
+            case '?':
+            case '*':
+            case '+':
+            case '.':
+            case '[':
+            case ']':
+            case '{':
+            case '}':
+            case '\\':
+                *escapedCodepoint = codepoint; return true; // these characters escape to themselves
+            case 'f': *escapedCodepoint = 0x000C; return true;
+            case 'n': *escapedCodepoint = 0x000A; return true;
+            case 'r': *escapedCodepoint = 0x000D; return true;
+            case 't': *escapedCodepoint = 0x0009; return true;
+            case 'v': *escapedCodepoint = 0x000B; return true;
+            default:
+                return false; // Unsupported escape character
+        }
+    }
+
+    Stack<Allocator> states_;
+    Stack<Allocator> ranges_;
+    SizeType root_;
+    SizeType stateCount_;
+    SizeType rangeCount_;
+
+    static const unsigned kInfinityQuantifier = ~0u;
+
+    // For SearchWithAnchoring()
+    bool anchorBegin_;
+    bool anchorEnd_;
+};
+
+template <typename RegexType, typename Allocator = CrtAllocator>
+class GenericRegexSearch { // Runs a GenericRegex against input; keeps a reference to the regex plus scratch buffers sized from its state count.
+public:
+    typedef typename RegexType::EncodingType Encoding;
+    typedef typename Encoding::Ch Ch;
+
+    GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : 
+        regex_(regex), allocator_(allocator), ownAllocator_(0),
+        state0_(allocator, 0), state1_(allocator, 0), stateSet_()
+    {
+        RAPIDJSON_ASSERT(regex_.IsValid());
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+        stateSet_ = static_cast<uint32_t*>(allocator_->Malloc(GetStateSetSize())); // cast matches the declared type of stateSet_
+        state0_.template Reserve<SizeType>(regex_.stateCount_);
+        state1_.template Reserve<SizeType>(regex_.stateCount_);
+    }
+
+    ~GenericRegexSearch() {
+        Allocator::Free(stateSet_);
+        RAPIDJSON_DELETE(ownAllocator_);
+    }
+
+    template <typename InputStream>
+    bool Match(InputStream& is) { // anchored at both the beginning and the end
+        return SearchWithAnchoring(is, true, true);
+    }
+
+    bool Match(const Ch* s) {
+        GenericStringStream<Encoding> is(s);
+        return Match(is);
+    }
+
+    template <typename InputStream>
+    bool Search(InputStream& is) { // anchoring is taken from the regex itself
+        return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
+    }
+
+    bool Search(const Ch* s) {
+        GenericStringStream<Encoding> is(s);
+        return Search(is);
+    }
+
+private:
+    typedef typename RegexType::State State;
+    typedef typename RegexType::Range Range;
+
+    template <typename InputStream>
+    bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
+        DecodedStream<InputStream, Encoding> ds(is);
+
+        state0_.Clear();
+        Stack<Allocator> *current = &state0_, *next = &state1_;
+        const size_t stateSetSize = GetStateSetSize();
+        std::memset(stateSet_, 0, stateSetSize);
+
+        bool matched = AddState(*current, regex_.root_);
+        unsigned codepoint;
+        while (!current->Empty() && (codepoint = ds.Take()) != 0) {
+            std::memset(stateSet_, 0, stateSetSize); // the visited set is per input codepoint
+            next->Clear();
+            matched = false;
+            for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
+                const State& sr = regex_.GetState(*s);
+                if (sr.codepoint == codepoint ||
+                    sr.codepoint == RegexType::kAnyCharacterClass || 
+                    (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
+                {
+                    matched = AddState(*next, sr.out) || matched;
+                    if (!anchorEnd && matched)
+                        return true;
+                }
+                if (!anchorBegin)
+                    AddState(*next, regex_.root_); // unanchored start: a match may also begin at the next codepoint
+            }
+            internal::Swap(current, next);
+        }
+
+        return matched;
+    }
+
+    size_t GetStateSetSize() const { // bytes needed for one bit per state, rounded up to whole 32-bit words
+        return (regex_.stateCount_ + 31) / 32 * 4;
+    }
+
+    // Return whether the added state is a match state
+    bool AddState(Stack<Allocator>& l, SizeType index) {
+        RAPIDJSON_ASSERT(index != kRegexInvalidState);
+
+        const State& s = regex_.GetState(index);
+        if (s.out1 != kRegexInvalidState) { // Split
+            bool matched = AddState(l, s.out);
+            return AddState(l, s.out1) || matched;
+        }
+        else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) {
+            stateSet_[index >> 5] |= (1u << (index & 31));
+            *l.template PushUnsafe<SizeType>() = index;
+        }
+        return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not invalidated due to reallocation.
+    }
+
+    bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
+        bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0; // the negation flag is stored on the first range
+        while (rangeIndex != kRegexInvalidRange) {
+            const Range& r = regex_.GetRange(rangeIndex);
+            if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
+                return yes;
+            rangeIndex = r.next;
+        }
+        return !yes;
+    }
+
+    const RegexType& regex_;
+    Allocator* allocator_;
+    Allocator* ownAllocator_;
+    Stack<Allocator> state0_;
+    Stack<Allocator> state1_;
+    uint32_t* stateSet_;
+};
+
+typedef GenericRegex<UTF8<> > Regex;
+typedef GenericRegexSearch<Regex> RegexSearch;
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_INTERNAL_REGEX_H_
diff --git a/rapidjson/internal/stack.h b/rapidjson/internal/stack.h
index 722d569..5c5398c 100644
--- a/rapidjson/internal/stack.h
+++ b/rapidjson/internal/stack.h
@@ -15,7 +15,13 @@
 #ifndef RAPIDJSON_INTERNAL_STACK_H_
 #define RAPIDJSON_INTERNAL_STACK_H_
 
-#include "../rapidjson.h"
+#include "../allocators.h"
+#include "swap.h"
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
 
 RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
@@ -32,7 +38,6 @@ class Stack {
     // Optimization note: Do not allocate memory for stack_ in constructor.
     // Do it lazily when first Push() -> Expand() -> Resize().
     Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) {
-        RAPIDJSON_ASSERT(stackCapacity > 0);
     }
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
@@ -81,6 +86,15 @@ class Stack {
     }
 #endif
 
+    void Swap(Stack& rhs) RAPIDJSON_NOEXCEPT { // Exchanges every member (allocators, buffer pointers, initial capacity) with rhs.
+        internal::Swap(allocator_, rhs.allocator_);
+        internal::Swap(ownAllocator_, rhs.ownAllocator_);
+        internal::Swap(stack_, rhs.stack_);
+        internal::Swap(stackTop_, rhs.stackTop_);
+        internal::Swap(stackEnd_, rhs.stackEnd_);
+        internal::Swap(initialCapacity_, rhs.initialCapacity_);
+    }
+
     void Clear() { stackTop_ = stack_; }
 
     void ShrinkToFit() { 
@@ -98,11 +112,22 @@ class Stack {
     // Optimization note: try to minimize the size of this function for force inline.
     // Expansion is run very infrequently, so it is moved to another (probably non-inline) function.
     template<typename T>
-    RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) {
+    RAPIDJSON_FORCEINLINE void Reserve(size_t count = 1) { // Ensures room for count more objects of type T.
          // Expand the stack if needed
-        if (stackTop_ + sizeof(T) * count >= stackEnd_)
+        if (RAPIDJSON_UNLIKELY(sizeof(T) * count > static_cast<size_t>(stackEnd_ - stackTop_))) // compare sizes: no pointer arithmetic on a null or near-end stackTop_
             Expand<T>(count);
+    }
+
+    template<typename T>
+    RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) {
+        Reserve<T>(count);
+        return PushUnsafe<T>(count);
+    }
 
+    template<typename T>
+    RAPIDJSON_FORCEINLINE T* PushUnsafe(size_t count = 1) {
+        RAPIDJSON_ASSERT(stackTop_);
+        RAPIDJSON_ASSERT(stackTop_ + sizeof(T) * count <= stackEnd_);
         T* ret = reinterpret_cast<T*>(stackTop_);
         stackTop_ += sizeof(T) * count;
         return ret;
@@ -122,9 +147,32 @@ class Stack {
     }
 
     template<typename T>
-    T* Bottom() { return (T*)stack_; }
+    const T* Top() const {
+        RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
+        return reinterpret_cast<T*>(stackTop_ - sizeof(T));
+    }
+
+    template<typename T>
+    T* End() { return reinterpret_cast<T*>(stackTop_); }
+
+    template<typename T>
+    const T* End() const { return reinterpret_cast<T*>(stackTop_); }
+
+    template<typename T>
+    T* Bottom() { return reinterpret_cast<T*>(stack_); }
+
+    template<typename T>
+    const T* Bottom() const { return reinterpret_cast<T*>(stack_); }
+
+    bool HasAllocator() const {
+        return allocator_ != 0;
+    }
+
+    Allocator& GetAllocator() {
+        RAPIDJSON_ASSERT(allocator_);
+        return *allocator_;
+    }
 
-    Allocator& GetAllocator() { return *allocator_; }
     bool Empty() const { return stackTop_ == stack_; }
     size_t GetSize() const { return static_cast<size_t>(stackTop_ - stack_); }
     size_t GetCapacity() const { return static_cast<size_t>(stackEnd_ - stack_); }
@@ -136,7 +184,7 @@ class Stack {
         size_t newCapacity;
         if (stack_ == 0) {
             if (!allocator_)
-                ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+                ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
             newCapacity = initialCapacity_;
         } else {
             newCapacity = GetCapacity();
@@ -151,7 +199,7 @@ class Stack {
 
     void Resize(size_t newCapacity) {
         const size_t size = GetSize();  // Backup the current size
-        stack_ = (char*)allocator_->Realloc(stack_, GetCapacity(), newCapacity);
+        stack_ = static_cast<char*>(allocator_->Realloc(stack_, GetCapacity(), newCapacity));
         stackTop_ = stack_ + size;
         stackEnd_ = stack_ + newCapacity;
     }
@@ -176,4 +224,8 @@ class Stack {
 } // namespace internal
 RAPIDJSON_NAMESPACE_END
 
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
 #endif // RAPIDJSON_STACK_H_
diff --git a/rapidjson/internal/strfunc.h b/rapidjson/internal/strfunc.h
index 8440506..226439a 100644
--- a/rapidjson/internal/strfunc.h
+++ b/rapidjson/internal/strfunc.h
@@ -15,7 +15,8 @@
 #ifndef RAPIDJSON_INTERNAL_STRFUNC_H_
 #define RAPIDJSON_INTERNAL_STRFUNC_H_
 
-#include "../rapidjson.h"
+#include "../stream.h"
+#include <cwchar>
 
 RAPIDJSON_NAMESPACE_BEGIN
 namespace internal {
@@ -28,11 +29,40 @@ namespace internal {
 */
 template <typename Ch>
 inline SizeType StrLen(const Ch* s) {
+    RAPIDJSON_ASSERT(s != 0);
     const Ch* p = s;
     while (*p) ++p;
     return SizeType(p - s);
 }
 
+template <>
+inline SizeType StrLen(const char* s) { // char specialization: delegates to std::strlen
+    return SizeType(std::strlen(s)); // NOTE(review): std::strlen lives in <cstring>; this hunk only adds <cwchar> — confirm <cstring> is reachable via ../stream.h
+}
+
+template <>
+inline SizeType StrLen(const wchar_t* s) { // wchar_t specialization: delegates to std::wcslen
+    return SizeType(std::wcslen(s));
+}
+
+//! Counts the code points in an encoded string of \c length code units, storing the result in \c *outCount; returns false if decoding fails.
+template<typename Encoding>
+bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) {
+    RAPIDJSON_ASSERT(s != 0);
+    RAPIDJSON_ASSERT(outCount != 0);
+    GenericStringStream<Encoding> is(s);
+    const typename Encoding::Ch* end = s + length;
+    SizeType count = 0;
+    while (is.src_ < end) { // decode until the read position reaches s + length
+        unsigned codepoint;
+        if (!Encoding::Decode(is, &codepoint))
+            return false; // invalid sequence; *outCount is left untouched
+        count++;
+    }
+    *outCount = count;
+    return true;
+}
+
 } // namespace internal
 RAPIDJSON_NAMESPACE_END
 
diff --git a/rapidjson/internal/strtod.h b/rapidjson/internal/strtod.h
index bb2e3cd..adf49e3 100644
--- a/rapidjson/internal/strtod.h
+++ b/rapidjson/internal/strtod.h
@@ -15,7 +15,6 @@
 #ifndef RAPIDJSON_STRTOD_
 #define RAPIDJSON_STRTOD_
 
-#include "../rapidjson.h"
 #include "ieee754.h"
 #include "biginteger.h"
 #include "diyfp.h"
@@ -95,13 +94,13 @@ inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp) {
     hS_Exp2 -= common_Exp2;
 
     BigInteger dS = d;
-    dS.MultiplyPow5(dS_Exp5) <<= dS_Exp2;
+    dS.MultiplyPow5(static_cast<unsigned>(dS_Exp5)) <<= static_cast<unsigned>(dS_Exp2);
 
     BigInteger bS(bInt);
-    bS.MultiplyPow5(bS_Exp5) <<= bS_Exp2;
+    bS.MultiplyPow5(static_cast<unsigned>(bS_Exp5)) <<= static_cast<unsigned>(bS_Exp2);
 
     BigInteger hS(1);
-    hS.MultiplyPow5(hS_Exp5) <<= hS_Exp2;
+    hS.MultiplyPow5(static_cast<unsigned>(hS_Exp5)) <<= static_cast<unsigned>(hS_Exp2);
 
     BigInteger delta(0);
     dS.Difference(bS, &delta);
@@ -134,22 +133,22 @@ inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosit
         if (significand  >  RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) ||
             (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > '5'))
             break;
-        significand = significand * 10 + (decimals[i] - '0');
+        significand = significand * 10u + static_cast<unsigned>(decimals[i] - '0');
     }
     
     if (i < length && decimals[i] >= '5') // Rounding
         significand++;
 
     size_t remaining = length - i;
-    const unsigned kUlpShift = 3;
-    const unsigned kUlp = 1 << kUlpShift;
-    int error = (remaining == 0) ? 0 : kUlp / 2;
+    const int kUlpShift = 3;
+    const int kUlp = 1 << kUlpShift;
+    int64_t error = (remaining == 0) ? 0 : kUlp / 2;
 
     DiyFp v(significand, 0);
     v = v.Normalize();
     error <<= -v.e;
 
-    const int dExp = (int)decimalPosition - (int)i + exp;
+    const int dExp = static_cast<int>(decimalPosition) - static_cast<int>(i) + exp;
 
     int actualExp;
     DiyFp cachedPower = GetCachedPower10(dExp, &actualExp);
@@ -163,10 +162,10 @@ inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosit
             DiyFp(RAPIDJSON_UINT64_C2(0xf4240000, 00000000), -44),  // 10^6
             DiyFp(RAPIDJSON_UINT64_C2(0x98968000, 00000000), -40)   // 10^7
         };
-        int adjustment = dExp - actualExp - 1;
+        int  adjustment = dExp - actualExp - 1;
         RAPIDJSON_ASSERT(adjustment >= 0 && adjustment < 7);
         v = v * kPow10[adjustment];
-        if (length + adjustment > 19) // has more digits than decimal digits in 64-bit
+        if (length + static_cast<unsigned>(adjustment)> 19u) // has more digits than decimal digits in 64-bit
             error += kUlp / 2;
     }
 
@@ -178,10 +177,10 @@ inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosit
     v = v.Normalize();
     error <<= oldExp - v.e;
 
-    const unsigned effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e);
-    unsigned precisionSize = 64 - effectiveSignificandSize;
+    const int effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e);
+    int precisionSize = 64 - effectiveSignificandSize;
     if (precisionSize + kUlpShift >= 64) {
-        unsigned scaleExp = (precisionSize + kUlpShift) - 63;
+        int scaleExp = (precisionSize + kUlpShift) - 63;
         v.f >>= scaleExp;
         v.e += scaleExp; 
         error = (error >> scaleExp) + 1 + kUlp;
@@ -191,7 +190,7 @@ inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosit
     DiyFp rounded(v.f >> precisionSize, v.e + precisionSize);
     const uint64_t precisionBits = (v.f & ((uint64_t(1) << precisionSize) - 1)) * kUlp;
     const uint64_t halfWay = (uint64_t(1) << (precisionSize - 1)) * kUlp;
-    if (precisionBits >= halfWay + error) {
+    if (precisionBits >= halfWay + static_cast<unsigned>(error)) {
         rounded.f++;
         if (rounded.f & (DiyFp::kDpHiddenBit << 1)) { // rounding overflows mantissa (issue #340)
             rounded.f >>= 1;
@@ -201,12 +200,12 @@ inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosit
 
     *result = rounded.ToDouble();
 
-    return halfWay - error >= precisionBits || precisionBits >= halfWay + error;
+    return halfWay - static_cast<unsigned>(error) >= precisionBits || precisionBits >= halfWay + static_cast<unsigned>(error);
 }
 
 inline double StrtodBigInteger(double approx, const char* decimals, size_t length, size_t decimalPosition, int exp) {
     const BigInteger dInt(decimals, length);
-    const int dExp = (int)decimalPosition - (int)length + exp;
+    const int dExp = static_cast<int>(decimalPosition) - static_cast<int>(length) + exp;
     Double a(approx);
     int cmp = CheckWithinHalfULP(a.Value(), dInt, dExp);
     if (cmp < 0)
@@ -246,10 +245,10 @@ inline double StrtodFullPrecision(double d, int p, const char* decimals, size_t
 
     // Trim right-most digits
     const int kMaxDecimalDigit = 780;
-    if ((int)length > kMaxDecimalDigit) {
-        int delta = (int(length) - kMaxDecimalDigit);
+    if (static_cast<int>(length) > kMaxDecimalDigit) {
+        int delta = (static_cast<int>(length) - kMaxDecimalDigit);
         exp += delta;
-        decimalPosition -= (size_t)delta;
+        decimalPosition -= static_cast<unsigned>(delta);
         length = kMaxDecimalDigit;
     }
 
diff --git a/rapidjson/internal/swap.h b/rapidjson/internal/swap.h
new file mode 100644
index 0000000..666e49f
--- /dev/null
+++ b/rapidjson/internal/swap.h
@@ -0,0 +1,46 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_SWAP_H_
+#define RAPIDJSON_INTERNAL_SWAP_H_
+
+#include "../rapidjson.h"
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+//! Custom swap() to avoid dependency on C++ <algorithm> header
+/*! \tparam T Type of the arguments to swap, should be instantiated with primitive C++ types only.
+    \note This has the same semantics as std::swap().
+*/
+template <typename T>
+inline void Swap(T& a, T& b) RAPIDJSON_NOEXCEPT {
+    T saved = a;    // keep a's old value while a is overwritten
+    a = b;
+    b = saved;
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_INTERNAL_SWAP_H_
diff --git a/rapidjson/istreamwrapper.h b/rapidjson/istreamwrapper.h
new file mode 100644
index 0000000..8639c8c
--- /dev/null
+++ b/rapidjson/istreamwrapper.h
@@ -0,0 +1,115 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ISTREAMWRAPPER_H_
+#define RAPIDJSON_ISTREAMWRAPPER_H_
+
+#include "stream.h"
+#include <iosfwd>
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4351) // new behavior: elements of array 'array' will be default initialized
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Wrapper of \c std::basic_istream into RapidJSON's Stream concept.
+/*!
+    The classes can be wrapped including but not limited to:
+
+    - \c std::istringstream
+    - \c std::stringstream
+    - \c std::wistringstream
+    - \c std::wstringstream
+    - \c std::ifstream
+    - \c std::fstream
+    - \c std::wifstream
+    - \c std::wfstream
+
+    \tparam StreamType Class derived from \c std::basic_istream.
+*/
+   
+template <typename StreamType>
+class BasicIStreamWrapper {
+public:
+    typedef typename StreamType::char_type Ch;
+    BasicIStreamWrapper(StreamType& stream) : stream_(stream), count_(), peekBuffer_() {}
+
+    Ch Peek() const { 
+        typename StreamType::int_type c = stream_.peek();
+        return RAPIDJSON_LIKELY(c != StreamType::traits_type::eof()) ? static_cast<Ch>(c) : static_cast<Ch>('\0');
+    }
+
+    Ch Take() { 
+        typename StreamType::int_type c = stream_.get();
+        if (RAPIDJSON_LIKELY(c != StreamType::traits_type::eof())) {
+            count_++;
+            return static_cast<Ch>(c);
+        }
+        else
+            return '\0';
+    }
+
+    // tellg() may return -1 on failure, so the number of characters taken is counted here instead.
+    size_t Tell() const { return count_; }
+
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+    // For encoding detection only. Returns the next 4 characters without consuming them, or 0 if fewer than 4 are available.
+    const Ch* Peek4() const {
+        RAPIDJSON_ASSERT(sizeof(Ch) == 1); // Only usable for byte stream.
+        int i;
+        bool hasError = false;
+        for (i = 0; i < 4; ++i) {
+            typename StreamType::int_type c = stream_.get();
+            if (c == StreamType::traits_type::eof()) {
+                hasError = true;
+                stream_.clear();
+                break;
+            }
+            peekBuffer_[i] = static_cast<Ch>(c);
+        }
+        for (--i; i >= 0; --i)
+            stream_.putback(peekBuffer_[i]);
+        return !hasError ? peekBuffer_ : 0;
+    }
+
+private:
+    BasicIStreamWrapper(const BasicIStreamWrapper&);
+    BasicIStreamWrapper& operator=(const BasicIStreamWrapper&);
+
+    StreamType& stream_;
+    size_t count_;  //!< Number of characters successfully taken via Take().
+    mutable Ch peekBuffer_[4];
+};
+
+typedef BasicIStreamWrapper<std::istream> IStreamWrapper;
+typedef BasicIStreamWrapper<std::wistream> WIStreamWrapper;
+
+#if defined(__clang__) || defined(_MSC_VER)
+RAPIDJSON_DIAG_POP
+#endif
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_ISTREAMWRAPPER_H_
diff --git a/rapidjson/memorybuffer.h b/rapidjson/memorybuffer.h
index 2484b21..39bee1d 100644
--- a/rapidjson/memorybuffer.h
+++ b/rapidjson/memorybuffer.h
@@ -15,7 +15,7 @@
 #ifndef RAPIDJSON_MEMORYBUFFER_H_
 #define RAPIDJSON_MEMORYBUFFER_H_
 
-#include "rapidjson.h"
+#include "stream.h"
 #include "internal/stack.h"
 
 RAPIDJSON_NAMESPACE_BEGIN
diff --git a/rapidjson/memorystream.h b/rapidjson/memorystream.h
index 99feae5..1d71d8a 100644
--- a/rapidjson/memorystream.h
+++ b/rapidjson/memorystream.h
@@ -15,7 +15,13 @@
 #ifndef RAPIDJSON_MEMORYSTREAM_H_
 #define RAPIDJSON_MEMORYSTREAM_H_
 
-#include "rapidjson.h"
+#include "stream.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(unreachable-code)
+RAPIDJSON_DIAG_OFF(missing-noreturn)
+#endif
 
 RAPIDJSON_NAMESPACE_BEGIN
 
@@ -36,8 +42,8 @@ struct MemoryStream {
 
     MemoryStream(const Ch *src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {}
 
-    Ch Peek() const { return (src_ == end_) ? '\0' : *src_; }
-    Ch Take() { return (src_ == end_) ? '\0' : *src_++; }
+    Ch Peek() const { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_; }
+    Ch Take() { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_++; }
     size_t Tell() const { return static_cast<size_t>(src_ - begin_); }
 
     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
@@ -58,4 +64,8 @@ struct MemoryStream {
 
 RAPIDJSON_NAMESPACE_END
 
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
 #endif // RAPIDJSON_MEMORYBUFFER_H_
diff --git a/rapidjson/msinttypes/stdint.h b/rapidjson/msinttypes/stdint.h
index a26fff4..3d4477b 100644
--- a/rapidjson/msinttypes/stdint.h
+++ b/rapidjson/msinttypes/stdint.h
@@ -89,14 +89,14 @@
 #include <limits.h>
 
 // For Visual Studio 6 in C++ mode and for many Visual Studio versions when
-// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
-// or compiler give many errors like this:
+// compiling for ARM we have to wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler would give many errors like this:
 //   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(_M_ARM)
 extern "C" {
 #endif
 #  include <wchar.h>
-#ifdef __cplusplus
+#if defined(__cplusplus) && !defined(_M_ARM)
 }
 #endif
 
diff --git a/rapidjson/ostreamwrapper.h b/rapidjson/ostreamwrapper.h
new file mode 100644
index 0000000..6f4667c
--- /dev/null
+++ b/rapidjson/ostreamwrapper.h
@@ -0,0 +1,81 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_OSTREAMWRAPPER_H_
+#define RAPIDJSON_OSTREAMWRAPPER_H_
+
+#include "stream.h"
+#include <iosfwd>
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Wrapper of \c std::basic_ostream into RapidJSON's Stream concept.
+/*!
+    The classes can be wrapped including but not limited to:
+
+    - \c std::ostringstream
+    - \c std::stringstream
+    - \c std::wostringstream
+    - \c std::wstringstream
+    - \c std::ofstream
+    - \c std::fstream
+    - \c std::wofstream
+    - \c std::wfstream
+
+    \tparam StreamType Class derived from \c std::basic_ostream.
+*/
+   
+template <typename StreamType>
+class BasicOStreamWrapper {
+public:
+    typedef typename StreamType::char_type Ch;
+    BasicOStreamWrapper(StreamType& stream) : stream_(stream) {}
+
+    void Put(Ch c) {
+        stream_.put(c);
+    }
+
+    void Flush() {
+        stream_.flush();
+    }
+
+    // Not implemented: these members only assert. NOTE(review): they use char rather than Ch — confirm this is intended for wide streams.
+    char Peek() const { RAPIDJSON_ASSERT(false); return 0; }
+    char Take() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
+    char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+    BasicOStreamWrapper(const BasicOStreamWrapper&);
+    BasicOStreamWrapper& operator=(const BasicOStreamWrapper&);
+
+    StreamType& stream_;
+};
+
+typedef BasicOStreamWrapper<std::ostream> OStreamWrapper;
+typedef BasicOStreamWrapper<std::wostream> WOStreamWrapper;
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_OSTREAMWRAPPER_H_
diff --git a/rapidjson/pointer.h b/rapidjson/pointer.h
new file mode 100644
index 0000000..0f377ef
--- /dev/null
+++ b/rapidjson/pointer.h
@@ -0,0 +1,1358 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_POINTER_H_
+#define RAPIDJSON_POINTER_H_
+
+#include "document.h"
+#include "internal/itoa.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(switch-enum)
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+static const SizeType kPointerInvalidIndex = ~SizeType(0);  //!< Represents an invalid index in GenericPointer::Token (all bits of SizeType set).
+
+//! Error code of parsing.
+/*! \ingroup RAPIDJSON_ERRORS
+    \see GenericPointer::GenericPointer, GenericPointer::GetParseErrorCode
+*/
+enum PointerParseErrorCode {
+    kPointerParseErrorNone = 0,                     //!< The parse is successful
+
+    kPointerParseErrorTokenMustBeginWithSolidus,    //!< A token must begin with a '/'
+    kPointerParseErrorInvalidEscape,                //!< Invalid escape
+    kPointerParseErrorInvalidPercentEncoding,       //!< Invalid percent encoding in URI fragment
+    kPointerParseErrorCharacterMustPercentEncode    //!< A character must be percent encoded in URI fragment
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericPointer
+
+//! Represents a JSON Pointer. Use Pointer for UTF8 encoding and default allocator.
+/*!
+    This class implements RFC 6901 "JavaScript Object Notation (JSON) Pointer" 
+    (https://tools.ietf.org/html/rfc6901).
+
+    A JSON pointer is for identifying a specific value in a JSON document
+    (GenericDocument). It can simplify coding of DOM tree manipulation, because it
+    can access multiple-level depth of DOM tree with single API call.
+
+    After it parses a string representation (e.g. "/foo/0") or URI fragment 
+    representation (e.g. "#/foo/0") into its internal representation (tokens),
+    it can be used to resolve a specific value in multiple documents, or sub-tree 
+    of documents.
+
+    Contrary to GenericValue, Pointer can be copy constructed and copy assigned.
+    Apart from assignment, a Pointer cannot be modified after construction.
+
+    Although Pointer is very convenient, please be aware that constructing Pointer
+    involves parsing and dynamic memory allocation. A special constructor with user-
+    supplied tokens eliminates these.
+
+    GenericPointer depends on GenericDocument and GenericValue.
+    
+    \tparam ValueType The value type of the DOM tree. E.g. GenericValue<UTF8<> >
+    \tparam Allocator The allocator type for allocating memory for internal representation.
+    
+    \note GenericPointer uses same encoding of ValueType.
+    However, Allocator of GenericPointer is independent of Allocator of Value.
+*/
+template <typename ValueType, typename Allocator = CrtAllocator>
+class GenericPointer {
+public:
+    typedef typename ValueType::EncodingType EncodingType;  //!< Encoding type from Value
+    typedef typename ValueType::Ch Ch;                      //!< Character type from Value
+
+    //! A token is the basic units of internal representation.
+    /*!
+        A JSON pointer string representation "/foo/123" is parsed to two tokens: 
+        "foo" and 123. 123 will be represented in both numeric form and string form.
+        They are resolved according to the actual value type (object or array).
+
+        For token that are not numbers, or the numeric value is out of bound
+        (greater than limits of SizeType), they are only treated as string form
+        (i.e. the token's index will be equal to kPointerInvalidIndex).
+
+        This struct is public so that user can create a Pointer without parsing and 
+        allocation, using a special constructor.
+    */
+    struct Token {
+        const Ch* name;             //!< Name of the token. It is null-terminated, but it may also contain embedded null characters.
+        SizeType length;            //!< Length of the name in Ch units, not counting the terminating null.
+        SizeType index;             //!< A valid array index, if it is not equal to kPointerInvalidIndex.
+    };
+
+    //!@name Constructors and destructor.
+    //@{
+
+    //! Default constructor. Creates a pointer with no tokens and no parse error; \c allocator is only stored.
+    GenericPointer(Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
+
+    //! Constructor that parses a string or URI fragment representation.
+    /*!
+        \param source A null-terminated, string or URI fragment representation of JSON pointer.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
+    */
+    explicit GenericPointer(const Ch* source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        Parse(source, internal::StrLen(source)); // Length is measured here, so source must be null-terminated.
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Constructor that parses a string or URI fragment representation.
+    /*!
+        \param source A string or URI fragment representation of JSON pointer.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
+        \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+    */
+    explicit GenericPointer(const std::basic_string<Ch>& source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        Parse(source.c_str(), source.size()); // Length comes from size(), not from scanning for a terminator.
+    }
+#endif
+
+    //! Constructor that parses a string or URI fragment representation, with length of the source string.
+    /*!
+        \param source A string or URI fragment representation of JSON pointer.
+        \param length Length of source.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
+        \note Slightly faster than the overload without length.
+    */
+    GenericPointer(const Ch* source, size_t length, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        Parse(source, length); // Caller-provided length; no StrLen() call is made here.
+    }
+
+    //! Constructor with user-supplied tokens.
+    /*!
+        This constructor lets the user supply a const array of tokens.
+        This prevents the parsing process and eliminates allocation.
+        This is preferred for memory constrained environments.
+
+        \param tokens A constant array of tokens representing the JSON pointer.
+        \param tokenCount Number of tokens.
+
+        \b Example
+        \code
+        #define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex }
+        #define INDEX(i) { #i, sizeof(#i) - 1, i }
+
+        static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) };
+        static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0]));
+        // Equivalent to static const Pointer p("/foo/123");
+
+        #undef NAME
+        #undef INDEX
+        \endcode
+    */
+    GenericPointer(const Token* tokens, size_t tokenCount) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(const_cast<Token*>(tokens)), tokenCount_(tokenCount), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {} // Tokens are referenced, not copied; nameBuffer_ stays null.
+
+    //! Copy constructor.
+    GenericPointer(const GenericPointer& rhs, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        *this = rhs; // Start from an empty state, then let operator= do the copying.
+    }
+
+    //! Destructor.
+    ~GenericPointer() {
+        if (nameBuffer_)    // If user-supplied tokens constructor is used, nameBuffer_ is nullptr and tokens_ are not deallocated.
+            Allocator::Free(tokens_);    // nameBuffer_ lives in the same allocation as tokens_ (see CopyFromRaw), so one Free suffices.
+        RAPIDJSON_DELETE(ownAllocator_); // ownAllocator_ is only non-null when this object created the allocator itself.
+    }
+
+    //! Assignment operator.
+    GenericPointer& operator=(const GenericPointer& rhs) {
+        if (this == &rhs)   // Self-assignment: nothing to do.
+            return *this;
+        // Release our own token storage. ownAllocator_ is deliberately not deleted here.
+        if (nameBuffer_)
+            Allocator::Free(tokens_);
+
+        parseErrorCode_ = rhs.parseErrorCode_;
+        parseErrorOffset_ = rhs.parseErrorOffset_;
+        tokenCount_ = rhs.tokenCount_;
+
+        if (!rhs.nameBuffer_) {
+            nameBuffer_ = 0;        // User supplied const tokens: refer to the same array.
+            tokens_ = rhs.tokens_;
+        }
+        else
+            CopyFromRaw(rhs);       // Normally parsed tokens: make our own copy.
+        return *this;
+    }
+
+    //@}
+
+    //!@name Append token
+    //@{
+
+    //! Append a token and return a new Pointer
+    /*!
+        \param token Token to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(const Token& token, Allocator* allocator = 0) const {
+        GenericPointer result;
+        result.allocator_ = allocator;
+        // Copy the existing tokens, reserving one extra token slot and room for the new name plus its terminator.
+        Ch* storage = result.CopyFromRaw(*this, 1, token.length + 1);
+        std::memcpy(storage, token.name, (token.length + 1) * sizeof(Ch));
+        const Token appended = { storage, token.length, token.index };
+        result.tokens_[tokenCount_] = appended;
+        return result;
+    }
+
+    //! Append a name token with length, and return a new Pointer
+    /*!
+        \param name Name to be appended.
+        \param length Length of name.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(const Ch* name, SizeType length, Allocator* allocator = 0) const {
+        Token nameToken = { name, length, kPointerInvalidIndex };   // A plain name carries no array index.
+        return Append(nameToken, allocator);
+    }
+
+    //! Append a name token without length, and return a new Pointer
+    /*!
+        \param name Name (const Ch*) to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >), (GenericPointer))
+    Append(T* name, Allocator* allocator = 0) const {
+        return Append(name, internal::StrLen(name), allocator); // Measures name with internal::StrLen, then uses the (name, length) overload.
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Append a name token, and return a new Pointer
+    /*!
+        \param name Name to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(const std::basic_string<Ch>& name, Allocator* allocator = 0) const {
+        return Append(name.c_str(), static_cast<SizeType>(name.size()), allocator); // size() is narrowed to SizeType by the cast.
+    }
+#endif
+
+    //! Append a index token, and return a new Pointer
+    /*!
+        \param index Index to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(SizeType index, Allocator* allocator = 0) const {
+        char digits[21];    // Decimal text of index, followed by a terminator.
+        char* const digitsEnd = (sizeof(SizeType) == 4) ? internal::u32toa(index, digits) : internal::u64toa(index, digits);
+        const SizeType length = static_cast<SizeType>(digitsEnd - digits);
+        *digitsEnd = '\0';
+
+        if (sizeof(Ch) == 1) {  // Ch is byte-sized: the char buffer can be used as the name directly.
+            Token token = { reinterpret_cast<Ch*>(digits), length, index };
+            return Append(token, allocator);
+        }
+
+        // Wider Ch: convert the digits (terminator included) one code unit at a time.
+        Ch name[21];
+        for (size_t i = 0; i <= length; i++)
+            name[i] = static_cast<Ch>(digits[i]);
+        Token token = { name, length, index };
+        return Append(token, allocator);
+    }
+
+    //! Append a token by value, and return a new Pointer
+    /*!
+        \param token token to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(const ValueType& token, Allocator* allocator = 0) const {
+        if (!token.IsString()) {
+            RAPIDJSON_ASSERT(token.IsUint64());
+            RAPIDJSON_ASSERT(token.GetUint64() <= SizeType(~0));
+            return Append(static_cast<SizeType>(token.GetUint64()), allocator);
+        }
+        // String value: append it as a name token.
+        return Append(token.GetString(), token.GetStringLength(), allocator);
+    }
+
+    //!@name Handling Parse Error
+    //@{
+
+    //! Check whether this is a valid pointer, i.e. no parse error code has been recorded.
+    bool IsValid() const { return parseErrorCode_ == kPointerParseErrorNone; }
+
+    //! Get the parsing error offset in code units (the stored parseErrorOffset_).
+    size_t GetParseErrorOffset() const { return parseErrorOffset_; }
+
+    //! Get the parsing error code; kPointerParseErrorNone is the value IsValid() tests for.
+    PointerParseErrorCode GetParseErrorCode() const { return parseErrorCode_; }
+
+    //@}
+
+    //! Get the allocator of this pointer. Note that allocator_ is dereferenced without a null check.
+    Allocator& GetAllocator() { return *allocator_; }
+
+    //!@name Tokens
+    //@{
+
+    //! Get the token array (const version only). Use GetTokenCount() for its length.
+    const Token* GetTokens() const { return tokens_; }
+
+    //! Get the number of tokens in the array returned by GetTokens().
+    size_t GetTokenCount() const { return tokenCount_; }
+
+    //@}
+
+    //!@name Equality/inequality operators
+    //@{
+
+    //! Equality operator.
+    /*!
+        \note When any pointers are invalid, always returns false.
+    */
+    bool operator==(const GenericPointer& rhs) const {
+        const bool comparable = IsValid() && rhs.IsValid() && tokenCount_ == rhs.tokenCount_;
+        if (!comparable)
+            return false;
+
+        for (size_t i = 0; i != tokenCount_; ++i) {
+            const Token& a = tokens_[i];
+            const Token& b = rhs.tokens_[i];
+            if (a.index != b.index || a.length != b.length)
+                return false;
+            if (a.length != 0 && std::memcmp(a.name, b.name, sizeof(Ch) * a.length) != 0)
+                return false;   // Same length but different code units.
+        }
+        return true;
+    }
+
+    //! Inequality operator.
+    /*!
+        \note When any pointers are invalid, always returns true.
+    */
+    bool operator!=(const GenericPointer& rhs) const { return !(*this == rhs); } // Exact negation of operator==.
+
+    //@}
+
+    //!@name Stringify
+    //@{
+
+    //! Stringify the pointer into string representation.
+    /*!
+        \tparam OutputStream Type of output stream.
+        \param os The output stream.
+    */
+    template<typename OutputStream>
+    bool Stringify(OutputStream& os) const {
+        return Stringify<false, OutputStream>(os);  // First template argument false: not the URI fragment form.
+    }
+
+    //! Stringify the pointer into URI fragment representation.
+    /*!
+        \tparam OutputStream Type of output stream.
+        \param os The output stream.
+    */
+    template<typename OutputStream>
+    bool StringifyUriFragment(OutputStream& os) const {
+        return Stringify<true, OutputStream>(os);   // First template argument true: URI fragment form.
+    }
+
+    //@}
+
+    //!@name Create value
+    //@{
+
+    //! Create a value in a subtree.
+    /*!
+        If the value does not exist, it creates all parent values and a JSON Null value.
+        So it always succeeds and returns the newly created or existing value.
+
+        Remind that it may change types of parents according to tokens, so it 
+        potentially removes previously stored values. For example, if a document 
+        was an array, and "/foo" is used to create a value, then the document 
+        will be changed to an object, and all existing array elements are lost.
+
+        \param root Root value of a DOM subtree to be resolved. It can be any value other than document root.
+        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
+        \param alreadyExist If non-null, it stores whether the resolved value is already exist.
+        \return The resolved newly created (a JSON Null value), or already exists value.
+    */
+    ValueType& Create(ValueType& root, typename ValueType::AllocatorType& allocator, bool* alreadyExist = 0) const {
+        RAPIDJSON_ASSERT(IsValid());
+        ValueType* v = &root;   // Cursor: the value reached so far while walking the tokens.
+        bool exist = true;      // Cleared as soon as any value along the path has to be created.
+        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
+            if (v->IsArray() && t->name[0] == '-' && t->length == 1) {   // Token "-" on an array: append a new element.
+                v->PushBack(ValueType().Move(), allocator);
+                v = &((*v)[v->Size() - 1]);
+                exist = false;
+            }
+            else {
+                if (t->index == kPointerInvalidIndex) { // must be object name
+                    if (!v->IsObject())
+                        v->SetObject(); // Change to Object
+                }
+                else { // object name or array index
+                    if (!v->IsArray() && !v->IsObject())
+                        v->SetArray(); // Change to Array
+                }
+
+                if (v->IsArray()) {
+                    if (t->index >= v->Size()) {
+                        v->Reserve(t->index + 1, allocator);
+                        while (t->index >= v->Size())    // Pad with default-constructed values up to and including t->index.
+                            v->PushBack(ValueType().Move(), allocator);
+                        exist = false;
+                    }
+                    v = &((*v)[t->index]);
+                }
+                else {
+                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
+                    if (m == v->MemberEnd()) {
+                        v->AddMember(ValueType(t->name, t->length, allocator).Move(), ValueType().Move(), allocator);
+                        v = &(--v->MemberEnd())->value; // Assumes AddMember() appends at the end
+                        exist = false;
+                    }
+                    else
+                        v = &m->value;
+                }
+            }
+        }
+
+        if (alreadyExist)
+            *alreadyExist = exist;
+
+        return *v;
+    }
+
+    //! Creates a value in a document.
+    /*!
+        \param document A document to be resolved.
+        \param alreadyExist If non-null, it stores whether the resolved value is already exist.
+        \return The resolved newly created, or already exists value.
+    */
+    template <typename stackAllocator>
+    ValueType& Create(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, bool* alreadyExist = 0) const {
+        return Create(document, document.GetAllocator(), alreadyExist);  // The document is the root and supplies the allocator.
+    }
+
+    //@}
+
+    //!@name Query value
+    //@{
+
+    //! Query a value in a subtree.
+    /*!
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \param unresolvedTokenIndex If the pointer cannot resolve a token in the pointer, this parameter can obtain the index of unresolved token.
+        \return Pointer to the value if it can be resolved. Otherwise null.
+
+        \note
+        There are only 3 situations when a value cannot be resolved:
+        1. A value in the path is not an array nor object.
+        2. An object value does not contain the token.
+        3. A token is out of range of an array value.
+
+        Use unresolvedTokenIndex to retrieve the token index.
+    */
+    ValueType* Get(ValueType& root, size_t* unresolvedTokenIndex = 0) const {
+        RAPIDJSON_ASSERT(IsValid());
+        ValueType* v = &root;
+        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
+            switch (v->GetType()) {
+            case kObjectType:
+                {
+                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
+                    if (m == v->MemberEnd())
+                        break;  // No such member: leave the switch and report below.
+                    v = &m->value;
+                }
+                continue;       // Token resolved: go on to the next token.
+            case kArrayType:
+                if (t->index == kPointerInvalidIndex || t->index >= v->Size())
+                    break;      // Not a usable index for this array.
+                v = &((*v)[t->index]);
+                continue;
+            default:
+                break;          // Neither object nor array: cannot descend further.
+            }
+
+            // Error: unresolved token
+            if (unresolvedTokenIndex)
+                *unresolvedTokenIndex = static_cast<size_t>(t - tokens_);
+            return 0;
+        }
+        return v;
+    }
+
+    //! Query a const value in a const subtree.
+    /*!
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \return Pointer to the value if it can be resolved. Otherwise null.
+    */
+    const ValueType* Get(const ValueType& root, size_t* unresolvedTokenIndex = 0) const { 
+        return Get(const_cast<ValueType&>(root), unresolvedTokenIndex); // Reuses the non-const overload; the result is handed back as pointer-to-const.
+    }
+
+    //@}
+
+    //!@name Query a value with default
+    //@{
+
+    //! Query a value in a subtree with default value.
+    /*!
+        Similar to Get(), but if the specified value do not exists, it creates all parents and clone the default value.
+        So that this function always succeed.
+
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \param defaultValue Default value to be cloned if the value was not exists.
+        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
+        \see Create()
+    */
+    ValueType& GetWithDefault(ValueType& root, const ValueType& defaultValue, typename ValueType::AllocatorType& allocator) const {
+        bool alreadyExist;  // Always written by Create() because a non-null pointer is passed.
+        ValueType& v = Create(root, allocator, &alreadyExist);  // ValueType, not Value: Create() returns ValueType& for any encoding/allocator.
+        return alreadyExist ? v : v.CopyFrom(defaultValue, allocator);  // Only a newly created slot receives a copy of the default.
+    }
+
+    //! Query a value in a subtree with default null-terminated string.
+    ValueType& GetWithDefault(ValueType& root, const Ch* defaultValue, typename ValueType::AllocatorType& allocator) const {
+        bool alreadyExist;  // Always written by Create() because a non-null pointer is passed.
+        ValueType& v = Create(root, allocator, &alreadyExist);  // ValueType, not Value: Create() returns ValueType& for any encoding/allocator.
+        return alreadyExist ? v : v.SetString(defaultValue, allocator);  // Only a newly created slot receives the default string.
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Query a value in a subtree with default std::basic_string.
+    ValueType& GetWithDefault(ValueType& root, const std::basic_string<Ch>& defaultValue, typename ValueType::AllocatorType& allocator) const {
+        bool alreadyExist;  // Always written by Create() because a non-null pointer is passed.
+        ValueType& v = Create(root, allocator, &alreadyExist);  // ValueType, not Value: Create() returns ValueType& for any encoding/allocator.
+        return alreadyExist ? v : v.SetString(defaultValue, allocator);  // Only a newly created slot receives the default string.
+    }
+#endif
+
+    //! Query a value in a subtree with default primitive value.
+    /*!
+        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
+    GetWithDefault(ValueType& root, T defaultValue, typename ValueType::AllocatorType& allocator) const {
+        return GetWithDefault(root, ValueType(defaultValue).Move(), allocator); // Wraps the primitive in a temporary ValueType and forwards it.
+    }
+
+    //! Query a value in a document with default value.
+    template <typename stackAllocator>
+    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& defaultValue) const {
+        return GetWithDefault(document, defaultValue, document.GetAllocator()); // The document is the root and supplies the allocator.
+    }
+
+    //! Query a value in a document with default null-terminated string.
+    template <typename stackAllocator>
+    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* defaultValue) const {
+        return GetWithDefault(document, defaultValue, document.GetAllocator()); // The document is the root and supplies the allocator.
+    }
+    
+#if RAPIDJSON_HAS_STDSTRING
+    //! Query a value in a document with default std::basic_string.
+    template <typename stackAllocator>
+    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& defaultValue) const {
+        return GetWithDefault(document, defaultValue, document.GetAllocator()); // The document is the root and supplies the allocator.
+    }
+#endif
+
+    //! Query a value in a document with default primitive value.
+    /*!
+        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
+    */
+    template <typename T, typename stackAllocator>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
+    GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T defaultValue) const {
+        return GetWithDefault(document, defaultValue, document.GetAllocator()); // The document is the root and supplies the allocator.
+    }
+
+    //@}
+
+    //!@name Set a value
+    //@{
+
+    //! Set a value in a subtree, with move semantics.
+    /*!
+        It creates all parents if they are not exist or types are different to the tokens.
+        So this function always succeeds but potentially remove existing values.
+
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \param value Value to be set.
+        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
+        \see Create()
+    */
+    ValueType& Set(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator) = value; // Assigns from the non-const value (the move-semantics overload).
+    }
+
+    //! Set a value in a subtree, with copy semantics.
+    ValueType& Set(ValueType& root, const ValueType& value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator).CopyFrom(value, allocator); // Copies via CopyFrom(); value is const and is not modified.
+    }
+
+    //! Set a null-terminated string in a subtree.
+    ValueType& Set(ValueType& root, const Ch* value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator) = ValueType(value, allocator).Move(); // A temporary string value is built with the allocator, then assigned.
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Set a std::basic_string in a subtree.
+    ValueType& Set(ValueType& root, const std::basic_string<Ch>& value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator) = ValueType(value, allocator).Move(); // A temporary string value is built with the allocator, then assigned.
+    }
+#endif
+
+    //! Set a primitive value in a subtree.
+    /*!
+        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
+    Set(ValueType& root, T value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator) = ValueType(value).Move(); // A temporary ValueType is built from the primitive, then assigned.
+    }
+
+    //! Set a value in a document, with move semantics.
+    template <typename stackAllocator>
+    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const {
+        return Create(document) = value; // Create(document) uses the document's own allocator.
+    }
+
+    //! Set a value in a document, with copy semantics.
+    template <typename stackAllocator>
+    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& value) const {
+        return Create(document).CopyFrom(value, document.GetAllocator()); // The copy is allocated from the document's allocator.
+    }
+
+    //! Set a null-terminated string in a document.
+    template <typename stackAllocator>
+    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* value) const {
+        return Create(document) = ValueType(value, document.GetAllocator()).Move(); // The temporary string value uses the document's allocator.
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Sets a std::basic_string in a document.
+    template <typename stackAllocator>
+    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& value) const {
+        return Create(document) = ValueType(value, document.GetAllocator()).Move(); // The temporary string value uses the document's allocator.
+    }
+#endif
+
+    //! Set a primitive value in a document.
+    /*!
+    \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
+    */
+    template <typename T, typename stackAllocator>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
+        Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T value) const {
+            return Create(document) = value; // Assigns the primitive directly; the subtree overload wraps it in ValueType(value) first.
+    }
+
+    //@}
+
+    //!@name Swap a value
+    //@{
+
+    //! Swap a value with a value in a subtree.
+    /*!
+        It creates all parents if they are not exist or types are different to the tokens.
+        So this function always succeeds but potentially remove existing values.
+
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \param value Value to be swapped.
+        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
+        \see Create()
+    */
+    ValueType& Swap(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator).Swap(value); // The slot is created first if missing, then swapped with value.
+    }
+
+    //! Swap a value with a value in a document.
+    template <typename stackAllocator>
+    ValueType& Swap(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const {
+        return Create(document).Swap(value); // Create(document) uses the document's own allocator.
+    }
+
+    //@}
+
+    //! Erase a value in a subtree.
+    /*!
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \return Whether the resolved value is found and erased.
+
+        \note Erasing with an empty pointer \c Pointer(""), i.e. the root, always fail and return false.
+    */
+    bool Erase(ValueType& root) const {
+        RAPIDJSON_ASSERT(IsValid());
+        if (tokenCount_ == 0) // Cannot erase the root
+            return false;
+
+        ValueType* v = &root;
+        const Token* last = tokens_ + (tokenCount_ - 1);   // The final token names the value to erase.
+        for (const Token *t = tokens_; t != last; ++t) {    // Resolve every token except the last to find the parent.
+            switch (v->GetType()) {
+            case kObjectType:
+                {
+                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
+                    if (m == v->MemberEnd())
+                        return false;
+                    v = &m->value;
+                }
+                break;
+            case kArrayType:
+                if (t->index == kPointerInvalidIndex || t->index >= v->Size())
+                    return false;
+                v = &((*v)[t->index]);
+                break;
+            default:
+                return false;   // The path runs through a value that is neither object nor array.
+            }
+        }
+
+        switch (v->GetType()) { // v is now the parent; remove the child identified by the last token.
+        case kObjectType:
+            return v->EraseMember(GenericStringRef<Ch>(last->name, last->length));
+        case kArrayType:
+            if (last->index == kPointerInvalidIndex || last->index >= v->Size())
+                return false;
+            v->Erase(v->Begin() + last->index);
+            return true;
+        default:
+            return false;
+        }
+    }
+
+private:
+    //! Clone the content from rhs to this.
+    /*!
+        Tokens and names are copied into one freshly allocated block (token array first, name buffer right after it).
+        \param rhs Source pointer.
+        \param extraToken Extra tokens to be allocated.
+        \param extraNameBufferSize Extra name buffer size (in number of Ch) to be allocated.
+        \return Start of non-occupied name buffer, for storing extra names.
+    */
+    Ch* CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0) {
+        if (!allocator_) // allocator is independently owned.
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+
+        size_t nameBufferSize = rhs.tokenCount_; // null terminators for tokens
+        for (Token *t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t)
+            nameBufferSize += t->length;
+
+        tokenCount_ = rhs.tokenCount_ + extraToken;
+        tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + (nameBufferSize + extraNameBufferSize) * sizeof(Ch)));
+        nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_);
+        if (rhs.tokenCount_ > 0) {
+            std::memcpy(tokens_, rhs.tokens_, rhs.tokenCount_ * sizeof(Token));
+        }
+        if (nameBufferSize > 0) {
+            std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch));
+        }
+
+        // Re-point each copied name into the new buffer via its offset inside rhs' buffer; subtracting pointers into the two separate allocations would be undefined behaviour.
+        for (size_t k = 0; k < rhs.tokenCount_; ++k)
+            tokens_[k].name = nameBuffer_ + (rhs.tokens_[k].name - rhs.nameBuffer_);
+
+        return nameBuffer_ + nameBufferSize;
+    }
+
+    //! Check whether a character should be percent-encoded.
+    /*!
+        Returns false only for the RFC 3986 2.3 unreserved characters: ASCII letters, digits, '-', '.', '_' and '~'.
+        \param c The character (code unit) to be tested.
+    */
+    bool NeedPercentEncode(Ch c) const {
+        return !((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~');
+    }
+
+    //! Parse a JSON String or its URI fragment representation into tokens; on failure the buffer is freed and parseErrorCode_/parseErrorOffset_ are set.
+#ifndef __clang__ // -Wdocumentation
+    /*!
+        \param source Either a JSON Pointer string, or its URI fragment representation. It need not be null terminated.
+        \param length Length of the source string (number of Ch); 0 yields a pointer with no tokens.
+        \note Source cannot be JSON String Representation of JSON Pointer, e.g. In "/\u0000", \u0000 will not be unescaped.
+    */
+#endif
+    void Parse(const Ch* source, size_t length) {
+        RAPIDJSON_ASSERT(source != NULL);
+        RAPIDJSON_ASSERT(nameBuffer_ == 0);
+        RAPIDJSON_ASSERT(tokens_ == 0);
+
+        // Create own allocator if user did not supply.
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+
+        // Count number of '/' as tokenCount
+        tokenCount_ = 0;
+        for (const Ch* s = source; s != source + length; s++) 
+            if (*s == '/')
+                tokenCount_++;
+
+        Token* token = tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + length * sizeof(Ch))); // one block: token array, then name buffer
+        Ch* name = nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_);
+        size_t i = 0;
+
+        // Detect if it is a URI fragment
+        bool uriFragment = false;
+        if (i < length && source[i] == '#') { // bounds check first: with length == 0 source[0] must not be read
+            uriFragment = true;
+            i++;
+        }
+
+        if (i != length && source[i] != '/') {
+            parseErrorCode_ = kPointerParseErrorTokenMustBeginWithSolidus;
+            goto error;
+        }
+
+        while (i < length) {
+            RAPIDJSON_ASSERT(source[i] == '/');
+            i++; // consumes '/'
+
+            token->name = name;
+            bool isNumber = true;
+
+            while (i < length && source[i] != '/') {
+                Ch c = source[i];
+                if (uriFragment) {
+                    // Decoding percent-encoding for URI fragment
+                    if (c == '%') {
+                        PercentDecodeStream is(&source[i], source + length);
+                        GenericInsituStringStream<EncodingType> os(name);
+                        Ch* begin = os.PutBegin();
+                        if (!Transcoder<UTF8<>, EncodingType>().Validate(is, os) || !is.IsValid()) {
+                            parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding;
+                            goto error;
+                        }
+                        size_t len = os.PutEnd(begin);
+                        i += is.Tell() - 1; // skip the consumed triplets except one code unit; the i++ that follows on either path accounts for it
+                        if (len == 1)
+                            c = *name;
+                        else {
+                            name += len;
+                            isNumber = false;
+                            i++;
+                            continue;
+                        }
+                    }
+                    else if (NeedPercentEncode(c)) {
+                        parseErrorCode_ = kPointerParseErrorCharacterMustPercentEncode;
+                        goto error;
+                    }
+                }
+
+                i++;
+                
+                // Escaping "~0" -> '~', "~1" -> '/'
+                if (c == '~') {
+                    if (i < length) {
+                        c = source[i];
+                        if (c == '0')       c = '~';
+                        else if (c == '1')  c = '/';
+                        else {
+                            parseErrorCode_ = kPointerParseErrorInvalidEscape;
+                            goto error;
+                        }
+                        i++;
+                    }
+                    else {
+                        parseErrorCode_ = kPointerParseErrorInvalidEscape;
+                        goto error;
+                    }
+                }
+
+                // First check for index: all of characters are digit
+                if (c < '0' || c > '9')
+                    isNumber = false;
+
+                *name++ = c;
+            }
+            token->length = static_cast<SizeType>(name - token->name);
+            if (token->length == 0)
+                isNumber = false;
+            *name++ = '\0'; // Null terminator
+
+            // Second check for index: more than one digit cannot have leading zero
+            if (isNumber && token->length > 1 && token->name[0] == '0')
+                isNumber = false;
+
+            // String to SizeType conversion
+            SizeType n = 0;
+            if (isNumber) {
+                for (size_t j = 0; j < token->length; j++) {
+                    const SizeType digit = static_cast<SizeType>(token->name[j] - '0');
+                    if (n > (static_cast<SizeType>(~SizeType(0)) - digit) / 10) { // n * 10 + digit would wrap; a plain (m < n) test misses e.g. "9999999999"
+                        isNumber = false;
+                        break;
+                    }
+                    n = n * 10 + digit;
+                }
+            }
+
+            token->index = isNumber ? n : kPointerInvalidIndex;
+            token++;
+        }
+
+        RAPIDJSON_ASSERT(name <= nameBuffer_ + length); // Should not overflow buffer
+        parseErrorCode_ = kPointerParseErrorNone;
+        return;
+
+    error: // parseErrorCode_ was set at the jump site; release the buffer and record where parsing stopped
+        Allocator::Free(tokens_);
+        nameBuffer_ = 0;
+        tokens_ = 0;
+        tokenCount_ = 0;
+        parseErrorOffset_ = i;
+        return;
+    }
+
+    //! Stringify to string or URI fragment representation; returns false if transcoding a name to UTF-8 fails, true otherwise.
+    /*!
+        \tparam uriFragment True for stringifying to URI fragment representation. False for string representation.
+        \tparam OutputStream type of output stream.
+        \param os The output stream.
+    */
+    template<bool uriFragment, typename OutputStream>
+    bool Stringify(OutputStream& os) const {
+        RAPIDJSON_ASSERT(IsValid());
+
+        if (uriFragment)
+            os.Put('#'); // the fragment form starts with '#'
+
+        for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
+            os.Put('/'); // every token is introduced by a solidus
+            for (size_t j = 0; j < t->length; j++) {
+                Ch c = t->name[j];
+                if (c == '~') { // '~' is written as the escape "~0"
+                    os.Put('~');
+                    os.Put('0');
+                }
+                else if (c == '/') { // '/' is written as the escape "~1"
+                    os.Put('~');
+                    os.Put('1');
+                }
+                else if (uriFragment && NeedPercentEncode(c)) { 
+                    // Transcode from name[j] to UTF-8 through PercentEncodeStream, which writes every byte as %XY
+                    GenericStringStream<typename ValueType::EncodingType> source(&t->name[j]);
+                    PercentEncodeStream<OutputStream> target(os);
+                    if (!Transcoder<EncodingType, UTF8<> >().Validate(source, target))
+                        return false;
+                    j += source.Tell() - 1; // skip the code units the transcoder consumed (the loop's j++ accounts for one)
+                }
+                else
+                    os.Put(c);
+            }
+        }
+        return true;
+    }
+
+    //! A helper stream for decoding a percent-encoded sequence into code unit.
+    /*!
+        This stream decodes %XY triplet into code unit (0-255).
+        If it encounters invalid or truncated input, it returns code unit 0 and
+        marks the stream invalid, which callers check with IsValid().
+    */
+    class PercentDecodeStream {
+    public:
+        typedef typename ValueType::Ch Ch;
+
+        //! Constructor
+        /*!
+            \param source Start of the stream
+            \param end Past-the-end of the stream.
+        */
+        PercentDecodeStream(const Ch* source, const Ch* end) : src_(source), head_(source), end_(end), valid_(true) {}
+        //! Decode the next %XY triplet into one code unit; returns 0 and marks the stream invalid on malformed or truncated input.
+        Ch Take() {
+            if (end_ - src_ < 3 || *src_ != '%') { // %XY triplet; remaining length is checked before src_ is dereferenced
+                valid_ = false;
+                return 0;
+            }
+            src_++;
+            Ch c = 0;
+            for (int j = 0; j < 2; j++) { // two hex digits, most significant nibble first
+                c = static_cast<Ch>(c << 4);
+                Ch h = *src_;
+                if      (h >= '0' && h <= '9') c = static_cast<Ch>(c + h - '0');
+                else if (h >= 'A' && h <= 'F') c = static_cast<Ch>(c + h - 'A' + 10);
+                else if (h >= 'a' && h <= 'f') c = static_cast<Ch>(c + h - 'a' + 10);
+                else {
+                    valid_ = false;
+                    return 0;
+                }
+                src_++;
+            }
+            return c;
+        }
+        //! Tell(): number of code units consumed so far. IsValid(): false once any Take() has failed.
+        size_t Tell() const { return static_cast<size_t>(src_ - head_); }
+        bool IsValid() const { return valid_; }
+
+    private:
+        const Ch* src_;     //!< Current read position.
+        const Ch* head_;    //!< Original head of the string.
+        const Ch* end_;     //!< Past-the-end position.
+        bool valid_;        //!< Whether the parsing is valid.
+    };
+
+    //! A helper stream that writes each character (UTF-8 code unit) to the wrapped stream as an upper-case "%XY" triplet.
+    template <typename OutputStream>
+    class PercentEncodeStream {
+    public:
+        PercentEncodeStream(OutputStream& os) : os_(os) {} // keeps a reference to os; nothing is written until Put()
+        void Put(char c) { // UTF-8 must be byte
+            unsigned char u = static_cast<unsigned char>(c); // work on the 0-255 value so the shift and mask below index hexDigits correctly
+            static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+            os_.Put('%');
+            os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u >> 4])); // high nibble
+            os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u & 15])); // low nibble
+        }
+    private:
+        OutputStream& os_; //!< Wrapped output stream receiving the encoded characters.
+    };
+
+    Allocator* allocator_;                  //!< The current allocator. It is either user-supplied or equal to ownAllocator_.
+    Allocator* ownAllocator_;               //!< Allocator owned by this Pointer.
+    Ch* nameBuffer_;                        //!< A buffer containing all names in tokens.
+    Token* tokens_;                         //!< A list of tokens.
+    size_t tokenCount_;                     //!< Number of tokens in tokens_.
+    size_t parseErrorOffset_;               //!< Offset in code unit when parsing fail.
+    PointerParseErrorCode parseErrorCode_;  //!< Parsing error code.
+};
+
+//! GenericPointer for Value (UTF-8, default allocator).
+typedef GenericPointer<Value> Pointer;
+
+//!@name Helper functions for GenericPointer
+//@{
+
+//////////////////////////////////////////////////////////////////////////////
+// CreateValueByPointer: forwards to GenericPointer::Create(). The array overloads build a temporary GenericPointer from source with length N - 1 (array size minus the trailing null of a string literal).
+template <typename T>
+typename T::ValueType& CreateValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::AllocatorType& a) {
+    return pointer.Create(root, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& CreateValueByPointer(T& root, const CharType(&source)[N], typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Create(root, a);
+}
+
+// No allocator parameter: these overloads take a document and call Create(document).
+
+template <typename DocumentType>
+typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer) {
+    return pointer.Create(document);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const CharType(&source)[N]) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Create(document);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// GetValueByPointer: forwards to GenericPointer::Get(), passing unresolvedTokenIndex through. Overloads exist for const and non-const roots, and for a ready-made pointer or a character array (length N - 1).
+template <typename T>
+typename T::ValueType* GetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) {
+    return pointer.Get(root, unresolvedTokenIndex);
+}
+
+template <typename T>
+const typename T::ValueType* GetValueByPointer(const T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) {
+    return pointer.Get(root, unresolvedTokenIndex);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType* GetValueByPointer(T& root, const CharType (&source)[N], size_t* unresolvedTokenIndex = 0) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex);
+}
+
+template <typename T, typename CharType, size_t N>
+const typename T::ValueType* GetValueByPointer(const T& root, const CharType(&source)[N], size_t* unresolvedTokenIndex = 0) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// GetValueByPointerWithDefault (explicit allocator): forwards to GenericPointer::GetWithDefault(root, defaultValue, a). The generic T2 overloads are disabled when T2 is a pointer or a GenericValue.
+template <typename T>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& defaultValue, typename T::AllocatorType& a) {
+    return pointer.GetWithDefault(root, defaultValue, a);
+}
+
+template <typename T>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* defaultValue, typename T::AllocatorType& a) {
+    return pointer.GetWithDefault(root, defaultValue, a);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename T>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) {
+    return pointer.GetWithDefault(root, defaultValue, a);
+}
+#endif
+
+template <typename T, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
+GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 defaultValue, typename T::AllocatorType& a) {
+    return pointer.GetWithDefault(root, defaultValue, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::ValueType& defaultValue, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::Ch* defaultValue, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+}
+#endif
+
+template <typename T, typename CharType, size_t N, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
+GetValueByPointerWithDefault(T& root, const CharType(&source)[N], T2 defaultValue, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+}
+
+// No allocator parameter: these overloads take a document and forward to GetWithDefault(document, defaultValue).
+
+template <typename DocumentType>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& defaultValue) {
+    return pointer.GetWithDefault(document, defaultValue);
+}
+
+template <typename DocumentType>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* defaultValue) {
+    return pointer.GetWithDefault(document, defaultValue);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename DocumentType>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& defaultValue) {
+    return pointer.GetWithDefault(document, defaultValue);
+}
+#endif
+
+template <typename DocumentType, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
+GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 defaultValue) {
+    return pointer.GetWithDefault(document, defaultValue);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& defaultValue) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* defaultValue) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& defaultValue) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+}
+#endif
+
+template <typename DocumentType, typename CharType, size_t N, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
+GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], T2 defaultValue) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// SetValueByPointer (explicit allocator): forwards to GenericPointer::Set(root, value, a). The generic T2 overloads are disabled when T2 is a pointer or a GenericValue.
+template <typename T>
+typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+
+template <typename T>
+typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+
+template <typename T>
+typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename T>
+typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+#endif
+
+template <typename T, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
+SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::ValueType& value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::Ch* value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+#endif
+
+template <typename T, typename CharType, size_t N, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
+SetValueByPointer(T& root, const CharType(&source)[N], T2 value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+
+// No allocator parameter: these overloads take a document and forward to Set(document, value).
+
+template <typename DocumentType>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) {
+    return pointer.Set(document, value);
+}
+
+template <typename DocumentType>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& value) {
+    return pointer.Set(document, value);
+}
+
+template <typename DocumentType>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* value) {
+    return pointer.Set(document, value);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename DocumentType>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& value) {
+    return pointer.Set(document, value);
+}
+#endif
+
+template <typename DocumentType, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
+SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 value) {
+    return pointer.Set(document, value);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+#endif
+
+template <typename DocumentType, typename CharType, size_t N, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
+SetValueByPointer(DocumentType& document, const CharType(&source)[N], T2 value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// SwapValueByPointer: forwards to GenericPointer::Swap(), either with an explicit allocator (root, value, a) or with a document (document, value); array overloads use length N - 1.
+template <typename T>
+typename T::ValueType& SwapValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) {
+    return pointer.Swap(root, value, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SwapValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Swap(root, value, a);
+}
+
+template <typename DocumentType>
+typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) {
+    return pointer.Swap(document, value);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Swap(document, value);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// EraseValueByPointer: forwards to GenericPointer::Erase(root) and returns its bool result; the array overload builds the pointer from source with length N - 1.
+template <typename T>
+bool EraseValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer) {
+    return pointer.Erase(root);
+}
+
+template <typename T, typename CharType, size_t N>
+bool EraseValueByPointer(T& root, const CharType(&source)[N]) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Erase(root);
+}
+
+//@}
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_POINTER_H_
diff --git a/rapidjson/prettywriter.h b/rapidjson/prettywriter.h
index 416dd49..98dfb30 100644
--- a/rapidjson/prettywriter.h
+++ b/rapidjson/prettywriter.h
@@ -22,8 +22,21 @@ RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
 #endif
 
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
 RAPIDJSON_NAMESPACE_BEGIN
 
+//! Combination of PrettyWriter format flags (bit values; PrettyWriter tests them with bitwise AND).
+/*! \see PrettyWriter::SetFormatOptions
+ */
+enum PrettyFormatOptions {
+    kFormatDefault = 0,         //!< Default pretty formatting (no flag set).
+    kFormatSingleLineArray = 1  //!< Format arrays on a single line (EndArray then skips the newline and indent before the closing bracket).
+};
+
 //! Writer with indentation and spacing.
 /*!
     \tparam OutputStream Type of ouptut os.
@@ -31,10 +44,10 @@ RAPIDJSON_NAMESPACE_BEGIN
     \tparam TargetEncoding Encoding of output stream.
     \tparam StackAllocator Type of allocator for allocating memory of stack.
 */
-template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator>
-class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator> {
+template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
+class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> {
 public:
-    typedef Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator> Base;
+    typedef Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> Base;
     typedef typename Base::Ch Ch;
 
     //! Constructor
@@ -42,8 +55,17 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
         \param allocator User supplied allocator. If it is null, it will create a private one.
         \param levelDepth Initial capacity of stack.
     */
-    PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
-        Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {}
+    explicit PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
+        Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4), formatOptions_(kFormatDefault) {} // starts with four-space indentation and kFormatDefault
+
+    //! Constructor taking no output stream (only allocator and levelDepth go to Base); formatOptions_ is initialised here as well, since EndArray() reads it.
+    explicit PrettyWriter(StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
+        Base(allocator, levelDepth), indentChar_(' '), indentCharCount_(4), formatOptions_(kFormatDefault) {}
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    PrettyWriter(PrettyWriter&& rhs) : // move constructor, compiled only when RAPIDJSON_HAS_CXX11_RVALUE_REFS is set
+        Base(std::forward<PrettyWriter>(rhs)), indentChar_(rhs.indentChar_), indentCharCount_(rhs.indentCharCount_), formatOptions_(rhs.formatOptions_) {} // Base is constructed from rhs as an rvalue; the three formatting fields are copied
+#endif
 
     //! Set custom indentation.
     /*! \param indentChar       Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r').
@@ -57,6 +79,14 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
         return *this;
     }
 
+    //! Set pretty writer formatting options.
+    /*! \param options Formatting options; stored as given and consulted by later calls (EndArray checks kFormatSingleLineArray).
+    */
+    PrettyWriter& SetFormatOptions(PrettyFormatOptions options) {
+        formatOptions_ = options;
+        return *this; // returns this writer so calls can be chained
+    }
+
     /*! @name Implementation of Handler
         \see Handler
     */
@@ -70,7 +100,15 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
     bool Uint64(uint64_t u64)   { PrettyPrefix(kNumberType); return Base::WriteUint64(u64);  }
     bool Double(double d)       { PrettyPrefix(kNumberType); return Base::WriteDouble(d); }
 
+    bool RawNumber(const Ch* str, SizeType length, bool copy = false) { // writes str/length after a kNumberType prefix
+        RAPIDJSON_ASSERT(str != 0);
+        (void)copy; // parameter is unused here; the cast silences the unused-parameter warning
+        PrettyPrefix(kNumberType);
+        return Base::WriteString(str, length); // NOTE(review): same Base::WriteString call that String() uses - confirm the text is meant to be emitted as a JSON string
+    }
+
     bool String(const Ch* str, SizeType length, bool copy = false) {
+        RAPIDJSON_ASSERT(str != 0);
         (void)copy;
         PrettyPrefix(kStringType);
         return Base::WriteString(str, length);
@@ -89,11 +127,19 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
     }
 
     bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool Key(const std::basic_string<Ch>& str) {
+        return Key(str.data(), SizeType(str.size()));
+    }
+#endif
 	
     bool EndObject(SizeType memberCount = 0) {
         (void)memberCount;
-        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level));
-        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray);
+        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); // not inside an Object
+        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == Base::level_stack_.template Top<typename Base::Level>()->valueCount % 2); // Object has a Key without a Value
+       
         bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
 
         if (!empty) {
@@ -104,7 +150,7 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
         (void)ret;
         RAPIDJSON_ASSERT(ret == true);
         if (Base::level_stack_.Empty()) // end of json text
-            Base::os_->Flush();
+            Base::Flush();
         return true;
     }
 
@@ -120,7 +166,7 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
         RAPIDJSON_ASSERT(Base::level_stack_.template Top<typename Base::Level>()->inArray);
         bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
 
-        if (!empty) {
+        if (!empty && !(formatOptions_ & kFormatSingleLineArray)) {
             Base::os_->Put('\n');
             WriteIndent();
         }
@@ -128,7 +174,7 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
         (void)ret;
         RAPIDJSON_ASSERT(ret == true);
         if (Base::level_stack_.Empty()) // end of json text
-            Base::os_->Flush();
+            Base::Flush();
         return true;
     }
 
@@ -142,6 +188,22 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
     bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); }
 
     //@}
+
+    //! Write a raw JSON value.
+    /*!
+        For user to write a stringified JSON as a value.
+
+        \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range.
+        \param length Length of the json.
+        \param type Type of the root of json.
+        \note When using PrettyWriter::RawValue(), the result json may not be indented correctly.
+    */
+    bool RawValue(const Ch* json, size_t length, Type type) {
+        RAPIDJSON_ASSERT(json != 0);
+        PrettyPrefix(type);
+        return Base::WriteRawValue(json, length);
+    }
+
 protected:
     void PrettyPrefix(Type type) {
         (void)type;
@@ -151,11 +213,14 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
             if (level->inArray) {
                 if (level->valueCount > 0) {
                     Base::os_->Put(','); // add comma if it is not the first element in array
-                    Base::os_->Put('\n');
+                    if (formatOptions_ & kFormatSingleLineArray)
+                        Base::os_->Put(' ');
                 }
-                else
+
+                if (!(formatOptions_ & kFormatSingleLineArray)) {
                     Base::os_->Put('\n');
-                WriteIndent();
+                    WriteIndent();
+                }
             }
             else {  // in object
                 if (level->valueCount > 0) {
@@ -186,11 +251,12 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
 
     void WriteIndent()  {
         size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_;
-        PutN(*Base::os_, indentChar_, count);
+        PutN(*Base::os_, static_cast<typename OutputStream::Ch>(indentChar_), count);
     }
 
     Ch indentChar_;
     unsigned indentCharCount_;
+    PrettyFormatOptions formatOptions_;
 
 private:
     // Prohibit copy constructor & assignment operator.
@@ -200,6 +266,10 @@ class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding,
 
 RAPIDJSON_NAMESPACE_END
 
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
 #ifdef __GNUC__
 RAPIDJSON_DIAG_POP
 #endif
diff --git a/rapidjson/rapidjson.h b/rapidjson/rapidjson.h
index f22130d..57ab851 100644
--- a/rapidjson/rapidjson.h
+++ b/rapidjson/rapidjson.h
@@ -49,6 +49,11 @@
 // token stringification
 #define RAPIDJSON_STRINGIFY(x) RAPIDJSON_DO_STRINGIFY(x)
 #define RAPIDJSON_DO_STRINGIFY(x) #x
+
+// token concatenation
+#define RAPIDJSON_JOIN(X, Y) RAPIDJSON_DO_JOIN(X, Y)
+#define RAPIDJSON_DO_JOIN(X, Y) RAPIDJSON_DO_JOIN2(X, Y)
+#define RAPIDJSON_DO_JOIN2(X, Y) X##Y
 //!@endcond
 
 /*! \def RAPIDJSON_MAJOR_VERSION
@@ -68,8 +73,8 @@
     \brief Version of RapidJSON in "<major>.<minor>.<patch>" string format.
 */
 #define RAPIDJSON_MAJOR_VERSION 1
-#define RAPIDJSON_MINOR_VERSION 0
-#define RAPIDJSON_PATCH_VERSION 2
+#define RAPIDJSON_MINOR_VERSION 1
+#define RAPIDJSON_PATCH_VERSION 0
 #define RAPIDJSON_VERSION_STRING \
     RAPIDJSON_STRINGIFY(RAPIDJSON_MAJOR_VERSION.RAPIDJSON_MINOR_VERSION.RAPIDJSON_PATCH_VERSION)
 
@@ -119,6 +124,31 @@
 #define RAPIDJSON_NAMESPACE_END }
 #endif
 
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_HAS_STDSTRING
+
+#ifndef RAPIDJSON_HAS_STDSTRING
+#ifdef RAPIDJSON_DOXYGEN_RUNNING
+#define RAPIDJSON_HAS_STDSTRING 1 // force generation of documentation
+#else
+#define RAPIDJSON_HAS_STDSTRING 0 // no std::string support by default
+#endif
+/*! \def RAPIDJSON_HAS_STDSTRING
+    \ingroup RAPIDJSON_CONFIG
+    \brief Enable RapidJSON support for \c std::string
+
+    By defining this preprocessor symbol to \c 1, several convenience functions for using
+    \ref rapidjson::GenericValue with \c std::string are enabled, especially
+    for construction and comparison.
+
+    \hideinitializer
+*/
+#endif // !defined(RAPIDJSON_HAS_STDSTRING)
+
+#if RAPIDJSON_HAS_STDSTRING
+#include <string>
+#endif // RAPIDJSON_HAS_STDSTRING
+
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_NO_INT64DEFINE
 
@@ -134,7 +164,7 @@
 */
 #ifndef RAPIDJSON_NO_INT64DEFINE
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && (_MSC_VER < 1800)	// Visual Studio 2013
 #include "msinttypes/stdint.h"
 #include "msinttypes/inttypes.h"
 #else
@@ -153,9 +183,9 @@
 
 #ifndef RAPIDJSON_FORCEINLINE
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
-#if defined(_MSC_VER) && !defined(NDEBUG)
+#if defined(_MSC_VER) && defined(NDEBUG)
 #define RAPIDJSON_FORCEINLINE __forceinline
-#elif defined(__GNUC__) && __GNUC__ >= 4 && !defined(NDEBUG)
+#elif defined(__GNUC__) && __GNUC__ >= 4 && defined(NDEBUG)
 #define RAPIDJSON_FORCEINLINE __attribute__((always_inline))
 #else
 #define RAPIDJSON_FORCEINLINE
@@ -211,6 +241,8 @@
 #    define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
 #  elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__)
 #    define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#  elif defined(_MSC_VER) && defined(_M_ARM)
+#    define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
 #  elif defined(RAPIDJSON_DOXYGEN_RUNNING)
 #    define RAPIDJSON_ENDIAN
 #  else
@@ -223,7 +255,7 @@
 
 //! Whether using 64-bit architecture
 #ifndef RAPIDJSON_64BIT
-#if defined(__LP64__) || defined(_WIN64)
+#if defined(__LP64__) || (defined(__x86_64__) && defined(__ILP32__)) || defined(_WIN64) || defined(__EMSCRIPTEN__)
 #define RAPIDJSON_64BIT 1
 #else
 #define RAPIDJSON_64BIT 0
@@ -238,13 +270,14 @@
     \param x pointer to align
 
     Some machines require strict data alignment. Currently the default uses 4 bytes
-    alignment. User can customize by defining the RAPIDJSON_ALIGN function macro.,
+    alignment on 32-bit platforms and 8 bytes alignment for 64-bit platforms.
+    User can customize by defining the RAPIDJSON_ALIGN function macro.
 */
 #ifndef RAPIDJSON_ALIGN
 #if RAPIDJSON_64BIT == 1
-#define RAPIDJSON_ALIGN(x) ((x + 7u) & ~7u)
+#define RAPIDJSON_ALIGN(x) (((x) + static_cast<uint64_t>(7u)) & ~static_cast<uint64_t>(7u))
 #else
-#define RAPIDJSON_ALIGN(x) ((x + 3u) & ~3u)
+#define RAPIDJSON_ALIGN(x) (((x) + 3u) & ~3u)
 #endif
 #endif
 
@@ -262,17 +295,47 @@
 #endif
 
 ///////////////////////////////////////////////////////////////////////////////
-// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD
+// RAPIDJSON_48BITPOINTER_OPTIMIZATION
+
+//! Use only lower 48-bit address for some pointers.
+/*!
+    \ingroup RAPIDJSON_CONFIG
+
+    This optimization uses the fact that the current x86-64 architecture only implements the lower 48 bits of a virtual address.
+    The upper 16 bits can be used for storing other data.
+    \c GenericValue uses this optimization to reduce its size from 24 bytes to 16 bytes on 64-bit architectures.
+*/
+#ifndef RAPIDJSON_48BITPOINTER_OPTIMIZATION
+#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 1
+#else
+#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 0
+#endif
+#endif // RAPIDJSON_48BITPOINTER_OPTIMIZATION
+
+#if RAPIDJSON_48BITPOINTER_OPTIMIZATION == 1
+#if RAPIDJSON_64BIT != 1
+#error RAPIDJSON_48BITPOINTER_OPTIMIZATION can only be set to 1 when RAPIDJSON_64BIT=1
+#endif
+#define RAPIDJSON_SETPOINTER(type, p, x) (p = reinterpret_cast<type *>((reinterpret_cast<uintptr_t>(p) & static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0xFFFF0000, 0x00000000))) | reinterpret_cast<uintptr_t>(reinterpret_cast<const void*>(x))))
+#define RAPIDJSON_GETPOINTER(type, p) (reinterpret_cast<type *>(reinterpret_cast<uintptr_t>(p) & static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0x0000FFFF, 0xFFFFFFFF))))
+#else
+#define RAPIDJSON_SETPOINTER(type, p, x) (p = (x))
+#define RAPIDJSON_GETPOINTER(type, p) (p)
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_NEON/RAPIDJSON_SIMD
 
 /*! \def RAPIDJSON_SIMD
     \ingroup RAPIDJSON_CONFIG
-    \brief Enable SSE2/SSE4.2 optimization.
+    \brief Enable SSE2/SSE4.2/Neon optimization.
 
     RapidJSON supports optimized implementations for some parsing operations
-    based on the SSE2 or SSE4.2 SIMD extensions on modern Intel-compatible
-    processors.
+    based on the SSE2, SSE4.2 or NEON SIMD extensions on modern Intel
+    or ARM compatible processors.
 
-    To enable these optimizations, two different symbols can be defined;
+    To enable these optimizations, three different symbols can be defined;
     \code
     // Enable SSE2 optimization.
     #define RAPIDJSON_SSE2
@@ -281,13 +344,17 @@
     #define RAPIDJSON_SSE42
     \endcode
 
-    \c RAPIDJSON_SSE42 takes precedence, if both are defined.
+    \code
+    // Enable ARM Neon optimization.
+    #define RAPIDJSON_NEON
+    \endcode
+    \c RAPIDJSON_SSE42 takes precedence over SSE2, if both are defined.
 
     If any of these symbols is defined, RapidJSON defines the macro
     \c RAPIDJSON_SIMD to indicate the availability of the optimized code.
 */
 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) \
-    || defined(RAPIDJSON_DOXYGEN_RUNNING)
+    || defined(RAPIDJSON_NEON) || defined(RAPIDJSON_DOXYGEN_RUNNING)
 #define RAPIDJSON_SIMD
 #endif
 
@@ -347,25 +414,33 @@ RAPIDJSON_NAMESPACE_END
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_STATIC_ASSERT
 
-// Adopt from boost
+// Prefer C++11 static_assert, if available
 #ifndef RAPIDJSON_STATIC_ASSERT
+#if __cplusplus >= 201103L || ( defined(_MSC_VER) && _MSC_VER >= 1800 )
+#define RAPIDJSON_STATIC_ASSERT(x) \
+   static_assert(x, RAPIDJSON_STRINGIFY(x))
+#endif // C++11
+#endif // RAPIDJSON_STATIC_ASSERT
+
+// Adopt C++03 implementation from boost
+#ifndef RAPIDJSON_STATIC_ASSERT
+#ifndef __clang__
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+#endif
 RAPIDJSON_NAMESPACE_BEGIN
 template <bool x> struct STATIC_ASSERTION_FAILURE;
 template <> struct STATIC_ASSERTION_FAILURE<true> { enum { value = 1 }; };
-template<int x> struct StaticAssertTest {};
+template <size_t x> struct StaticAssertTest {};
 RAPIDJSON_NAMESPACE_END
 
-#define RAPIDJSON_JOIN(X, Y) RAPIDJSON_DO_JOIN(X, Y)
-#define RAPIDJSON_DO_JOIN(X, Y) RAPIDJSON_DO_JOIN2(X, Y)
-#define RAPIDJSON_DO_JOIN2(X, Y) X##Y
-
 #if defined(__GNUC__)
 #define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused))
 #else
 #define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE 
 #endif
+#ifndef __clang__
 //!@endcond
+#endif
 
 /*! \def RAPIDJSON_STATIC_ASSERT
     \brief (Internal) macro to check for conditions at compile-time
@@ -376,6 +451,35 @@ RAPIDJSON_NAMESPACE_END
     typedef ::RAPIDJSON_NAMESPACE::StaticAssertTest< \
       sizeof(::RAPIDJSON_NAMESPACE::STATIC_ASSERTION_FAILURE<bool(x) >)> \
     RAPIDJSON_JOIN(StaticAssertTypedef, __LINE__) RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE
+#endif // RAPIDJSON_STATIC_ASSERT
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_LIKELY, RAPIDJSON_UNLIKELY
+
+//! Compiler branching hint for expression with high probability to be true.
+/*!
+    \ingroup RAPIDJSON_CONFIG
+    \param x Boolean expression likely to be true.
+*/
+#ifndef RAPIDJSON_LIKELY
+#if defined(__GNUC__) || defined(__clang__)
+#define RAPIDJSON_LIKELY(x) __builtin_expect(!!(x), 1)
+#else
+#define RAPIDJSON_LIKELY(x) (x)
+#endif
+#endif
+
+//! Compiler branching hint for expression with low probability to be true.
+/*!
+    \ingroup RAPIDJSON_CONFIG
+    \param x Boolean expression unlikely to be true.
+*/
+#ifndef RAPIDJSON_UNLIKELY
+#if defined(__GNUC__) || defined(__clang__)
+#define RAPIDJSON_UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+#define RAPIDJSON_UNLIKELY(x) (x)
+#endif
 #endif
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -438,8 +542,12 @@ RAPIDJSON_NAMESPACE_END
 
 #ifndef RAPIDJSON_HAS_CXX11_RVALUE_REFS
 #if defined(__clang__)
-#define RAPIDJSON_HAS_CXX11_RVALUE_REFS __has_feature(cxx_rvalue_references) && \
+#if __has_feature(cxx_rvalue_references) && \
     (defined(_LIBCPP_VERSION) || defined(__GLIBCXX__) && __GLIBCXX__ >= 20080306)
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
+#else
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0
+#endif
 #elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,3,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
       (defined(_MSC_VER) && _MSC_VER >= 1600)
 
@@ -470,6 +578,17 @@ RAPIDJSON_NAMESPACE_END
 #define RAPIDJSON_HAS_CXX11_TYPETRAITS 0
 #endif
 
+#ifndef RAPIDJSON_HAS_CXX11_RANGE_FOR
+#if defined(__clang__)
+#define RAPIDJSON_HAS_CXX11_RANGE_FOR __has_feature(cxx_range_for)
+#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
+      (defined(_MSC_VER) && _MSC_VER >= 1700)
+#define RAPIDJSON_HAS_CXX11_RANGE_FOR 1
+#else
+#define RAPIDJSON_HAS_CXX11_RANGE_FOR 0
+#endif
+#endif // RAPIDJSON_HAS_CXX11_RANGE_FOR
+
 //!@endcond
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -477,7 +596,7 @@ RAPIDJSON_NAMESPACE_END
 
 #ifndef RAPIDJSON_NEW
 ///! customization point for global \c new
-#define RAPIDJSON_NEW(x) new x
+#define RAPIDJSON_NEW(TypeName) new TypeName
 #endif
 #ifndef RAPIDJSON_DELETE
 ///! customization point for global \c delete
@@ -485,10 +604,7 @@ RAPIDJSON_NAMESPACE_END
 #endif
 
 ///////////////////////////////////////////////////////////////////////////////
-// Allocators and Encodings
-
-#include "allocators.h"
-#include "encodings.h"
+// Type
 
 /*! \namespace rapidjson
     \brief main RapidJSON namespace
@@ -496,148 +612,6 @@ RAPIDJSON_NAMESPACE_END
 */
 RAPIDJSON_NAMESPACE_BEGIN
 
-///////////////////////////////////////////////////////////////////////////////
-//  Stream
-
-/*! \class rapidjson::Stream
-    \brief Concept for reading and writing characters.
-
-    For read-only stream, no need to implement PutBegin(), Put(), Flush() and PutEnd().
-
-    For write-only stream, only need to implement Put() and Flush().
-
-\code
-concept Stream {
-    typename Ch;    //!< Character type of the stream.
-
-    //! Read the current character from stream without moving the read cursor.
-    Ch Peek() const;
-
-    //! Read the current character from stream and moving the read cursor to next character.
-    Ch Take();
-
-    //! Get the current read cursor.
-    //! \return Number of characters read from start.
-    size_t Tell();
-
-    //! Begin writing operation at the current read pointer.
-    //! \return The begin writer pointer.
-    Ch* PutBegin();
-
-    //! Write a character.
-    void Put(Ch c);
-
-    //! Flush the buffer.
-    void Flush();
-
-    //! End the writing operation.
-    //! \param begin The begin write pointer returned by PutBegin().
-    //! \return Number of characters written.
-    size_t PutEnd(Ch* begin);
-}
-\endcode
-*/
-
-//! Provides additional information for stream.
-/*!
-    By using traits pattern, this type provides a default configuration for stream.
-    For custom stream, this type can be specialized for other configuration.
-    See TEST(Reader, CustomStringStream) in readertest.cpp for example.
-*/
-template<typename Stream>
-struct StreamTraits {
-    //! Whether to make local copy of stream for optimization during parsing.
-    /*!
-        By default, for safety, streams do not use local copy optimization.
-        Stream that can be copied fast should specialize this, like StreamTraits<StringStream>.
-    */
-    enum { copyOptimization = 0 };
-};
-
-//! Put N copies of a character to a stream.
-template<typename Stream, typename Ch>
-inline void PutN(Stream& stream, Ch c, size_t n) {
-    for (size_t i = 0; i < n; i++)
-        stream.Put(c);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// StringStream
-
-//! Read-only string stream.
-/*! \note implements Stream concept
-*/
-template <typename Encoding>
-struct GenericStringStream {
-    typedef typename Encoding::Ch Ch;
-
-    GenericStringStream(const Ch *src) : src_(src), head_(src) {}
-
-    Ch Peek() const { return *src_; }
-    Ch Take() { return *src_++; }
-    size_t Tell() const { return static_cast<size_t>(src_ - head_); }
-
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    void Put(Ch) { RAPIDJSON_ASSERT(false); }
-    void Flush() { RAPIDJSON_ASSERT(false); }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
-
-    const Ch* src_;     //!< Current read position.
-    const Ch* head_;    //!< Original head of the string.
-};
-
-template <typename Encoding>
-struct StreamTraits<GenericStringStream<Encoding> > {
-    enum { copyOptimization = 1 };
-};
-
-//! String stream with UTF8 encoding.
-typedef GenericStringStream<UTF8<> > StringStream;
-
-///////////////////////////////////////////////////////////////////////////////
-// InsituStringStream
-
-//! A read-write string stream.
-/*! This string stream is particularly designed for in-situ parsing.
-    \note implements Stream concept
-*/
-template <typename Encoding>
-struct GenericInsituStringStream {
-    typedef typename Encoding::Ch Ch;
-
-    GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {}
-
-    // Read
-    Ch Peek() { return *src_; }
-    Ch Take() { return *src_++; }
-    size_t Tell() { return static_cast<size_t>(src_ - head_); }
-
-    // Write
-    void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; }
-
-    Ch* PutBegin() { return dst_ = src_; }
-    size_t PutEnd(Ch* begin) { return static_cast<size_t>(dst_ - begin); }
-    void Flush() {}
-
-    Ch* Push(size_t count) { Ch* begin = dst_; dst_ += count; return begin; }
-    void Pop(size_t count) { dst_ -= count; }
-
-    Ch* src_;
-    Ch* dst_;
-    Ch* head_;
-};
-
-template <typename Encoding>
-struct StreamTraits<GenericInsituStringStream<Encoding> > {
-    enum { copyOptimization = 1 };
-};
-
-//! Insitu string stream with UTF8 encoding.
-typedef GenericInsituStringStream<UTF8<> > InsituStringStream;
-
-///////////////////////////////////////////////////////////////////////////////
-// Type
-
 //! Type of JSON value
 enum Type {
     kNullType = 0,      //!< null
diff --git a/rapidjson/reader.h b/rapidjson/reader.h
index c5ecf4b..120c311 100644
--- a/rapidjson/reader.h
+++ b/rapidjson/reader.h
@@ -1,5 +1,5 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
+//
 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
@@ -7,9 +7,9 @@
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_READER_H_
@@ -17,11 +17,13 @@
 
 /*! \file reader.h */
 
-#include "rapidjson.h"
-#include "encodings.h"
+#include "allocators.h"
+#include "stream.h"
+#include "encodedstream.h"
 #include "internal/meta.h"
 #include "internal/stack.h"
 #include "internal/strtod.h"
+#include <limits>
 
 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
 #include <intrin.h>
@@ -31,6 +33,8 @@
 #include <nmmintrin.h>
 #elif defined(RAPIDJSON_SSE2)
 #include <emmintrin.h>
+#elif defined(RAPIDJSON_NEON)
+#include <arm_neon.h>
 #endif
 
 #ifdef _MSC_VER
@@ -39,6 +43,13 @@ RAPIDJSON_DIAG_OFF(4127)  // conditional expression is constant
 RAPIDJSON_DIAG_OFF(4702)  // unreachable code
 #endif
 
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(old-style-cast)
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+#endif
+
 #ifdef __GNUC__
 RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
@@ -49,7 +60,7 @@ RAPIDJSON_DIAG_OFF(effc++)
 #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
     RAPIDJSON_MULTILINEMACRO_BEGIN \
-    if (HasParseError()) { return value; } \
+    if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
     RAPIDJSON_MULTILINEMACRO_END
 #endif
 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
@@ -120,7 +131,7 @@ RAPIDJSON_NAMESPACE_BEGIN
 ///////////////////////////////////////////////////////////////////////////////
 // ParseFlag
 
-/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS 
+/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
     \ingroup RAPIDJSON_CONFIG
     \brief User-defined kParseDefaultFlags definition.
 
@@ -140,6 +151,10 @@ enum ParseFlag {
     kParseIterativeFlag = 4,        //!< Iterative(constant complexity in terms of function call stack size) parsing.
     kParseStopWhenDoneFlag = 8,     //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
     kParseFullPrecisionFlag = 16,   //!< Parse number in full precision (but slower).
+    kParseCommentsFlag = 32,        //!< Allow one-line (//) and multi-line (/**/) comments.
+    kParseNumbersAsStringsFlag = 64,    //!< Parse all numbers (ints/doubles) as strings.
+    kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
+    kParseNanAndInfFlag = 256,      //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
     kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
 };
 
@@ -148,7 +163,7 @@ enum ParseFlag {
 
 /*! \class rapidjson::Handler
     \brief Concept for receiving events from GenericReader upon parsing.
-    The functions return true if no error occurs. If they return false, 
+    The functions return true if no error occurs. If they return false,
     the event publisher should terminate the process.
 \code
 concept Handler {
@@ -161,6 +176,8 @@ concept Handler {
     bool Int64(int64_t i);
     bool Uint64(uint64_t i);
     bool Double(double d);
+    /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
+    bool RawNumber(const Ch* str, SizeType length, bool copy);
     bool String(const Ch* str, SizeType length, bool copy);
     bool StartObject();
     bool Key(const Ch* str, SizeType length, bool copy);
@@ -191,6 +208,8 @@ struct BaseReaderHandler {
     bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
     bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
     bool Double(double) { return static_cast<Override&>(*this).Default(); }
+    /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
+    bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
     bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
     bool StartObject() { return static_cast<Override&>(*this).Default(); }
     bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
@@ -248,10 +267,17 @@ void SkipWhitespace(InputStream& is) {
     internal::StreamLocalCopy<InputStream> copy(is);
     InputStream& s(copy.s);
 
-    while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
+    typename InputStream::Ch c;
+    while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
         s.Take();
 }
 
+inline const char* SkipWhitespace(const char* p, const char* end) {   // scalar whitespace scan bounded by end
+    while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))   // end is tested before *p is read
+        ++p;
+    return p;   // first non-whitespace character, or end if only whitespace remained
+}
+
 #ifdef RAPIDJSON_SSE42
 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
 inline const char *SkipWhitespace_SIMD(const char* p) {
@@ -262,7 +288,7 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
         return p;
 
     // 16-byte align to the next boundary
-    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15);
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
     while (p != nextAligned)
         if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
             ++p;
@@ -271,23 +297,37 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
 
     // The rest of string using SIMD
     static const char whitespace[16] = " \n\r\t";
-    const __m128i w = _mm_load_si128((const __m128i *)&whitespace[0]);
+    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
 
     for (;; p += 16) {
-        const __m128i s = _mm_load_si128((const __m128i *)p);
-        const unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
-        if (r != 0) {   // some of characters is non-whitespace
-#ifdef _MSC_VER         // Find the index of first non-whitespace
-            unsigned long offset;
-            _BitScanForward(&offset, r);
-            return p + offset;
-#else
-            return p + __builtin_ffs(r) - 1;
-#endif
-        }
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
+        if (r != 16)    // some of characters is non-whitespace
+            return p + r;
     }
 }
 
+inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+    // Bounded variant over [p, end). Fast return when empty or the first character is not whitespace
+    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+        ++p;
+    else
+        return p;
+
+    // The middle of string using SIMD, one unaligned 16-byte block at a time
+    static const char whitespace[16] = " \n\r\t";
+    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
+
+    for (; end - p >= 16; p += 16) {    // compare distances: `end - 16` could point before the buffer
+        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
+        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
+        if (r != 16)    // r indexes the first non-whitespace byte; 16 means none in this block
+            return p + r;
+    }
+
+    return SkipWhitespace(p, end);  // fewer than 16 bytes left: finish with the scalar scan
+}
+
 #elif defined(RAPIDJSON_SSE2)
 
 //! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
@@ -299,7 +339,7 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
         return p;
 
     // 16-byte align to the next boundary
-    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15);
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
     while (p != nextAligned)
         if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
             ++p;
@@ -307,24 +347,58 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
             return p;
 
     // The rest of string
-    static const char whitespaces[4][17] = {
-        "                ",
-        "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
-        "\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r",
-        "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"};
+    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
+    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
+    #undef C16
 
-        const __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]);
-        const __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]);
-        const __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]);
-        const __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]);
+    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
+    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
+    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
+    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
 
     for (;; p += 16) {
-        const __m128i s = _mm_load_si128((const __m128i *)p);
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        __m128i x = _mm_cmpeq_epi8(s, w0);
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
+        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
+        if (r != 0) {   // some of characters may be non-whitespace
+#ifdef _MSC_VER         // Find the index of first non-whitespace
+            unsigned long offset;
+            _BitScanForward(&offset, r);
+            return p + offset;
+#else
+            return p + __builtin_ffs(r) - 1;
+#endif
+        }
+    }
+}
+
+inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+    // Fast return for single non-whitespace
+    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+        ++p;
+    else
+        return p;
+
+    // The rest of string
+    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
+    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
+    #undef C16
+
+    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
+    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
+    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
+    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
+
+    for (; p <= end - 16; p += 16) {
+        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
         __m128i x = _mm_cmpeq_epi8(s, w0);
         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
-        unsigned short r = (unsigned short)~_mm_movemask_epi8(x);
+        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
         if (r != 0) {   // some of characters may be non-whitespace
 #ifdef _MSC_VER         // Find the index of first non-whitespace
             unsigned long offset;
@@ -335,13 +409,100 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
 #endif
         }
     }
+
+    return SkipWhitespace(p, end);
+}
+
+#elif defined(RAPIDJSON_NEON)
+
+//! Skip whitespace with ARM Neon instructions, testing 16 8-bit characters at once.
+inline const char *SkipWhitespace_SIMD(const char* p) {
+    // Fast return for single non-whitespace
+    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+        ++p;
+    else
+        return p;
+
+    // 16-byte align to the next boundary
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    while (p != nextAligned)
+        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+            ++p;
+        else
+            return p;
+
+    const uint8x16_t w0 = vmovq_n_u8(' ');
+    const uint8x16_t w1 = vmovq_n_u8('\n');
+    const uint8x16_t w2 = vmovq_n_u8('\r');
+    const uint8x16_t w3 = vmovq_n_u8('\t');
+
+    for (;; p += 16) {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, w0);
+        x = vorrq_u8(x, vceqq_u8(s, w1));
+        x = vorrq_u8(x, vceqq_u8(s, w2));
+        x = vorrq_u8(x, vceqq_u8(s, w3));
+
+        x = vmvnq_u8(x);                       // Negate
+        x = vrev64q_u8(x);                     // Rev in 64
+        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+        if (low == 0) {
+            if (high != 0) {
+                int lz =__builtin_clzll(high);;
+                return p + 8 + (lz >> 3);
+            }
+        } else {
+            int lz = __builtin_clzll(low);;
+            return p + (lz >> 3);
+        }
+    }
+}
+
+inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+    // Fast return for single non-whitespace
+    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+        ++p;
+    else
+        return p;
+
+    const uint8x16_t w0 = vmovq_n_u8(' ');
+    const uint8x16_t w1 = vmovq_n_u8('\n');
+    const uint8x16_t w2 = vmovq_n_u8('\r');
+    const uint8x16_t w3 = vmovq_n_u8('\t');
+
+    for (; p <= end - 16; p += 16) {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, w0);
+        x = vorrq_u8(x, vceqq_u8(s, w1));
+        x = vorrq_u8(x, vceqq_u8(s, w2));
+        x = vorrq_u8(x, vceqq_u8(s, w3));
+
+        x = vmvnq_u8(x);                       // Negate
+        x = vrev64q_u8(x);                     // Rev in 64
+        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+        if (low == 0) {
+            if (high != 0) {
+                int lz = __builtin_clzll(high);
+                return p + 8 + (lz >> 3);
+            }
+        } else {
+            int lz = __builtin_clzll(low);
+            return p + (lz >> 3);
+        }
+    }
+
+    return SkipWhitespace(p, end);
 }
 
-#endif // RAPIDJSON_SSE2
+#endif // RAPIDJSON_NEON
 
 #ifdef RAPIDJSON_SIMD
 //! Template function specialization for InsituStringStream
-template<> inline void SkipWhitespace(InsituStringStream& is) { 
+template<> inline void SkipWhitespace(InsituStringStream& is) {
     is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
 }
 
@@ -349,23 +510,27 @@ template<> inline void SkipWhitespace(InsituStringStream& is) {
 template<> inline void SkipWhitespace(StringStream& is) {
     is.src_ = SkipWhitespace_SIMD(is.src_);
 }
+
+template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
+    is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
+}
 #endif // RAPIDJSON_SIMD
 
 ///////////////////////////////////////////////////////////////////////////////
 // GenericReader
 
 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
-/*! GenericReader parses JSON text from a stream, and send events synchronously to an 
+/*! GenericReader parses JSON text from a stream, and send events synchronously to an
     object implementing Handler concept.
 
-    It needs to allocate a stack for storing a single decoded string during 
+    It needs to allocate a stack for storing a single decoded string during
     non-destructive parsing.
 
-    For in-situ parsing, the decoded string is directly written to the source 
+    For in-situ parsing, the decoded string is directly written to the source
     text string, no temporary buffer is required.
 
     A GenericReader object can be reused for parsing multiple JSON text.
-    
+
     \tparam SourceEncoding Encoding of the input stream.
     \tparam TargetEncoding Encoding of the parse output.
     \tparam StackAllocator Allocator type for stack.
@@ -398,9 +563,10 @@ class GenericReader {
 
         ClearStackOnExit scope(*this);
 
-        SkipWhitespace(is);
+        SkipWhitespaceAndComments<parseFlags>(is);
+        RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
 
-        if (is.Peek() == '\0') {
+        if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
             RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
         }
@@ -409,9 +575,10 @@ class GenericReader {
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
 
             if (!(parseFlags & kParseStopWhenDoneFlag)) {
-                SkipWhitespace(is);
+                SkipWhitespaceAndComments<parseFlags>(is);
+                RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
 
-                if (is.Peek() != '\0') {
+                if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
                     RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
                 }
@@ -433,9 +600,86 @@ class GenericReader {
         return Parse<kParseDefaultFlags>(is, handler);
     }
 
+    //! Initialize JSON text token-by-token parsing
+    /*! Clears the previous parse result and resets the parser to its start state.
+     */
+    void IterativeParseInit() {
+        parseResult_.Clear();
+        state_ = IterativeParsingStartState;
+    }
+    
+    //! Parse one token from JSON text
+    /*! \tparam InputStream Type of input stream, implementing Stream concept
+        \tparam Handler Type of handler, implementing Handler concept.
+        \param is Input stream to be parsed.
+        \param handler The handler to receive events.
+        \return Whether the parsing is successful.
+     */
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    bool IterativeParseNext(InputStream& is, Handler& handler) {
+        while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
+            SkipWhitespaceAndComments<parseFlags>(is);
+            
+            Token t = Tokenize(is.Peek());
+            IterativeParsingState n = Predict(state_, t);
+            IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
+            
+            // If we've finished or hit an error...
+            if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
+                // Report errors.
+                if (d == IterativeParsingErrorState) {
+                    HandleError(state_, is);
+                    return false;
+                }
+            
+                // Transition to the finish state.
+                RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
+                state_ = d;
+                
+                // If StopWhenDone is not set...
+                if (!(parseFlags & kParseStopWhenDoneFlag)) {
+                    // ... and extra non-whitespace data is found...
+                    SkipWhitespaceAndComments<parseFlags>(is);
+                    if (is.Peek() != '\0') {
+                        // ... this is considered an error.
+                        HandleError(state_, is);
+                        return false;
+                    }
+                }
+                
+                // Success! We are done!
+                return true;
+            }
+            
+            // Transition to the new state.
+            state_ = d;
+
+            // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
+            if (!IsIterativeParsingDelimiterState(n))
+                return true;
+        }
+        
+        // We reached the end of file.
+        stack_.Clear();
+
+        if (state_ != IterativeParsingFinishState) {
+            HandleError(state_, is);
+            return false;
+        }
+        
+        return true;
+    }
+    
+    //! Check if token-by-token parsing JSON text is complete
+    /*! \return Whether the JSON has been fully decoded.
+     */
+    RAPIDJSON_FORCEINLINE bool IterativeParseComplete() {
+        return IsIterativeParsingCompleteState(state_);
+    }
+
     //! Whether a parse error has occured in the last parsing.
     bool HasParseError() const { return parseResult_.IsError(); }
-    
+
     //! Get the \ref ParseErrorCode of last parsing.
     ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
 
@@ -462,52 +706,98 @@ class GenericReader {
         ClearStackOnExit& operator=(const ClearStackOnExit&);
     };
 
+    template<unsigned parseFlags, typename InputStream>
+    void SkipWhitespaceAndComments(InputStream& is) {
+        SkipWhitespace(is);
+
+        if (parseFlags & kParseCommentsFlag) {
+            while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
+                if (Consume(is, '*')) {
+                    while (true) {
+                        if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
+                            RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
+                        else if (Consume(is, '*')) {
+                            if (Consume(is, '/'))
+                                break;
+                        }
+                        else
+                            is.Take();
+                    }
+                }
+                else if (RAPIDJSON_LIKELY(Consume(is, '/')))
+                    while (is.Peek() != '\0' && is.Take() != '\n') {}
+                else
+                    RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
+
+                SkipWhitespace(is);
+            }
+        }
+    }
+
     // Parse object: { string : value, ... }
     template<unsigned parseFlags, typename InputStream, typename Handler>
     void ParseObject(InputStream& is, Handler& handler) {
         RAPIDJSON_ASSERT(is.Peek() == '{');
         is.Take();  // Skip '{'
-        
-        if (!handler.StartObject())
+
+        if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
 
-        SkipWhitespace(is);
+        SkipWhitespaceAndComments<parseFlags>(is);
+        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-        if (is.Peek() == '}') {
-            is.Take();
-            if (!handler.EndObject(0))  // empty object
+        if (Consume(is, '}')) {
+            if (RAPIDJSON_UNLIKELY(!handler.EndObject(0)))  // empty object
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
             return;
         }
 
         for (SizeType memberCount = 0;;) {
-            if (is.Peek() != '"')
+            if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
                 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
 
             ParseString<parseFlags>(is, handler, true);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-            SkipWhitespace(is);
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-            if (is.Take() != ':')
+            if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
                 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
 
-            SkipWhitespace(is);
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
             ParseValue<parseFlags>(is, handler);
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-            SkipWhitespace(is);
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
             ++memberCount;
 
-            switch (is.Take()) {
-                case ',': SkipWhitespace(is); break;
-                case '}': 
-                    if (!handler.EndObject(memberCount))
+            switch (is.Peek()) {
+                case ',':
+                    is.Take();
+                    SkipWhitespaceAndComments<parseFlags>(is);
+                    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+                    break;
+                case '}':
+                    is.Take();
+                    if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
                     return;
-                default:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
+                default:
+                    RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This otherwise useless break only exists to keep compiler warnings and coverage tools happy
+            }
+
+            if (parseFlags & kParseTrailingCommasFlag) {
+                if (is.Peek() == '}') {
+                    if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
+                        RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+                    is.Take();
+                    return;
+                }
             }
         }
     }
@@ -517,15 +807,15 @@ class GenericReader {
     void ParseArray(InputStream& is, Handler& handler) {
         RAPIDJSON_ASSERT(is.Peek() == '[');
         is.Take();  // Skip '['
-        
-        if (!handler.StartArray())
+
+        if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
-        
-        SkipWhitespace(is);
 
-        if (is.Peek() == ']') {
-            is.Take();
-            if (!handler.EndArray(0)) // empty array
+        SkipWhitespaceAndComments<parseFlags>(is);
+        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+        if (Consume(is, ']')) {
+            if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
             return;
         }
@@ -535,15 +825,28 @@ class GenericReader {
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
             ++elementCount;
-            SkipWhitespace(is);
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
 
-            switch (is.Take()) {
-                case ',': SkipWhitespace(is); break;
-                case ']': 
-                    if (!handler.EndArray(elementCount))
+            if (Consume(is, ',')) {
+                SkipWhitespaceAndComments<parseFlags>(is);
+                RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+            }
+            else if (Consume(is, ']')) {
+                if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
+                    RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+                return;
+            }
+            else
+                RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
+
+            if (parseFlags & kParseTrailingCommasFlag) {
+                if (is.Peek() == ']') {
+                    if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+                    is.Take();
                     return;
-                default:  RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
+                }
             }
         }
     }
@@ -553,12 +856,12 @@ class GenericReader {
         RAPIDJSON_ASSERT(is.Peek() == 'n');
         is.Take();
 
-        if (is.Take() == 'u' && is.Take() == 'l' && is.Take() == 'l') {
-            if (!handler.Null())
+        if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
+            if (RAPIDJSON_UNLIKELY(!handler.Null()))
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
         }
         else
-            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
+            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
     }
 
     template<unsigned parseFlags, typename InputStream, typename Handler>
@@ -566,12 +869,12 @@ class GenericReader {
         RAPIDJSON_ASSERT(is.Peek() == 't');
         is.Take();
 
-        if (is.Take() == 'r' && is.Take() == 'u' && is.Take() == 'e') {
-            if (!handler.Bool(true))
+        if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
+            if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
         }
         else
-            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
+            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
     }
 
     template<unsigned parseFlags, typename InputStream, typename Handler>
@@ -579,20 +882,30 @@ class GenericReader {
         RAPIDJSON_ASSERT(is.Peek() == 'f');
         is.Take();
 
-        if (is.Take() == 'a' && is.Take() == 'l' && is.Take() == 's' && is.Take() == 'e') {
-            if (!handler.Bool(false))
+        if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
+            if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
         }
         else
-            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
+            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
+    }
+
+    template<typename InputStream>
+    RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
+        if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
+            is.Take();
+            return true;
+        }
+        else
+            return false;
     }
 
     // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
     template<typename InputStream>
-    unsigned ParseHex4(InputStream& is) {
+    unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
         unsigned codepoint = 0;
         for (int i = 0; i < 4; i++) {
-            Ch c = is.Take();
+            Ch c = is.Peek();
             codepoint <<= 4;
             codepoint += static_cast<unsigned>(c);
             if (c >= '0' && c <= '9')
@@ -602,9 +915,10 @@ class GenericReader {
             else if (c >= 'a' && c <= 'f')
                 codepoint -= 'a' - 10;
             else {
-                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, is.Tell() - 1);
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
             }
+            is.Take();
         }
         return codepoint;
     }
@@ -619,7 +933,14 @@ class GenericReader {
             *stack_.template Push<Ch>() = c;
             ++length_;
         }
+
+        RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
+            length_ += count;
+            return stack_.template Push<Ch>(count);
+        }
+
         size_t Length() const { return length_; }
+
         Ch* Pop() {
             return stack_.template Pop<Ch>(length_);
         }
@@ -638,6 +959,9 @@ class GenericReader {
         internal::StreamLocalCopy<InputStream> copy(is);
         InputStream& s(copy.s);
 
+        RAPIDJSON_ASSERT(s.Peek() == '\"');
+        s.Take();  // Skip '\"'
+
         bool success = false;
         if (parseFlags & kParseInsituFlag) {
             typename InputStream::Ch *head = s.PutBegin();
@@ -645,7 +969,7 @@ class GenericReader {
             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
             size_t length = s.PutEnd(head) - 1;
             RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
-            const typename TargetEncoding::Ch* const str = (typename TargetEncoding::Ch*)head;
+            const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
             success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
         }
         else {
@@ -656,7 +980,7 @@ class GenericReader {
             const typename TargetEncoding::Ch* const str = stackStream.Pop();
             success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
         }
-        if (!success)
+        if (RAPIDJSON_UNLIKELY(!success))
             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
     }
 
@@ -667,74 +991,421 @@ class GenericReader {
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
         static const char escape[256] = {
-            Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', 
-            Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, 
-            0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, 
-            0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+            Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
+            Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
+            0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
+            0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
         };
 #undef Z16
 //!@endcond
 
-        RAPIDJSON_ASSERT(is.Peek() == '\"');
-        is.Take();  // Skip '\"'
-
         for (;;) {
+            // Scan and copy string before "\\\"" or < 0x20. This is an optional optimization.
+            if (!(parseFlags & kParseValidateEncodingFlag))
+                ScanCopyUnescapedString(is, os);
+
             Ch c = is.Peek();
-            if (c == '\\') {    // Escape
+            if (RAPIDJSON_UNLIKELY(c == '\\')) {    // Escape
+                size_t escapeOffset = is.Tell();    // For invalid escaping, report the initial '\\' as error offset
                 is.Take();
-                Ch e = is.Take();
-                if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) {
-                    os.Put(escape[(unsigned char)e]);
+                Ch e = is.Peek();
+                if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
+                    is.Take();
+                    os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
                 }
-                else if (e == 'u') {    // Unicode
-                    unsigned codepoint = ParseHex4(is);
+                else if (RAPIDJSON_LIKELY(e == 'u')) {    // Unicode
+                    is.Take();
+                    unsigned codepoint = ParseHex4(is, escapeOffset);
                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
-                    if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
+                    if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
                         // Handle UTF-16 surrogate pair
-                        if (is.Take() != '\\' || is.Take() != 'u')
-                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
-                        unsigned codepoint2 = ParseHex4(is);
+                        if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
+                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+                        unsigned codepoint2 = ParseHex4(is, escapeOffset);
                         RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
-                        if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)
-                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
+                        if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
                         codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
                     }
                     TEncoding::Encode(os, codepoint);
                 }
                 else
-                    RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
+                    RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
             }
-            else if (c == '"') {    // Closing double quote
+            else if (RAPIDJSON_UNLIKELY(c == '"')) {    // Closing double quote
                 is.Take();
                 os.Put('\0');   // null-terminate the string
                 return;
             }
-            else if (c == '\0')
-                RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell() - 1);
-            else if ((unsigned)c < 0x20) // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
-                RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
-            else {
-                if (parseFlags & kParseValidateEncodingFlag ? 
-                    !Transcoder<SEncoding, TEncoding>::Validate(is, os) : 
-                    !Transcoder<SEncoding, TEncoding>::Transcode(is, os))
+            else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+                if (c == '\0')
+                    RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
+                else
                     RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
             }
+            else {
+                size_t offset = is.Tell();
+                if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
+                    !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
+                    !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
+                    RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
+            }
         }
     }
 
-    template<typename InputStream, bool backup>
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
+            // Do nothing for generic version
+    }
+
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+    // StringStream -> StackStream<char>
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
+        const char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                return;
+            }
+            else
+                os.Put(*p++);
+
+        // The rest of string using SIMD
+        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+        static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+        for (;; p += 16) {
+            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+            const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+            const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+            if (RAPIDJSON_UNLIKELY(r != 0)) {   // at least one character is '\"', '\\' or < 0x20
+                SizeType length;
+    #ifdef _MSC_VER         // Find the index of first escaped
+                unsigned long offset;
+                _BitScanForward(&offset, r);
+                length = offset;
+    #else
+                length = static_cast<SizeType>(__builtin_ffs(r) - 1);
+    #endif
+                if (length != 0) {
+                    char* q = reinterpret_cast<char*>(os.Push(length));
+                    for (size_t i = 0; i < length; i++)
+                        q[i] = p[i];
+
+                    p += length;
+                }
+                break;
+            }
+            _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
+        }
+
+        is.src_ = p;
+    }
+
+    // InsituStringStream -> InsituStringStream
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
+        RAPIDJSON_ASSERT(&is == &os);
+        (void)os;
+
+        if (is.src_ == is.dst_) {
+            SkipUnescapedString(is);
+            return;
+        }
+
+        char* p = is.src_;
+        char *q = is.dst_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                is.dst_ = q;
+                return;
+            }
+            else
+                *q++ = *p++;
+
+        // The rest of string using SIMD
+        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+        for (;; p += 16, q += 16) {
+            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+            const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+            const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+            if (RAPIDJSON_UNLIKELY(r != 0)) {   // at least one character is '\"', '\\' or < 0x20
+                size_t length;
+#ifdef _MSC_VER         // Find the index of first escaped
+                unsigned long offset;
+                _BitScanForward(&offset, r);
+                length = offset;
+#else
+                length = static_cast<size_t>(__builtin_ffs(r) - 1);
+#endif
+                for (const char* pend = p + length; p != pend; )
+                    *q++ = *p++;
+                break;
+            }
+            _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
+        }
+
+        is.src_ = p;
+        is.dst_ = q;
+    }
+
+    // In-situ fast path: read and write cursors coincide, so plain characters are only skipped, never copied.
+    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
+        RAPIDJSON_ASSERT(is.src_ == is.dst_);
+        char* p = is.src_;
+
+        // Scalar prologue up to the next 16-byte boundary (an unaligned 16-byte load may cross a page boundary and crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        for (; p != nextAligned; p++)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = is.dst_ = p; // stop byte found before the aligned region: both cursors rest on it
+                return;
+            }
+
+        // The rest of the string, 16 bytes per iteration using SSE; the three constants are the comparison patterns
+        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+        for (;; p += 16) { // no explicit bound; NOTE(review): presumably a terminating '\0' (< 0x20) always ends the scan - confirm
+            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); // aligned load, p is 16-byte aligned here
+            const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+            const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); // one bit per byte, bit i set when byte i is a stop byte
+            if (RAPIDJSON_UNLIKELY(r != 0)) {   // this chunk contains at least one '"', '\\' or byte below 0x20
+                size_t length;
+#ifdef _MSC_VER         // Index of the lowest set bit == offset of the first stop byte
+                unsigned long offset;
+                _BitScanForward(&offset, r);
+                length = offset;
+#else
+                length = static_cast<size_t>(__builtin_ffs(r) - 1); // ffs is 1-based, hence the -1
+#endif
+                p += length;
+                break;
+            }
+        }
+
+        is.src_ = is.dst_ = p; // both cursors end on the first stop byte
+    }
+#elif defined(RAPIDJSON_NEON)
+    // StringStream -> StackStream<char>: copy plain characters from is to os, stopping at the first '"', '\\' or byte below 0x20
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
+        const char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;    // stop byte found in the scalar prologue; it is left unconsumed
+                return;
+            }
+            else
+                os.Put(*p++);
+
+        // The rest of the string, 16 bytes per iteration using NEON
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');    // 0x08 also satisfies the "< 32" test below; comparison kept unchanged
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));      // x holds 0xFF in every lane that contains a stop byte
+
+            x = vrev64q_u8(x);                     // Reverse bytes in each 64-bit half (little-endian lanes assumed: first byte becomes the top byte)
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // mask for bytes 0..7; ACLE reinterpret instead of reinterpret_cast
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // mask for bytes 8..15
+
+            SizeType length = 0;                   // number of plain bytes before the first stop byte of this chunk
+            bool escaped = false;
+            if (low == 0) {
+                if (high != 0) {
+                    unsigned lz = static_cast<unsigned>(__builtin_clzll(high));
+                    length = 8 + (lz >> 3);        // 8 leading zero bits per plain byte, plus the 8 clean bytes of the low half
+                    escaped = true;
+                }
+            } else {
+                unsigned lz = static_cast<unsigned>(__builtin_clzll(low));
+                length = lz >> 3;
+                escaped = true;
+            }
+            if (RAPIDJSON_UNLIKELY(escaped)) {   // this chunk contains at least one stop byte
+                if (length != 0) {
+                    char* q = reinterpret_cast<char*>(os.Push(length));
+                    for (size_t i = 0; i < length; i++)
+                        q[i] = p[i];
+
+                    p += length;
+                }
+                break;
+            }
+            vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);   // whole chunk is plain: append all 16 bytes
+        }
+
+        is.src_ = p;
+    }
+
+    // InsituStringStream -> InsituStringStream: move plain characters from src_ down to dst_ until the first '"', '\\' or byte below 0x20
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
+        RAPIDJSON_ASSERT(&is == &os);
+        (void)os;
+
+        if (is.src_ == is.dst_) {   // nothing to move while the cursors coincide: take the skip-only path
+            SkipUnescapedString(is);
+            return;
+        }
+
+        char* p = is.src_;
+        char *q = is.dst_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                is.dst_ = q;
+                return;
+            }
+            else
+                *q++ = *p++;
+
+        // The rest of the string, 16 bytes per iteration using NEON
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');    // 0x08 also satisfies the "< 32" test below; comparison kept unchanged
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16, q += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));      // x holds 0xFF in every lane that contains a stop byte
+
+            x = vrev64q_u8(x);                     // Reverse bytes in each 64-bit half (little-endian lanes assumed: first byte becomes the top byte)
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // mask for bytes 0..7; ACLE reinterpret instead of reinterpret_cast
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // mask for bytes 8..15
+
+            SizeType length = 0;                   // number of plain bytes before the first stop byte of this chunk
+            bool escaped = false;
+            if (low == 0) {
+                if (high != 0) {
+                    unsigned lz = static_cast<unsigned>(__builtin_clzll(high));
+                    length = 8 + (lz >> 3);        // 8 leading zero bits per plain byte, plus the 8 clean bytes of the low half
+                    escaped = true;
+                }
+            } else {
+                unsigned lz = static_cast<unsigned>(__builtin_clzll(low));
+                length = lz >> 3;
+                escaped = true;
+            }
+            if (RAPIDJSON_UNLIKELY(escaped)) {   // this chunk contains at least one stop byte
+                for (const char* pend = p + length; p != pend; ) {
+                    *q++ = *p++;                   // move only the plain prefix of the chunk
+                }
+                break;
+            }
+            vst1q_u8(reinterpret_cast<uint8_t *>(q), s);   // whole chunk is plain: store all 16 bytes at the write cursor
+        }
+
+        is.src_ = p;
+        is.dst_ = q;
+    }
+
+    // In-situ fast path (NEON): read and write cursors coincide, so plain characters are only skipped, never copied.
+    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
+        RAPIDJSON_ASSERT(is.src_ == is.dst_);
+        char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        for (; p != nextAligned; p++)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = is.dst_ = p;   // stop byte found in the scalar prologue: both cursors rest on it
+                return;
+            }
+
+        // The rest of the string, 16 bytes per iteration using NEON
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');    // 0x08 also satisfies the "< 32" test below; comparison kept unchanged
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));      // x holds 0xFF in every lane that contains a stop byte
+
+            x = vrev64q_u8(x);                     // Reverse bytes in each 64-bit half (little-endian lanes assumed: first byte becomes the top byte)
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // mask for bytes 0..7; ACLE reinterpret instead of reinterpret_cast
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // mask for bytes 8..15
+
+            if (low == 0) {
+                if (high != 0) {
+                    int lz = __builtin_clzll(high);
+                    p += 8 + (lz >> 3);            // 8 leading zero bits per plain byte, plus the 8 clean bytes of the low half
+                    break;
+                }
+            } else {
+                int lz = __builtin_clzll(low);
+                p += lz >> 3;
+                break;
+            }
+        }
+
+        is.src_ = is.dst_ = p;   // both cursors end on the first stop byte
+    }
+#endif // RAPIDJSON_NEON
+
+    template<typename InputStream, bool backup, bool pushOnTake>
     class NumberStream;
 
     template<typename InputStream>
-    class NumberStream<InputStream, false> {
+    class NumberStream<InputStream, false, false> {
     public:
+        typedef typename InputStream::Ch Ch;
+
         NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader;  }
-        ~NumberStream() {}
 
         RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
         RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
         RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
+		  RAPIDJSON_FORCEINLINE void Push(char) {}
+
         size_t Tell() { return is.Tell(); }
         size_t Length() { return 0; }
         const char* Pop() { return 0; }
@@ -746,17 +1417,20 @@ class GenericReader {
     };
 
     template<typename InputStream>
-    class NumberStream<InputStream, true> : public NumberStream<InputStream, false> {
-        typedef NumberStream<InputStream, false> Base;
+    class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
+        typedef NumberStream<InputStream, false, false> Base;
     public:
-        NumberStream(GenericReader& reader, InputStream& is) : NumberStream<InputStream, false>(reader, is), stackStream(reader.stack_) {}
-        ~NumberStream() {}
+        NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
 
         RAPIDJSON_FORCEINLINE Ch TakePush() {
-            stackStream.Put((char)Base::is.Peek());
+            stackStream.Put(static_cast<char>(Base::is.Peek()));
             return Base::is.Take();
         }
 
+        RAPIDJSON_FORCEINLINE void Push(char c) {
+            stackStream.Put(c);
+        }
+
         size_t Length() { return stackStream.Length(); }
 
         const char* Pop() {
@@ -768,34 +1442,48 @@ class GenericReader {
         StackStream<char> stackStream;
     };
 
+    template<typename InputStream>  // specialization <backup = true, pushOnTake = true>
+    class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
+        typedef NumberStream<InputStream, true, false> Base;
+    public:
+        NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
+        // Take() forwards to Base::TakePush(), so every consumed character is also put on the backup stack stream.
+        RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
+    };
+
     template<unsigned parseFlags, typename InputStream, typename Handler>
     void ParseNumber(InputStream& is, Handler& handler) {
         internal::StreamLocalCopy<InputStream> copy(is);
-        NumberStream<InputStream, (parseFlags & kParseFullPrecisionFlag) != 0> s(*this, copy.s);
+        NumberStream<InputStream,
+            ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
+                ((parseFlags & kParseInsituFlag) == 0) :
+                ((parseFlags & kParseFullPrecisionFlag) != 0),
+            (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
+                (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
+
+        size_t startOffset = s.Tell();
+        double d = 0.0;
+        bool useNanOrInf = false;
 
         // Parse minus
-        bool minus = false;
-        if (s.Peek() == '-') {
-            minus = true;
-            s.Take();
-        }
+        bool minus = Consume(s, '-');
 
         // Parse int: zero / ( digit1-9 *DIGIT )
         unsigned i = 0;
         uint64_t i64 = 0;
         bool use64bit = false;
         int significandDigit = 0;
-        if (s.Peek() == '0') {
+        if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
             i = 0;
             s.TakePush();
         }
-        else if (s.Peek() >= '1' && s.Peek() <= '9') {
+        else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
             i = static_cast<unsigned>(s.TakePush() - '0');
 
             if (minus)
-                while (s.Peek() >= '0' && s.Peek() <= '9') {
-                    if (i >= 214748364) { // 2^31 = 2147483648
-                        if (i != 214748364 || s.Peek() > '8') {
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
+                        if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
                             i64 = i;
                             use64bit = true;
                             break;
@@ -805,9 +1493,9 @@ class GenericReader {
                     significandDigit++;
                 }
             else
-                while (s.Peek() >= '0' && s.Peek() <= '9') {
-                    if (i >= 429496729) { // 2^32 - 1 = 4294967295
-                        if (i != 429496729 || s.Peek() > '5') {
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
+                        if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
                             i64 = i;
                             use64bit = true;
                             break;
@@ -817,18 +1505,41 @@ class GenericReader {
                     significandDigit++;
                 }
         }
+        // Parse NaN or Infinity here
+        else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
+            if (Consume(s, 'N')) {
+                if (Consume(s, 'a') && Consume(s, 'N')) {
+                    d = std::numeric_limits<double>::quiet_NaN();
+                    useNanOrInf = true;
+                }
+            }
+            else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
+                if (Consume(s, 'n') && Consume(s, 'f')) {
+                    d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
+                    useNanOrInf = true;
+
+                    if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
+                                                                && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
+                        RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
+                    }
+                }
+            }
+            
+            if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
+                RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
+            }
+        }
         else
             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
 
         // Parse 64bit int
         bool useDouble = false;
-        double d = 0.0;
         if (use64bit) {
-            if (minus) 
-                while (s.Peek() >= '0' && s.Peek() <= '9') {                    
-                     if (i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC)) // 2^63 = 9223372036854775808
-                        if (i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8') {
-                            d = i64;
+            if (minus)
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                     if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
+                        if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
+                            d = static_cast<double>(i64);
                             useDouble = true;
                             break;
                         }
@@ -836,10 +1547,10 @@ class GenericReader {
                     significandDigit++;
                 }
             else
-                while (s.Peek() >= '0' && s.Peek() <= '9') {                    
-                    if (i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999)) // 2^64 - 1 = 18446744073709551615
-                        if (i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5') {
-                            d = i64;
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
+                        if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
+                            d = static_cast<double>(i64);
                             useDouble = true;
                             break;
                         }
@@ -850,9 +1561,9 @@ class GenericReader {
 
         // Force double for big integer
         if (useDouble) {
-            while (s.Peek() >= '0' && s.Peek() <= '9') {
-                if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0
-                    RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
+            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0
+                    RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
                 d = d * 10 + (s.TakePush() - '0');
             }
         }
@@ -860,11 +1571,10 @@ class GenericReader {
         // Parse frac = decimal-point 1*DIGIT
         int expFrac = 0;
         size_t decimalPosition;
-        if (s.Peek() == '.') {
-            s.Take();
+        if (Consume(s, '.')) {
             decimalPosition = s.Length();
 
-            if (!(s.Peek() >= '0' && s.Peek() <= '9'))
+            if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
                 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
 
             if (!useDouble) {
@@ -872,8 +1582,8 @@ class GenericReader {
                 // Use i64 to store significand in 64-bit architecture
                 if (!use64bit)
                     i64 = i;
-        
-                while (s.Peek() >= '0' && s.Peek() <= '9') {
+
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                     if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
                         break;
                     else {
@@ -884,19 +1594,19 @@ class GenericReader {
                     }
                 }
 
-                d = (double)i64;
+                d = static_cast<double>(i64);
 #else
                 // Use double to store significand in 32-bit architecture
-                d = use64bit ? (double)i64 : (double)i;
+                d = static_cast<double>(use64bit ? i64 : i);
 #endif
                 useDouble = true;
             }
 
-            while (s.Peek() >= '0' && s.Peek() <= '9') {
+            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                 if (significandDigit < 17) {
                     d = d * 10.0 + (s.TakePush() - '0');
                     --expFrac;
-                    if (d > 0.0)
+                    if (RAPIDJSON_LIKELY(d > 0.0))
                         significandDigit++;
                 }
                 else
@@ -908,38 +1618,35 @@ class GenericReader {
 
         // Parse exp = e [ minus / plus ] 1*DIGIT
         int exp = 0;
-        if (s.Peek() == 'e' || s.Peek() == 'E') {
+        if (Consume(s, 'e') || Consume(s, 'E')) {
             if (!useDouble) {
-                d = use64bit ? i64 : i;
+                d = static_cast<double>(use64bit ? i64 : i);
                 useDouble = true;
             }
-            s.Take();
 
             bool expMinus = false;
-            if (s.Peek() == '+')
-                s.Take();
-            else if (s.Peek() == '-') {
-                s.Take();
+            if (Consume(s, '+'))
+                ;
+            else if (Consume(s, '-'))
                 expMinus = true;
-            }
 
-            if (s.Peek() >= '0' && s.Peek() <= '9') {
-                exp = s.Take() - '0';
+            if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                exp = static_cast<int>(s.Take() - '0');
                 if (expMinus) {
-                    while (s.Peek() >= '0' && s.Peek() <= '9') {
-                        exp = exp * 10 + (s.Take() - '0');
+                    while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                        exp = exp * 10 + static_cast<int>(s.Take() - '0');
                         if (exp >= 214748364) {                         // Issue #313: prevent overflow exponent
-                            while (s.Peek() >= '0' && s.Peek() <= '9')  // Consume the rest of exponent
+                            while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9'))  // Consume the rest of exponent
                                 s.Take();
                         }
                     }
                 }
                 else {  // positive exp
                     int maxExp = 308 - expFrac;
-                    while (s.Peek() >= '0' && s.Peek() <= '9') {
-                        exp = exp * 10 + (s.Take() - '0');
-                        if (exp > maxExp)
-                            RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
+                    while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                        exp = exp * 10 + static_cast<int>(s.Take() - '0');
+                        if (RAPIDJSON_UNLIKELY(exp > maxExp))
+                            RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
                     }
                 }
             }
@@ -952,34 +1659,63 @@ class GenericReader {
 
         // Finish parsing, call event according to the type of number.
         bool cont = true;
-        size_t length = s.Length();
-        const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
-
-        if (useDouble) {
-            int p = exp + expFrac;
-            if (parseFlags & kParseFullPrecisionFlag)
-                d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
-            else
-                d = internal::StrtodNormalPrecision(d, p);
 
-            cont = handler.Double(minus ? -d : d);
-        }
-        else {
-            if (use64bit) {
-                if (minus)
-                    cont = handler.Int64(-(int64_t)i64);
-                else
-                    cont = handler.Uint64(i64);
+        if (parseFlags & kParseNumbersAsStringsFlag) {
+            if (parseFlags & kParseInsituFlag) {
+                s.Pop();  // Pop stack no matter if it will be used or not.
+                typename InputStream::Ch* head = is.PutBegin();
+                const size_t length = s.Tell() - startOffset;
+                RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
+                // unable to insert the \0 character here, it will erase the comma after this number
+                const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
+                cont = handler.RawNumber(str, SizeType(length), false);
             }
             else {
-                if (minus)
-                    cont = handler.Int(-(int)i);
-                else
-                    cont = handler.Uint(i);
+                SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
+                StringStream srcStream(s.Pop());
+                StackStream<typename TargetEncoding::Ch> dstStream(stack_);
+                while (numCharsToCopy--) {
+                    Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
+                }
+                dstStream.Put('\0');
+                const typename TargetEncoding::Ch* str = dstStream.Pop();
+                const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
+                cont = handler.RawNumber(str, SizeType(length), true);
             }
         }
-        if (!cont)
-            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
+        else {
+           size_t length = s.Length();
+           const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
+
+           if (useDouble) {
+               int p = exp + expFrac;
+               if (parseFlags & kParseFullPrecisionFlag)
+                   d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
+               else
+                   d = internal::StrtodNormalPrecision(d, p);
+
+               cont = handler.Double(minus ? -d : d);
+           }
+           else if (useNanOrInf) {
+               cont = handler.Double(d);
+           }
+           else {
+               if (use64bit) {
+                   if (minus)
+                       cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
+                   else
+                       cont = handler.Uint64(i64);
+               }
+               else {
+                   if (minus)
+                       cont = handler.Int(static_cast<int32_t>(~i + 1));
+                   else
+                       cont = handler.Uint(i);
+               }
+           }
+        }
+        if (RAPIDJSON_UNLIKELY(!cont))
+            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
     }
 
     // Parse any JSON value
@@ -992,7 +1728,10 @@ class GenericReader {
             case '"': ParseString<parseFlags>(is, handler); break;
             case '{': ParseObject<parseFlags>(is, handler); break;
             case '[': ParseArray <parseFlags>(is, handler); break;
-            default : ParseNumber<parseFlags>(is, handler);
+            default :
+                      ParseNumber<parseFlags>(is, handler);
+                      break;
+
         }
     }
 
@@ -1000,27 +1739,29 @@ class GenericReader {
 
     // States
     enum IterativeParsingState {
-        IterativeParsingStartState = 0,
-        IterativeParsingFinishState,
-        IterativeParsingErrorState,
+        IterativeParsingFinishState = 0, // sink state; first row of table G in Predict()
+        IterativeParsingErrorState,      // sink state; second row of table G in Predict()
+        IterativeParsingStartState,      // rows of G in Predict() follow this enumerator order - keep both in sync
 
         // Object states
         IterativeParsingObjectInitialState,
         IterativeParsingMemberKeyState,
-        IterativeParsingKeyValueDelimiterState,
         IterativeParsingMemberValueState,
-        IterativeParsingMemberDelimiterState,
         IterativeParsingObjectFinishState,
 
         // Array states
         IterativeParsingArrayInitialState,
         IterativeParsingElementState,
-        IterativeParsingElementDelimiterState,
         IterativeParsingArrayFinishState,
 
         // Single value state
         IterativeParsingValueState,
-
+        
+        // Delimiter states (grouped last; NOTE(review): presumably so they can be tested as a range - confirm against callers)
+        IterativeParsingElementDelimiterState,
+        IterativeParsingMemberDelimiterState,
+        IterativeParsingKeyValueDelimiterState,
+        
         cIterativeParsingStateCount
     };
 
@@ -1064,9 +1805,9 @@ class GenericReader {
 #undef N
 #undef N16
 //!@endcond
-        
-        if (sizeof(Ch) == 1 || unsigned(c) < 256)
-            return (Token)tokenMap[(unsigned char)c];
+
+        if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
+            return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
         else
             return NumberToken;
     }
@@ -1074,6 +1815,18 @@ class GenericReader {
     RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
         // current state x one lookahead token -> new state
         static const char G[cIterativeParsingStateCount][kTokenCount] = {
+            // Finish(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // Error(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
             // Start
             {
                 IterativeParsingArrayInitialState,  // Left bracket
@@ -1088,18 +1841,6 @@ class GenericReader {
                 IterativeParsingValueState,         // Null
                 IterativeParsingValueState          // Number
             },
-            // Finish(sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
-            // Error(sink state)
-            {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            },
             // ObjectInitial
             {
                 IterativeParsingErrorState,         // Left bracket
@@ -1128,20 +1869,6 @@ class GenericReader {
                 IterativeParsingErrorState,             // Null
                 IterativeParsingErrorState              // Number
             },
-            // KeyValueDelimiter
-            {
-                IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
-                IterativeParsingErrorState,             // Right bracket
-                IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
-                IterativeParsingErrorState,             // Right curly bracket
-                IterativeParsingErrorState,             // Comma
-                IterativeParsingErrorState,             // Colon
-                IterativeParsingMemberValueState,       // String
-                IterativeParsingMemberValueState,       // False
-                IterativeParsingMemberValueState,       // True
-                IterativeParsingMemberValueState,       // Null
-                IterativeParsingMemberValueState        // Number
-            },
             // MemberValue
             {
                 IterativeParsingErrorState,             // Left bracket
@@ -1156,20 +1883,6 @@ class GenericReader {
                 IterativeParsingErrorState,             // Null
                 IterativeParsingErrorState              // Number
             },
-            // MemberDelimiter
-            {
-                IterativeParsingErrorState,         // Left bracket
-                IterativeParsingErrorState,         // Right bracket
-                IterativeParsingErrorState,         // Left curly bracket
-                IterativeParsingErrorState,         // Right curly bracket
-                IterativeParsingErrorState,         // Comma
-                IterativeParsingErrorState,         // Colon
-                IterativeParsingMemberKeyState,     // String
-                IterativeParsingErrorState,         // False
-                IterativeParsingErrorState,         // True
-                IterativeParsingErrorState,         // Null
-                IterativeParsingErrorState          // Number
-            },
             // ObjectFinish(sink state)
             {
                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
@@ -1204,10 +1917,22 @@ class GenericReader {
                 IterativeParsingErrorState,             // Null
                 IterativeParsingErrorState              // Number
             },
+            // ArrayFinish(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // Single Value (sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
             // ElementDelimiter
             {
                 IterativeParsingArrayInitialState,      // Left bracket(push Element state)
-                IterativeParsingErrorState,             // Right bracket
+                IterativeParsingArrayFinishState,       // Right bracket
                 IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
                 IterativeParsingErrorState,             // Right curly bracket
                 IterativeParsingErrorState,             // Comma
@@ -1218,21 +1943,37 @@ class GenericReader {
                 IterativeParsingElementState,           // Null
                 IterativeParsingElementState            // Number
             },
-            // ArrayFinish(sink state)
+            // MemberDelimiter
             {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
+                IterativeParsingErrorState,         // Left bracket
+                IterativeParsingErrorState,         // Right bracket
+                IterativeParsingErrorState,         // Left curly bracket
+                IterativeParsingObjectFinishState,  // Right curly bracket
+                IterativeParsingErrorState,         // Comma
+                IterativeParsingErrorState,         // Colon
+                IterativeParsingMemberKeyState,     // String
+                IterativeParsingErrorState,         // False
+                IterativeParsingErrorState,         // True
+                IterativeParsingErrorState,         // Null
+                IterativeParsingErrorState          // Number
             },
-            // Single Value (sink state)
+            // KeyValueDelimiter
             {
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
-                IterativeParsingErrorState
-            }
+                IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
+                IterativeParsingErrorState,             // Right bracket
+                IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
+                IterativeParsingErrorState,             // Right curly bracket
+                IterativeParsingErrorState,             // Comma
+                IterativeParsingErrorState,             // Colon
+                IterativeParsingMemberValueState,       // String
+                IterativeParsingMemberValueState,       // False
+                IterativeParsingMemberValueState,       // True
+                IterativeParsingMemberValueState,       // Null
+                IterativeParsingMemberValueState        // Number
+            },
         }; // End of G
 
-        return (IterativeParsingState)G[state][token];
+        return static_cast<IterativeParsingState>(G[state][token]);
     }
 
     // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
@@ -1309,6 +2050,11 @@ class GenericReader {
 
         case IterativeParsingObjectFinishState:
         {
+            // Transit from delimiter is only allowed when trailing commas are enabled
+            if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
+                return IterativeParsingErrorState;
+            }
             // Get member count.
             SizeType c = *stack_.template Pop<SizeType>(1);
             // If the object is not empty, count the last member.
@@ -1334,6 +2080,11 @@ class GenericReader {
 
         case IterativeParsingArrayFinishState:
         {
+            // Transit from delimiter is only allowed when trailing commas are enabled
+            if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
+                return IterativeParsingErrorState;
+            }
             // Get element count.
             SizeType c = *stack_.template Pop<SizeType>(1);
             // If the array is not empty, count the last element.
@@ -1385,55 +2136,68 @@ class GenericReader {
             // Error flag has been set.
             return;
         }
-        
+
         switch (src) {
-        case IterativeParsingStartState:            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell());
-        case IterativeParsingFinishState:           RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell());
+        case IterativeParsingStartState:            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
+        case IterativeParsingFinishState:           RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
         case IterativeParsingObjectInitialState:
-        case IterativeParsingMemberDelimiterState:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
-        case IterativeParsingMemberKeyState:        RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
-        case IterativeParsingMemberValueState:      RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
-        case IterativeParsingElementState:          RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
-        default:                                    RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
-        }       
+        case IterativeParsingMemberDelimiterState:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
+        case IterativeParsingMemberKeyState:        RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
+        case IterativeParsingMemberValueState:      RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
+        case IterativeParsingKeyValueDelimiterState:
+        case IterativeParsingArrayInitialState:
+        case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
+        default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
+        }
     }
 
+    RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) { // classifies s by its position in the enum
+        return s >= IterativeParsingElementDelimiterState; // assumes every delimiter state is declared at or after ElementDelimiter - TODO confirm against the enum
+    }
+    
+    RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) { // classifies s by its position in the enum
+        return s <= IterativeParsingErrorState; // assumes only the Finish and Error sink states are declared at or before Error - TODO confirm against the enum
+    }
+    
     template <unsigned parseFlags, typename InputStream, typename Handler>
     ParseResult IterativeParse(InputStream& is, Handler& handler) {
         parseResult_.Clear();
         ClearStackOnExit scope(*this);
         IterativeParsingState state = IterativeParsingStartState;
-
-        SkipWhitespace(is);
+        
+        SkipWhitespaceAndComments<parseFlags>(is);
+        RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
         while (is.Peek() != '\0') {
             Token t = Tokenize(is.Peek());
             IterativeParsingState n = Predict(state, t);
             IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
-
+            
             if (d == IterativeParsingErrorState) {
                 HandleError(state, is);
                 break;
             }
-
+            
             state = d;
-
+            
             // Do not further consume streams if a root JSON has been parsed.
             if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
                 break;
-
-            SkipWhitespace(is);
+            
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
         }
-
+        
         // Handle the end of file.
         if (state != IterativeParsingFinishState)
             HandleError(state, is);
-
+        
         return parseResult_;
     }
 
     static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
     internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
     ParseResult parseResult_;
+    IterativeParsingState state_;
 }; // class GenericReader
 
 //! Reader with UTF8 encoding and default allocator.
@@ -1441,6 +2205,11 @@ typedef GenericReader<UTF8<>, UTF8<> > Reader;
 
 RAPIDJSON_NAMESPACE_END
 
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+
 #ifdef __GNUC__
 RAPIDJSON_DIAG_POP
 #endif
diff --git a/rapidjson/schema.h b/rapidjson/schema.h
new file mode 100644
index 0000000..abcf1a1
--- /dev/null
+++ b/rapidjson/schema.h
@@ -0,0 +1,2016 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_SCHEMA_H_
+#define RAPIDJSON_SCHEMA_H_
+
+#include "document.h"
+#include "pointer.h"
+#include <cmath> // abs, floor
+
+#if !defined(RAPIDJSON_SCHEMA_USE_INTERNALREGEX) // a value supplied by the user is kept as-is
+#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 1     // default: the regex engine from internal/regex.h
+#endif
+
+#if !defined(RAPIDJSON_SCHEMA_USE_STDREGEX)      // a value supplied by the user is kept as-is
+#if !RAPIDJSON_SCHEMA_USE_INTERNALREGEX && (__cplusplus >=201103L || (defined(_MSC_VER) && _MSC_VER >= 1800))
+#define RAPIDJSON_SCHEMA_USE_STDREGEX 1          // internal engine switched off and <regex> is available
+#else
+#define RAPIDJSON_SCHEMA_USE_STDREGEX 0
+#endif
+#endif
+
+#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
+#include "internal/regex.h"
+#elif RAPIDJSON_SCHEMA_USE_STDREGEX
+#include <regex>
+#endif
+
+#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX || RAPIDJSON_SCHEMA_USE_STDREGEX
+#define RAPIDJSON_SCHEMA_HAS_REGEX 1
+#else
+#define RAPIDJSON_SCHEMA_HAS_REGEX 0
+#endif
+
+#ifndef RAPIDJSON_SCHEMA_VERBOSE
+#define RAPIDJSON_SCHEMA_VERBOSE 0
+#endif
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+#include "stringbuffer.h"
+#endif
+
+RAPIDJSON_DIAG_PUSH
+
+#if defined(__GNUC__)
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_OFF(weak-vtables)
+RAPIDJSON_DIAG_OFF(exit-time-destructors)
+RAPIDJSON_DIAG_OFF(c++98-compat-pedantic)
+RAPIDJSON_DIAG_OFF(variadic-macros)
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+// Verbose Utilities
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+
+namespace internal {
+// Debug tracing helpers; this section is compiled only when RAPIDJSON_SCHEMA_VERBOSE is non-zero.
+inline void PrintInvalidKeyword(const char* keyword) {
+    printf("Fail keyword: %s\n", keyword);
+}
+// Wide-character overload of PrintInvalidKeyword.
+inline void PrintInvalidKeyword(const wchar_t* keyword) {
+    wprintf(L"Fail keyword: %ls\n", keyword);
+}
+// Prints the given document string followed by an empty line.
+inline void PrintInvalidDocument(const char* document) {
+    printf("Fail document: %s\n\n", document);
+}
+// Wide-character overload of PrintInvalidDocument.
+inline void PrintInvalidDocument(const wchar_t* document) {
+    wprintf(L"Fail document: %ls\n\n", document);
+}
+// Prints s and d on "S:" / "D:" lines behind a pad of depth * 4 columns (at least one space); '*' widths must be int.
+inline void PrintValidatorPointers(unsigned depth, const char* s, const char* d) {
+    printf("S: %*s%s\nD: %*s%s\n\n", static_cast<int>(depth * 4), " ", s, static_cast<int>(depth * 4), " ", d);
+}
+// Wide-character overload of PrintValidatorPointers; the field width is likewise passed as int.
+inline void PrintValidatorPointers(unsigned depth, const wchar_t* s, const wchar_t* d) {
+    wprintf(L"S: %*ls%ls\nD: %*ls%ls\n\n", static_cast<int>(depth * 4), L" ", s, static_cast<int>(depth * 4), L" ", d);
+}
+
+} // namespace internal
+
+#endif // RAPIDJSON_SCHEMA_VERBOSE
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_INVALID_KEYWORD_RETURN
+
+#if RAPIDJSON_SCHEMA_VERBOSE // verbose builds print the failing keyword
+#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword) internal::PrintInvalidKeyword(keyword)
+#else // otherwise the trace macro expands to nothing
+#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword)
+#endif
+// Stores keyword.GetString() in context.invalidKeyword, traces it in verbose builds and returns false from the enclosing function; a variable named `context` must be in scope where the macro is used.
+#define RAPIDJSON_INVALID_KEYWORD_RETURN(keyword)\
+RAPIDJSON_MULTILINEMACRO_BEGIN\
+    context.invalidKeyword = keyword.GetString();\
+    RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword.GetString());\
+    return false;\
+RAPIDJSON_MULTILINEMACRO_END
+
+///////////////////////////////////////////////////////////////////////////////
+// Forward declarations
+
+template <typename ValueType, typename Allocator>
+class GenericSchemaDocument;
+
+namespace internal {
+
+template <typename SchemaDocumentType>
+class Schema;
+
+///////////////////////////////////////////////////////////////////////////////
+// ISchemaValidator
+
+class ISchemaValidator { // abstract interface exposing only a validity query
+public:
+    virtual ~ISchemaValidator() {} // virtual, so implementations can be destroyed through this interface
+    virtual bool IsValid() const = 0; // pure virtual; what counts as "valid" is decided by the implementer
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// ISchemaStateFactory
+
+template <typename SchemaType>
+class ISchemaStateFactory { // abstract factory interface; SchemaValidationContext's destructor releases its resources through it
+public:
+    virtual ~ISchemaStateFactory() {}
+    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType&) = 0; // presumably paired with DestroySchemaValidator - confirm with implementers
+    virtual void DestroySchemaValidator(ISchemaValidator* validator) = 0;
+    virtual void* CreateHasher() = 0; // returns an opaque handle; presumably paired with DestroryHasher
+    virtual uint64_t GetHashCode(void* hasher) = 0; // takes the opaque hasher handle
+    virtual void DestroryHasher(void* hasher) = 0; // NOTE(review): "Destrory" is a typo for "Destroy"; renaming would break implementers and callers
+    virtual void* MallocState(size_t size) = 0; // presumably paired with FreeState
+    virtual void FreeState(void* p) = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Hasher
+
+// Handler that folds the events of one JSON value into a single 64-bit hash code (for comparison of compound values).
+template<typename Encoding, typename Allocator>
+class Hasher {
+public:
+    typedef typename Encoding::Ch Ch;
+    // allocator and stackCapacity are forwarded to the stack that holds intermediate hash codes.
+    Hasher(Allocator* allocator = 0, size_t stackCapacity = kDefaultSize) : stack_(allocator, stackCapacity) {}
+    // Scalar events push one hash code each; a number is hashed as a 64-bit integer view plus its double value.
+    bool Null() { return WriteType(kNullType); }
+    bool Bool(bool b) { return WriteType(b ? kTrueType : kFalseType); }
+    bool Int(int i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); }
+    bool Uint(unsigned u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); }
+    bool Int64(int64_t i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); }
+    bool Uint64(uint64_t u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); }
+    bool Double(double d) {
+        Number n; n.u.u = 0; // integer view stays 0 for NaN and for values no 64-bit integer can hold (casting those is undefined)
+        if (d < 0) { if (d >= -9223372036854775808.0) n.u.i = static_cast<int64_t>(d); } // -2^63 is the lowest int64_t
+        else if (d < 18446744073709551616.0) n.u.u = static_cast<uint64_t>(d);           // 2^64 is one past the top of uint64_t
+        n.d = d;
+        return WriteNumber(n);
+    }
+    // Hashes the raw characters of a number under kNumberType; the bool argument is ignored.
+    bool RawNumber(const Ch* str, SizeType len, bool) {
+        WriteBuffer(kNumberType, str, len * sizeof(Ch));
+        return true;
+    }
+    // Hashes len characters of str under kStringType; the bool argument is ignored.
+    bool String(const Ch* str, SizeType len, bool) {
+        WriteBuffer(kStringType, str, len * sizeof(Ch));
+        return true;
+    }
+    // Object: keys are hashed like strings; EndObject pops the 2 * memberCount codes and pushes the combined one.
+    bool StartObject() { return true; }
+    bool Key(const Ch* str, SizeType len, bool copy) { return String(str, len, copy); }
+    bool EndObject(SizeType memberCount) {
+        uint64_t h = Hash(0, kObjectType);
+        uint64_t* kv = stack_.template Pop<uint64_t>(memberCount * 2);
+        for (SizeType i = 0; i < memberCount; i++)
+            h ^= Hash(kv[i * 2], kv[i * 2 + 1]);  // Use xor to achieve member order insensitive
+        *stack_.template Push<uint64_t>() = h;
+        return true;
+    }
+    // Array: EndArray pops the elementCount element codes and pushes the combined one.
+    bool StartArray() { return true; }
+    bool EndArray(SizeType elementCount) {
+        uint64_t h = Hash(0, kArrayType);
+        uint64_t* e = stack_.template Pop<uint64_t>(elementCount);
+        for (SizeType i = 0; i < elementCount; i++)
+            h = Hash(h, e[i]); // Use hash to achieve element order sensitive
+        *stack_.template Push<uint64_t>() = h;
+        return true;
+    }
+    // True when exactly one hash code is left on the stack.
+    bool IsValid() const { return stack_.GetSize() == sizeof(uint64_t); }
+    // Returns the remaining hash code; asserts IsValid().
+    uint64_t GetHashCode() const {
+        RAPIDJSON_ASSERT(IsValid());
+        return *stack_.template Top<uint64_t>();
+    }
+
+private:
+    static const size_t kDefaultSize = 256;
+    struct Number {
+        union U {
+            uint64_t u;
+            int64_t i;
+        }u;
+        double d;
+    };
+    // Pushes a hash code made from the type tag alone.
+    bool WriteType(Type type) { return WriteBuffer(type, 0, 0); }
+    // Pushes a hash code made from all bytes of n (integer view followed by the double).
+    bool WriteNumber(const Number& n) { return WriteBuffer(kNumberType, &n, sizeof(n)); }
+    // Pushes a hash code made from the type tag followed by len bytes of data; always returns true.
+    bool WriteBuffer(Type type, const void* data, size_t len) {
+        // FNV-1a from http://isthe.com/chongo/tech/comp/fnv/
+        uint64_t h = Hash(RAPIDJSON_UINT64_C2(0x84222325, 0xcbf29ce4), type); // NOTE(review): the published 64-bit basis is 0xcbf29ce484222325; halves look swapped if C2 takes (high, low) - confirm
+        const unsigned char* d = static_cast<const unsigned char*>(data);
+        for (size_t i = 0; i < len; i++)
+            h = Hash(h, d[i]);
+        *stack_.template Push<uint64_t>() = h;
+        return true;
+    }
+    // One mixing step: xor d into h, then multiply by kPrime.
+    static uint64_t Hash(uint64_t h, uint64_t d) {
+        static const uint64_t kPrime = RAPIDJSON_UINT64_C2(0x00000100, 0x000001b3);
+        h ^= d;
+        h *= kPrime;
+        return h;
+    }
+
+    Stack<Allocator> stack_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// SchemaValidationContext
+// Bundle of state used while validating against one schema; every resource held here is handed back to the factory on destruction.
+template <typename SchemaDocumentType>
+struct SchemaValidationContext {
+    typedef Schema<SchemaDocumentType> SchemaType;
+    typedef ISchemaStateFactory<SchemaType> SchemaValidatorFactoryType;
+    typedef typename SchemaType::ValueType ValueType;
+    typedef typename ValueType::Ch Ch;
+    // Kind tag stored in valuePatternValidatorType / objectPatternValidatorType.
+    enum PatternValidatorType {
+        kPatternValidatorOnly,
+        kPatternValidatorWithProperty,
+        kPatternValidatorWithAdditionalProperty
+    };
+    // Binds the context to factory f and schema s; every other member starts out null, zero, false or kPatternValidatorOnly.
+    SchemaValidationContext(SchemaValidatorFactoryType& f, const SchemaType* s) :
+        factory(f),
+        schema(s),
+        valueSchema(),
+        invalidKeyword(),
+        hasher(),
+        arrayElementHashCodes(),
+        validators(),
+        validatorCount(),
+        patternPropertiesValidators(),
+        patternPropertiesValidatorCount(),
+        patternPropertiesSchemas(),
+        patternPropertiesSchemaCount(),
+        valuePatternValidatorType(kPatternValidatorOnly),
+        objectPatternValidatorType(kPatternValidatorOnly), arrayElementIndex(), // these two used to be left uninitialized
+        propertyExist(),
+        inArray(false),
+        valueUniqueness(false),
+        arrayUniqueness(false)
+    {}
+    // Destroys the hasher and both validator arrays, then frees the state buffers, all through the factory.
+    ~SchemaValidationContext() {
+        if (hasher)
+            factory.DestroryHasher(hasher);
+        if (validators) {
+            for (SizeType i = 0; i < validatorCount; i++)
+                factory.DestroySchemaValidator(validators[i]);
+            factory.FreeState(validators);
+        }
+        if (patternPropertiesValidators) {
+            for (SizeType i = 0; i < patternPropertiesValidatorCount; i++)
+                factory.DestroySchemaValidator(patternPropertiesValidators[i]);
+            factory.FreeState(patternPropertiesValidators);
+        }
+        if (patternPropertiesSchemas)
+            factory.FreeState(patternPropertiesSchemas);
+        if (propertyExist)
+            factory.FreeState(propertyExist);
+    }
+
+    SchemaValidatorFactoryType& factory;
+    const SchemaType* schema;
+    const SchemaType* valueSchema;
+    const Ch* invalidKeyword; // set by RAPIDJSON_INVALID_KEYWORD_RETURN when a keyword fails
+    void* hasher; // Only the validator accesses this; destroyed via factory.DestroryHasher in the destructor
+    void* arrayElementHashCodes; // Only the validator accesses this; not released by this struct
+    ISchemaValidator** validators;
+    SizeType validatorCount;
+    ISchemaValidator** patternPropertiesValidators;
+    SizeType patternPropertiesValidatorCount;
+    const SchemaType** patternPropertiesSchemas;
+    SizeType patternPropertiesSchemaCount;
+    PatternValidatorType valuePatternValidatorType;
+    PatternValidatorType objectPatternValidatorType;
+    SizeType arrayElementIndex;
+    bool* propertyExist;
+    bool inArray;
+    bool valueUniqueness;
+    bool arrayUniqueness;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Schema
+
+template <typename SchemaDocumentType>
+class Schema {
+public:
+    typedef typename SchemaDocumentType::ValueType ValueType;
+    typedef typename SchemaDocumentType::AllocatorType AllocatorType;
+    typedef typename SchemaDocumentType::PointerType PointerType;
+    typedef typename ValueType::EncodingType EncodingType;
+    typedef typename EncodingType::Ch Ch;
+    typedef SchemaValidationContext<SchemaDocumentType> Context;
+    typedef Schema<SchemaDocumentType> SchemaType;
+    typedef GenericValue<EncodingType, AllocatorType> SValue;
+    friend class GenericSchemaDocument<ValueType, AllocatorType>;
+
+    Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator) :
+        allocator_(allocator),
+        typeless_(schemaDocument->GetTypeless()),
+        enum_(),
+        enumCount_(),
+        not_(),
+        type_((1 << kTotalSchemaType) - 1), // typeless
+        validatorCount_(),
+        properties_(),
+        additionalPropertiesSchema_(),
+        patternProperties_(),
+        patternPropertyCount_(),
+        propertyCount_(),
+        minProperties_(),
+        maxProperties_(SizeType(~0)),
+        additionalProperties_(true),
+        hasDependencies_(),
+        hasRequired_(),
+        hasSchemaDependencies_(),
+        additionalItemsSchema_(),
+        itemsList_(),
+        itemsTuple_(),
+        itemsTupleCount_(),
+        minItems_(),
+        maxItems_(SizeType(~0)),
+        additionalItems_(true),
+        uniqueItems_(false),
+        pattern_(),
+        minLength_(0),
+        maxLength_(~SizeType(0)),
+        exclusiveMinimum_(false),
+        exclusiveMaximum_(false)
+    {
+        typedef typename SchemaDocumentType::ValueType ValueType;
+        typedef typename ValueType::ConstValueIterator ConstValueIterator;
+        typedef typename ValueType::ConstMemberIterator ConstMemberIterator;
+
+        if (!value.IsObject())
+            return;
+
+        if (const ValueType* v = GetMember(value, GetTypeString())) {
+            type_ = 0;
+            if (v->IsString())
+                AddType(*v);
+            else if (v->IsArray())
+                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr)
+                    AddType(*itr);
+        }
+
+        if (const ValueType* v = GetMember(value, GetEnumString()))
+            if (v->IsArray() && v->Size() > 0) {
+                enum_ = static_cast<uint64_t*>(allocator_->Malloc(sizeof(uint64_t) * v->Size()));
+                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr) {
+                    typedef Hasher<EncodingType, MemoryPoolAllocator<> > EnumHasherType;
+                    char buffer[256 + 24];
+                    MemoryPoolAllocator<> hasherAllocator(buffer, sizeof(buffer));
+                    EnumHasherType h(&hasherAllocator, 256);
+                    itr->Accept(h);
+                    enum_[enumCount_++] = h.GetHashCode();
+                }
+            }
+
+        if (schemaDocument) {
+            AssignIfExist(allOf_, *schemaDocument, p, value, GetAllOfString(), document);
+            AssignIfExist(anyOf_, *schemaDocument, p, value, GetAnyOfString(), document);
+            AssignIfExist(oneOf_, *schemaDocument, p, value, GetOneOfString(), document);
+        }
+
+        if (const ValueType* v = GetMember(value, GetNotString())) {
+            schemaDocument->CreateSchema(&not_, p.Append(GetNotString(), allocator_), *v, document);
+            notValidatorIndex_ = validatorCount_;
+            validatorCount_++;
+        }
+
+        // Object
+
+        const ValueType* properties = GetMember(value, GetPropertiesString());
+        const ValueType* required = GetMember(value, GetRequiredString());
+        const ValueType* dependencies = GetMember(value, GetDependenciesString());
+        {
+            // Gather properties from properties/required/dependencies
+            SValue allProperties(kArrayType);
+
+            if (properties && properties->IsObject())
+                for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr)
+                    AddUniqueElement(allProperties, itr->name);
+            
+            if (required && required->IsArray())
+                for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
+                    if (itr->IsString())
+                        AddUniqueElement(allProperties, *itr);
+
+            if (dependencies && dependencies->IsObject())
+                for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) {
+                    AddUniqueElement(allProperties, itr->name);
+                    if (itr->value.IsArray())
+                        for (ConstValueIterator i = itr->value.Begin(); i != itr->value.End(); ++i)
+                            if (i->IsString())
+                                AddUniqueElement(allProperties, *i);
+                }
+
+            if (allProperties.Size() > 0) {
+                propertyCount_ = allProperties.Size();
+                properties_ = static_cast<Property*>(allocator_->Malloc(sizeof(Property) * propertyCount_));
+                for (SizeType i = 0; i < propertyCount_; i++) {
+                    new (&properties_[i]) Property();
+                    properties_[i].name = allProperties[i];
+                    properties_[i].schema = typeless_;
+                }
+            }
+        }
+
+        if (properties && properties->IsObject()) {
+            PointerType q = p.Append(GetPropertiesString(), allocator_);
+            for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) {
+                SizeType index;
+                if (FindPropertyIndex(itr->name, &index))
+                    schemaDocument->CreateSchema(&properties_[index].schema, q.Append(itr->name, allocator_), itr->value, document);
+            }
+        }
+
+        if (const ValueType* v = GetMember(value, GetPatternPropertiesString())) {
+            PointerType q = p.Append(GetPatternPropertiesString(), allocator_);
+            patternProperties_ = static_cast<PatternProperty*>(allocator_->Malloc(sizeof(PatternProperty) * v->MemberCount()));
+            patternPropertyCount_ = 0;
+
+            for (ConstMemberIterator itr = v->MemberBegin(); itr != v->MemberEnd(); ++itr) {
+                new (&patternProperties_[patternPropertyCount_]) PatternProperty();
+                patternProperties_[patternPropertyCount_].pattern = CreatePattern(itr->name);
+                schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema, q.Append(itr->name, allocator_), itr->value, document);
+                patternPropertyCount_++;
+            }
+        }
+
+        if (required && required->IsArray())
+            for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
+                if (itr->IsString()) {
+                    SizeType index;
+                    if (FindPropertyIndex(*itr, &index)) {
+                        properties_[index].required = true;
+                        hasRequired_ = true;
+                    }
+                }
+
+        if (dependencies && dependencies->IsObject()) {
+            PointerType q = p.Append(GetDependenciesString(), allocator_);
+            hasDependencies_ = true;
+            for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) {
+                SizeType sourceIndex;
+                if (FindPropertyIndex(itr->name, &sourceIndex)) {
+                    if (itr->value.IsArray()) {
+                        properties_[sourceIndex].dependencies = static_cast<bool*>(allocator_->Malloc(sizeof(bool) * propertyCount_));
+                        std::memset(properties_[sourceIndex].dependencies, 0, sizeof(bool)* propertyCount_);
+                        for (ConstValueIterator targetItr = itr->value.Begin(); targetItr != itr->value.End(); ++targetItr) {
+                            SizeType targetIndex;
+                            if (FindPropertyIndex(*targetItr, &targetIndex))
+                                properties_[sourceIndex].dependencies[targetIndex] = true;
+                        }
+                    }
+                    else if (itr->value.IsObject()) {
+                        hasSchemaDependencies_ = true;
+                        schemaDocument->CreateSchema(&properties_[sourceIndex].dependenciesSchema, q.Append(itr->name, allocator_), itr->value, document);
+                        properties_[sourceIndex].dependenciesValidatorIndex = validatorCount_;
+                        validatorCount_++;
+                    }
+                }
+            }
+        }
+
+        if (const ValueType* v = GetMember(value, GetAdditionalPropertiesString())) {
+            if (v->IsBool())
+                additionalProperties_ = v->GetBool();
+            else if (v->IsObject())
+                schemaDocument->CreateSchema(&additionalPropertiesSchema_, p.Append(GetAdditionalPropertiesString(), allocator_), *v, document);
+        }
+
+        AssignIfExist(minProperties_, value, GetMinPropertiesString());
+        AssignIfExist(maxProperties_, value, GetMaxPropertiesString());
+
+        // Array
+        if (const ValueType* v = GetMember(value, GetItemsString())) {
+            PointerType q = p.Append(GetItemsString(), allocator_);
+            if (v->IsObject()) // List validation
+                schemaDocument->CreateSchema(&itemsList_, q, *v, document);
+            else if (v->IsArray()) { // Tuple validation
+                itemsTuple_ = static_cast<const Schema**>(allocator_->Malloc(sizeof(const Schema*) * v->Size()));
+                SizeType index = 0;
+                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr, index++)
+                    schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++], q.Append(index, allocator_), *itr, document);
+            }
+        }
+
+        AssignIfExist(minItems_, value, GetMinItemsString());
+        AssignIfExist(maxItems_, value, GetMaxItemsString());
+
+        if (const ValueType* v = GetMember(value, GetAdditionalItemsString())) {
+            if (v->IsBool())
+                additionalItems_ = v->GetBool();
+            else if (v->IsObject())
+                schemaDocument->CreateSchema(&additionalItemsSchema_, p.Append(GetAdditionalItemsString(), allocator_), *v, document);
+        }
+
+        AssignIfExist(uniqueItems_, value, GetUniqueItemsString());
+
+        // String
+        AssignIfExist(minLength_, value, GetMinLengthString());
+        AssignIfExist(maxLength_, value, GetMaxLengthString());
+
+        if (const ValueType* v = GetMember(value, GetPatternString()))
+            pattern_ = CreatePattern(*v);
+
+        // Number
+        if (const ValueType* v = GetMember(value, GetMinimumString()))
+            if (v->IsNumber())
+                minimum_.CopyFrom(*v, *allocator_);
+
+        if (const ValueType* v = GetMember(value, GetMaximumString()))
+            if (v->IsNumber())
+                maximum_.CopyFrom(*v, *allocator_);
+
+        AssignIfExist(exclusiveMinimum_, value, GetExclusiveMinimumString());
+        AssignIfExist(exclusiveMaximum_, value, GetExclusiveMaximumString());
+
+        if (const ValueType* v = GetMember(value, GetMultipleOfString()))
+            if (v->IsNumber() && v->GetDouble() > 0.0)
+                multipleOf_.CopyFrom(*v, *allocator_);
+    }
+
+    ~Schema() { // frees enum_, properties_, patternProperties_, itemsTuple_ and pattern_; element destructors run first
+        AllocatorType::Free(enum_);
+        if (properties_) {
+            for (Property* prop = properties_; prop != properties_ + propertyCount_; ++prop)
+                prop->~Property();
+            AllocatorType::Free(properties_);
+        }
+        if (patternProperties_) {
+            for (PatternProperty* pp = patternProperties_; pp != patternProperties_ + patternPropertyCount_; ++pp)
+                pp->~PatternProperty();
+            AllocatorType::Free(patternProperties_);
+        }
+        AllocatorType::Free(itemsTuple_);
+#if RAPIDJSON_SCHEMA_HAS_REGEX
+        if (pattern_) {
+            pattern_->~RegexType();
+            AllocatorType::Free(pattern_);
+        }
+#endif
+    }
+
+    bool BeginValue(Context& context) const {
+        // Only array elements need a schema chosen here; any other value is left untouched.
+        if (!context.inArray)
+            return true;
+
+        if (uniqueItems_)
+            context.valueUniqueness = true;
+
+        if (itemsList_) // list validation: the same schema for every element
+            context.valueSchema = itemsList_;
+        else if (!itemsTuple_) // neither form of "items" was recorded
+            context.valueSchema = typeless_;
+        else if (context.arrayElementIndex < itemsTupleCount_) // tuple validation: schema chosen by position
+            context.valueSchema = itemsTuple_[context.arrayElementIndex];
+        else if (additionalItemsSchema_) // past the end of the tuple
+            context.valueSchema = additionalItemsSchema_;
+        else if (additionalItems_)
+            context.valueSchema = typeless_;
+        else
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetItemsString());
+
+        ++context.arrayElementIndex;
+        return true;
+    }
+
+    RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const { // final checks for a value: pattern-property validators, enum, allOf/anyOf/oneOf/not
+        if (context.patternPropertiesValidatorCount > 0) {
+            bool otherValid = false;
+            SizeType count = context.patternPropertiesValidatorCount;
+            if (context.objectPatternValidatorType != Context::kPatternValidatorOnly)
+                otherValid = context.patternPropertiesValidators[--count]->IsValid(); // last validator is judged separately; presumably it mirrors the schema Key() appends last - confirm
+
+            bool patternValid = true;
+            for (SizeType i = 0; i < count; i++) // the remaining validators must all be valid
+                if (!context.patternPropertiesValidators[i]->IsValid()) {
+                    patternValid = false;
+                    break;
+                }
+
+            if (context.objectPatternValidatorType == Context::kPatternValidatorOnly) {
+                if (!patternValid)
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
+            }
+            else if (context.objectPatternValidatorType == Context::kPatternValidatorWithProperty) {
+                if (!patternValid || !otherValid) // both the pattern group and the extra validator must pass
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
+            }
+            else if (!patternValid && !otherValid) // remaining case (kPatternValidatorWithAdditionalProperty): fails only when both fail
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
+        }
+
+        if (enum_) {
+            const uint64_t h = context.factory.GetHashCode(context.hasher); // enum_ holds hash codes; the value is compared by hash
+            for (SizeType i = 0; i < enumCount_; i++)
+                if (enum_[i] == h)
+                    goto foundEnum;
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetEnumString());
+            foundEnum:;
+        }
+
+        if (allOf_.schemas)
+            for (SizeType i = allOf_.begin; i < allOf_.begin + allOf_.count; i++) // validators for this keyword occupy [begin, begin + count)
+                if (!context.validators[i]->IsValid())
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetAllOfString());
+        
+        if (anyOf_.schemas) {
+            for (SizeType i = anyOf_.begin; i < anyOf_.begin + anyOf_.count; i++)
+                if (context.validators[i]->IsValid())
+                    goto foundAny; // one valid sub-schema is enough
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetAnyOfString());
+            foundAny:;
+        }
+
+        if (oneOf_.schemas) {
+            bool oneValid = false;
+            for (SizeType i = oneOf_.begin; i < oneOf_.begin + oneOf_.count; i++)
+                if (context.validators[i]->IsValid()) {
+                    if (oneValid) // a second valid sub-schema violates oneOf
+                        RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString());
+                    else
+                        oneValid = true;
+                }
+            if (!oneValid) // no sub-schema was valid
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString());
+        }
+
+        if (not_ && context.validators[notValidatorIndex_]->IsValid()) // "not" fails when its sub-schema validates
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetNotString());
+
+        return true;
+    }
+
+    bool Null(Context& context) const { // a null is accepted only when the null bit is set in type_
+        if ((type_ & (1 << kNullSchemaType)) == 0)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        return CreateParallelValidator(context);
+    }
+    
+    bool Bool(Context& context, bool) const { // the boolean's value is irrelevant; only the type bit is checked
+        if ((type_ & (1 << kBooleanSchemaType)) == 0)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        return CreateParallelValidator(context);
+    }
+
+    bool Int(Context& context, int i) const {
+        // Parallel validators are created only after the signed-integer checks pass.
+        const bool passed = CheckInt(context, i);
+        return passed && CreateParallelValidator(context);
+    }
+
+    bool Uint(Context& context, unsigned u) const {
+        // Parallel validators are created only after the unsigned-integer checks pass.
+        const bool passed = CheckUint(context, u);
+        return passed && CreateParallelValidator(context);
+    }
+
+    bool Int64(Context& context, int64_t i) const {
+        // Same path as Int(): signed checks first, then the parallel validators.
+        const bool passed = CheckInt(context, i);
+        return passed && CreateParallelValidator(context);
+    }
+
+    bool Uint64(Context& context, uint64_t u) const {
+        // Same path as Uint(): unsigned checks first, then the parallel validators.
+        const bool passed = CheckUint(context, u);
+        return passed && CreateParallelValidator(context);
+    }
+
+    bool Double(Context& context, double d) const {
+        // A double is accepted only when the number bit is set in type_.
+        if ((type_ & (1 << kNumberSchemaType)) == 0)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+
+        // A bound is enforced only when its stored value is non-null.
+        const bool hasMinimum = !minimum_.IsNull();
+        const bool hasMaximum = !maximum_.IsNull();
+        const bool hasMultipleOf = !multipleOf_.IsNull();
+        if ((hasMinimum && !CheckDoubleMinimum(context, d)) ||
+            (hasMaximum && !CheckDoubleMaximum(context, d)) ||
+            (hasMultipleOf && !CheckDoubleMultipleOf(context, d)))
+            return false;
+        return CreateParallelValidator(context);
+    }
+    
+    bool String(Context& context, const Ch* str, SizeType length, bool) const {
+        if ((type_ & (1 << kStringSchemaType)) == 0)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+
+        const bool lengthConstrained = (minLength_ != 0) || (maxLength_ != SizeType(~0));
+        SizeType codePoints = 0;
+        // The length limits are compared only when CountStringCodePoint() returns true.
+        if (lengthConstrained && internal::CountStringCodePoint<EncodingType>(str, length, &codePoints)) {
+            if (codePoints < minLength_)
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinLengthString());
+            if (codePoints > maxLength_)
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxLengthString());
+        }
+
+        if (pattern_ != 0 && !IsPatternMatch(pattern_, str, length))
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternString());
+
+        return CreateParallelValidator(context);
+    }
+
+    bool StartObject(Context& context) const {
+        if ((type_ & (1 << kObjectSchemaType)) == 0)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+
+        if (hasDependencies_ || hasRequired_) { // zeroed per-property "seen" flags, set by Key()
+            const size_t flagBytes = sizeof(bool) * propertyCount_;
+            context.propertyExist = static_cast<bool*>(context.factory.MallocState(flagBytes));
+            std::memset(context.propertyExist, 0, flagBytes);
+        }
+        if (patternProperties_) { // one slot per pattern plus one extra that Key() may append to
+            const size_t slotBytes = sizeof(const SchemaType*) * (patternPropertyCount_ + 1);
+            context.patternPropertiesSchemas = static_cast<const SchemaType**>(context.factory.MallocState(slotBytes));
+            context.patternPropertiesSchemaCount = 0;
+            std::memset(context.patternPropertiesSchemas, 0, slotBytes);
+        }
+
+        return CreateParallelValidator(context);
+    }
+    
+    bool Key(Context& context, const Ch* str, SizeType len, bool) const { // selects the schema(s) for the value of member `str` and marks the property as seen
+        if (patternProperties_) {
+            context.patternPropertiesSchemaCount = 0; // matches are collected afresh for every key
+            for (SizeType i = 0; i < patternPropertyCount_; i++)
+                if (patternProperties_[i].pattern && IsPatternMatch(patternProperties_[i].pattern, str, len)) {
+                    context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = patternProperties_[i].schema;
+                    context.valueSchema = typeless_;
+                }
+        }
+
+        SizeType index;
+        if (FindPropertyIndex(ValueType(str, len).Move(), &index)) { // key is a declared property
+            if (context.patternPropertiesSchemaCount > 0) {
+                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = properties_[index].schema; // appended after the pattern matches
+                context.valueSchema = typeless_;
+                context.valuePatternValidatorType = Context::kPatternValidatorWithProperty;
+            }
+            else
+                context.valueSchema = properties_[index].schema;
+
+            if (context.propertyExist)
+                context.propertyExist[index] = true;
+
+            return true;
+        }
+
+        if (additionalPropertiesSchema_) {
+            if (additionalPropertiesSchema_ && context.patternPropertiesSchemaCount > 0) { // NOTE(review): the first operand is already true here (enclosing if), so the re-test is redundant
+                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = additionalPropertiesSchema_;
+                context.valueSchema = typeless_;
+                context.valuePatternValidatorType = Context::kPatternValidatorWithAdditionalProperty;
+            }
+            else
+                context.valueSchema = additionalPropertiesSchema_;
+            return true;
+        }
+        else if (additionalProperties_) { // additional properties allowed without a schema
+            context.valueSchema = typeless_;
+            return true;
+        }
+
+        if (context.patternPropertiesSchemaCount == 0) // patternProperties are not additional properties
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetAdditionalPropertiesString());
+
+        return true;
+    }
+
+    bool EndObject(Context& context, SizeType memberCount) const {
+        if (hasRequired_)
+            for (SizeType i = 0; i < propertyCount_; i++)
+                if (properties_[i].required && !context.propertyExist[i]) // a required property was never seen
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetRequiredString());
+
+        if (minProperties_ > memberCount)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinPropertiesString());
+
+        if (maxProperties_ < memberCount)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxPropertiesString());
+
+        if (hasDependencies_) {
+            for (SizeType src = 0; src < propertyCount_; src++) {
+                if (!context.propertyExist[src]) // dependencies apply only to properties that are present
+                    continue;
+                const Property& source = properties_[src];
+                if (source.dependencies) { // property dependencies: every flagged target must be present too
+                    for (SizeType dst = 0; dst < propertyCount_; dst++)
+                        if (source.dependencies[dst] && !context.propertyExist[dst])
+                            RAPIDJSON_INVALID_KEYWORD_RETURN(GetDependenciesString());
+                }
+                else if (source.dependenciesSchema && !context.validators[source.dependenciesValidatorIndex]->IsValid())
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetDependenciesString());
+            }
+        }
+
+        return true;
+    }
+
+    bool StartArray(Context& context) const {
+        if ((type_ & (1 << kArraySchemaType)) == 0)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+
+        // Element bookkeeping read by BeginValue() starts fresh for this array.
+        context.inArray = true;
+        context.arrayElementIndex = 0;
+        return CreateParallelValidator(context);
+    }
+
+    bool EndArray(Context& context, SizeType elementCount) const {
+        context.inArray = false; // cleared before the size checks, so it is reset even when they fail
+
+        if (minItems_ > elementCount)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinItemsString());
+
+        if (maxItems_ < elementCount)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxItemsString());
+
+        return true;
+    }
+
+    // Defines a static Get<name>String() accessor per keyword. The text is spelled character by character so it can be stored as Ch, whatever its width; the accessor returns a function-local static ValueType built over that array.
+#define RAPIDJSON_STRING_(name, ...) \
+    static const ValueType& Get##name##String() {\
+        static const Ch s[] = { __VA_ARGS__, '\0' };\
+        static const ValueType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1));\
+        return v;\
+    }
+
+    RAPIDJSON_STRING_(Null, 'n', 'u', 'l', 'l') // type names compared against in AddType()
+    RAPIDJSON_STRING_(Boolean, 'b', 'o', 'o', 'l', 'e', 'a', 'n')
+    RAPIDJSON_STRING_(Object, 'o', 'b', 'j', 'e', 'c', 't')
+    RAPIDJSON_STRING_(Array, 'a', 'r', 'r', 'a', 'y')
+    RAPIDJSON_STRING_(String, 's', 't', 'r', 'i', 'n', 'g')
+    RAPIDJSON_STRING_(Number, 'n', 'u', 'm', 'b', 'e', 'r')
+    RAPIDJSON_STRING_(Integer, 'i', 'n', 't', 'e', 'g', 'e', 'r')
+    RAPIDJSON_STRING_(Type, 't', 'y', 'p', 'e') // schema keyword names from here on
+    RAPIDJSON_STRING_(Enum, 'e', 'n', 'u', 'm')
+    RAPIDJSON_STRING_(AllOf, 'a', 'l', 'l', 'O', 'f')
+    RAPIDJSON_STRING_(AnyOf, 'a', 'n', 'y', 'O', 'f')
+    RAPIDJSON_STRING_(OneOf, 'o', 'n', 'e', 'O', 'f')
+    RAPIDJSON_STRING_(Not, 'n', 'o', 't')
+    RAPIDJSON_STRING_(Properties, 'p', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(Required, 'r', 'e', 'q', 'u', 'i', 'r', 'e', 'd')
+    RAPIDJSON_STRING_(Dependencies, 'd', 'e', 'p', 'e', 'n', 'd', 'e', 'n', 'c', 'i', 'e', 's')
+    RAPIDJSON_STRING_(PatternProperties, 'p', 'a', 't', 't', 'e', 'r', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(AdditionalProperties, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(MinProperties, 'm', 'i', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(MaxProperties, 'm', 'a', 'x', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(Items, 'i', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(MinItems, 'm', 'i', 'n', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(MaxItems, 'm', 'a', 'x', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(AdditionalItems, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(UniqueItems, 'u', 'n', 'i', 'q', 'u', 'e', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(MinLength, 'm', 'i', 'n', 'L', 'e', 'n', 'g', 't', 'h')
+    RAPIDJSON_STRING_(MaxLength, 'm', 'a', 'x', 'L', 'e', 'n', 'g', 't', 'h')
+    RAPIDJSON_STRING_(Pattern, 'p', 'a', 't', 't', 'e', 'r', 'n')
+    RAPIDJSON_STRING_(Minimum, 'm', 'i', 'n', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(Maximum, 'm', 'a', 'x', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(ExclusiveMinimum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'i', 'n', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(ExclusiveMaximum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'a', 'x', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(MultipleOf, 'm', 'u', 'l', 't', 'i', 'p', 'l', 'e', 'O', 'f')
+
+#undef RAPIDJSON_STRING_
+
+private:
+    enum SchemaValueType { // each enumerator is a bit position in type_ (used as 1 << k...SchemaType)
+        kNullSchemaType,
+        kBooleanSchemaType,
+        kObjectSchemaType,
+        kArraySchemaType,
+        kStringSchemaType,
+        kNumberSchemaType,
+        kIntegerSchemaType,
+        kTotalSchemaType // NOTE(review): presumably the number of types; its use is not visible in this part of the file
+    };
+
+#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
+        typedef internal::GenericRegex<EncodingType> RegexType; // rapidjson's own regex engine
+#elif RAPIDJSON_SCHEMA_USE_STDREGEX
+        typedef std::basic_regex<Ch> RegexType; // standard library regex over the schema's character type
+#else
+        typedef char RegexType; // placeholder so RegexType* members still compile; CreatePattern() returns 0 in this configuration
+#endif
+
+    struct SchemaArray { // array of sub-schema pointers plus the slice of context.validators assigned to it
+        SchemaArray() : schemas(), begin(), count() {} // every member value-initialized, so begin is never indeterminate
+        ~SchemaArray() { AllocatorType::Free(schemas); }
+        const SchemaType** schemas; // stays null unless AssignIfExist() fills it
+        SizeType begin; // begin index of context.validators
+        SizeType count; // number of entries in schemas
+    };
+
+    // Appends a copy of v to array a unless a already holds an element equal to v.
+    template <typename V1, typename V2>
+    void AddUniqueElement(V1& a, const V2& v) {
+        bool present = false;
+        for (typename V1::ConstValueIterator itr = a.Begin(); !present && itr != a.End(); ++itr)
+            present = (*itr == v);
+        if (!present) { V1 c(v, *allocator_); a.PushBack(c, *allocator_); }
+    }
+
+    static const ValueType* GetMember(const ValueType& value, const ValueType& name) { // 0 when value has no member called name
+        typename ValueType::ConstMemberIterator found = value.FindMember(name);
+        return found == value.MemberEnd() ? 0 : &(found->value);
+    }
+
+    static void AssignIfExist(bool& out, const ValueType& value, const ValueType& name) {
+        const ValueType* v = GetMember(value, name);
+        if (v && v->IsBool()) // a missing or non-boolean member leaves out untouched
+            out = v->GetBool();
+    }
+
+    static void AssignIfExist(SizeType& out, const ValueType& value, const ValueType& name) {
+        const ValueType* v = GetMember(value, name);
+        if (v && v->IsUint64() && v->GetUint64() <= SizeType(~0)) // only unsigned values that fit SizeType are taken
+            out = static_cast<SizeType>(v->GetUint64());
+    }
+
+    void AssignIfExist(SchemaArray& out, SchemaDocumentType& schemaDocument, const PointerType& p, const ValueType& value, const ValueType& name, const ValueType& document) { // compiles the array member `name` of value into out, one sub-schema per element
+        if (const ValueType* v = GetMember(value, name)) {
+            if (v->IsArray() && v->Size() > 0) { // a missing, non-array or empty member leaves out untouched
+                PointerType q = p.Append(name, allocator_);
+                out.count = v->Size();
+                out.schemas = static_cast<const Schema**>(allocator_->Malloc(out.count * sizeof(const Schema*)));
+                std::memset(out.schemas, 0, sizeof(Schema*)* out.count); // std::-qualified like the other call sites; <cstring> only guarantees the std:: name
+                for (SizeType i = 0; i < out.count; i++)
+                    schemaDocument.CreateSchema(&out.schemas[i], q.Append(i, allocator_), (*v)[i], document);
+                out.begin = validatorCount_; // these schemas get the next out.count validator slots
+                validatorCount_ += out.count;
+            }
+        }
+    }
+
+#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
+    template <typename ValueType>
+    RegexType* CreatePattern(const ValueType& value) {
+        if (!value.IsString())
+            return 0;
+
+        RegexType* r = new (allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString());
+        if (r->IsValid())
+            return r;
+        // Invalid expression: destroy the object, release its storage, report "no pattern".
+        r->~RegexType();
+        AllocatorType::Free(r);
+        return 0;
+    }
+
+    static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType) {
+        // The length argument is unused: Search() is handed only the character pointer.
+        return GenericRegexSearch<RegexType>(*pattern).Search(str);
+    }
+#elif RAPIDJSON_SCHEMA_USE_STDREGEX
+    template <typename ValueType>
+    RegexType* CreatePattern(const ValueType& value) { // returns 0 for non-string values and for expressions std::basic_regex rejects
+        if (!value.IsString()) return 0;
+        RegexType* r = static_cast<RegexType*>(allocator_->Malloc(sizeof(RegexType))); // storage kept in a variable so it can be released on failure
+        try {
+            return new (r) RegexType(value.GetString(), std::size_t(value.GetStringLength()), std::regex_constants::ECMAScript);
+        }
+        catch (const std::regex_error&) { AllocatorType::Free(r); } // placement new does not release the storage when the constructor throws
+        return 0;
+    }
+
+    static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType length) {
+        std::match_results<const Ch*> unusedMatch; // filled by regex_search but not read afterwards
+        return std::regex_search(str, str + length, unusedMatch, *pattern);
+    }
+#else
+    template <typename ValueType>
+    RegexType* CreatePattern(const ValueType&) { return 0; } // no regex engine selected: no pattern object is ever created
+
+    static bool IsPatternMatch(const RegexType*, const Ch *, SizeType) { return true; } // no regex engine selected: every input is reported as matching
+#endif // RAPIDJSON_SCHEMA_USE_STDREGEX
+
+    void AddType(const ValueType& type) { // ORs the bit(s) for one type name into type_; unrecognized names change nothing
+        if      (type == GetNullString()   ) type_ |= 1 << kNullSchemaType;
+        else if (type == GetBooleanString()) type_ |= 1 << kBooleanSchemaType;
+        else if (type == GetObjectString() ) type_ |= 1 << kObjectSchemaType;
+        else if (type == GetArrayString()  ) type_ |= 1 << kArraySchemaType;
+        else if (type == GetStringString() ) type_ |= 1 << kStringSchemaType;
+        else if (type == GetIntegerString()) type_ |= 1 << kIntegerSchemaType;
+        else if (type == GetNumberString() ) type_ |= (1 << kNumberSchemaType) | (1 << kIntegerSchemaType); // "number" also sets the integer bit
+    }
+
+    bool CreateParallelValidator(Context& context) const {
+        if (enum_ || context.arrayUniqueness)
+            context.hasher = context.factory.CreateHasher();
+
+        if (validatorCount_ == 0) // this schema has no sub-validators to create
+            return true;
+
+        RAPIDJSON_ASSERT(context.validators == 0);
+        const size_t tableBytes = sizeof(ISchemaValidator*) * validatorCount_;
+        context.validators = static_cast<ISchemaValidator**>(context.factory.MallocState(tableBytes));
+        context.validatorCount = validatorCount_;
+
+        // Each keyword fills the slots that were assigned to it.
+        if (allOf_.schemas)
+            CreateSchemaValidators(context, allOf_);
+        if (anyOf_.schemas)
+            CreateSchemaValidators(context, anyOf_);
+        if (oneOf_.schemas)
+            CreateSchemaValidators(context, oneOf_);
+        if (not_)
+            context.validators[notValidatorIndex_] = context.factory.CreateSchemaValidator(*not_);
+        if (hasSchemaDependencies_) {
+            for (SizeType i = 0; i < propertyCount_; i++) {
+                const Property& prop = properties_[i];
+                if (prop.dependenciesSchema)
+                    context.validators[prop.dependenciesValidatorIndex] = context.factory.CreateSchemaValidator(*prop.dependenciesSchema);
+            }
+        }
+        return true;
+    }
+
+    void CreateSchemaValidators(Context& context, const SchemaArray& schemas) const {
+        ISchemaValidator** slot = context.validators + schemas.begin; // first slot assigned to this array
+        for (SizeType i = 0; i < schemas.count; i++, slot++) *slot = context.factory.CreateSchemaValidator(*schemas.schemas[i]);
+    }
+
+    // O(n) scan of properties_ for a name with the same length and bytes; writes *outIndex only on success.
+    bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const {
+        const SizeType wantedLength = name.GetStringLength();
+        const Ch* const wanted = name.GetString();
+        for (SizeType i = 0; i < propertyCount_; i++) {
+            const SValue& candidate = properties_[i].name;
+            if (candidate.GetStringLength() != wantedLength || std::memcmp(candidate.GetString(), wanted, sizeof(Ch) * wantedLength) != 0)
+                continue;
+            *outIndex = i;
+            return true;
+        }
+        return false;
+    }
+
+    bool CheckInt(Context& context, int64_t i) const { // type, minimum, maximum and multipleOf checks for a signed integer
+        if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType))))
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+
+        if (!minimum_.IsNull()) {
+            if (minimum_.IsInt64()) {
+                if (exclusiveMinimum_ ? i <= minimum_.GetInt64() : i < minimum_.GetInt64())
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
+            }
+            else if (minimum_.IsUint64()) {
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); // i <= max(int64_t) < minimum.GetUint64()
+            }
+            else if (!CheckDoubleMinimum(context, static_cast<double>(i))) // non-integer bound: compare as doubles
+                return false;
+        }
+
+        if (!maximum_.IsNull()) {
+            if (maximum_.IsInt64()) {
+                if (exclusiveMaximum_ ? i >= maximum_.GetInt64() : i > maximum_.GetInt64())
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
+            }
+            else if (maximum_.IsUint64()) { /* do nothing */ } // i <= max(int64_t) < maximum_.GetUint64()
+            else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
+                return false;
+        }
+
+        if (!multipleOf_.IsNull()) {
+            if (multipleOf_.IsUint64()) { // the constructor stores multipleOf only when it is > 0, so the divisor is non-zero
+                const uint64_t magnitude = i >= 0 ? static_cast<uint64_t>(i) : 0 - static_cast<uint64_t>(i); // |i| in unsigned arithmetic: signed -i overflows for INT64_MIN
+                if (magnitude % multipleOf_.GetUint64() != 0)
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
+            }
+            else if (!CheckDoubleMultipleOf(context, static_cast<double>(i)))
+                return false;
+        }
+
+        return true;
+    }
+
+    bool CheckUint(Context& context, uint64_t i) const { // type, minimum, maximum and multipleOf checks for an unsigned integer
+        if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType))))
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+
+        if (!minimum_.IsNull()) {
+            if (minimum_.IsUint64()) { // tested before IsInt64, so the signed branch below only sees negative bounds
+                if (exclusiveMinimum_ ? i <= minimum_.GetUint64() : i < minimum_.GetUint64())
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
+            }
+            else if (minimum_.IsInt64())
+                /* do nothing */; // i >= 0 > minimum.Getint64()
+            else if (!CheckDoubleMinimum(context, static_cast<double>(i))) // non-integer bound: compare as doubles
+                return false;
+        }
+
+        if (!maximum_.IsNull()) {
+            if (maximum_.IsUint64()) {
+                if (exclusiveMaximum_ ? i >= maximum_.GetUint64() : i > maximum_.GetUint64())
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
+            }
+            else if (maximum_.IsInt64())
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); // i >= 0 > maximum_
+            else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
+                return false;
+        }
+
+        if (!multipleOf_.IsNull()) {
+            if (multipleOf_.IsUint64()) { // the constructor stores multipleOf only when it is > 0, so the divisor is non-zero
+                if (i % multipleOf_.GetUint64() != 0)
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
+            }
+            else if (!CheckDoubleMultipleOf(context, static_cast<double>(i)))
+                return false;
+        }
+
+        return true;
+    }
+
+    bool CheckDoubleMinimum(Context& context, double d) const { // the bound itself is rejected only when exclusiveMinimum_ is set
+        if ((exclusiveMinimum_ && d <= minimum_.GetDouble()) || (!exclusiveMinimum_ && d < minimum_.GetDouble()))
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
+        return true;
+    }
+
+    bool CheckDoubleMaximum(Context& context, double d) const { // the bound itself is rejected only when exclusiveMaximum_ is set
+        if ((exclusiveMaximum_ && d >= maximum_.GetDouble()) || (!exclusiveMaximum_ && d > maximum_.GetDouble()))
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
+        return true;
+    }
+
+    bool CheckDoubleMultipleOf(Context& context, double d) const { // works on magnitudes; fails when a positive remainder is left
+        const double magnitude = std::abs(d), step = std::abs(multipleOf_.GetDouble());
+        const double wholeSteps = std::floor(magnitude / step);
+        const double remainder = magnitude - wholeSteps * step;
+        if (remainder > 0.0)
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
+        return true;
+    }
+
+    struct Property { // one named property with its schema, required flag and dependency information
+        Property() : schema(), dependenciesSchema(), dependenciesValidatorIndex(), dependencies(), required(false) {}
+        ~Property() { AllocatorType::Free(dependencies); }
+        SValue name; // compared by length and bytes in FindPropertyIndex()
+        const SchemaType* schema;
+        const SchemaType* dependenciesSchema; // set when the dependency is given as a schema object
+        SizeType dependenciesValidatorIndex; // slot in context.validators used for dependenciesSchema
+        bool* dependencies; // propertyCount_ flags; [j] is true when this property requires property j
+        bool required;
+    };
+
+    struct PatternProperty { // one patternProperties entry: a compiled pattern and the schema for keys that match it
+        PatternProperty() : schema(), pattern() {}
+        ~PatternProperty() { 
+            if (pattern) { // pattern is null when CreatePattern() returned 0
+                pattern->~RegexType();
+                AllocatorType::Free(pattern);
+            }
+        }
+        const SchemaType* schema;
+        RegexType* pattern; // destroyed and freed by the destructor above
+    };
+
+    AllocatorType* allocator_;
+    const SchemaType* typeless_; // schema assigned to values that have no specific schema (see BeginValue() and Key())
+    uint64_t* enum_; // hash codes compared against the value's hash in EndValue()
+    SizeType enumCount_;
+    SchemaArray allOf_;
+    SchemaArray anyOf_;
+    SchemaArray oneOf_;
+    const SchemaType* not_;
+    unsigned type_; // bitmask with one bit per SchemaValueType (1 << k...SchemaType)
+    SizeType validatorCount_; // size of the context.validators table allocated in CreateParallelValidator()
+    SizeType notValidatorIndex_; // slot of the not_ validator in context.validators
+
+    Property* properties_;
+    const SchemaType* additionalPropertiesSchema_;
+    PatternProperty* patternProperties_;
+    SizeType patternPropertyCount_;
+    SizeType propertyCount_;
+    SizeType minProperties_;
+    SizeType maxProperties_;
+    bool additionalProperties_;
+    bool hasDependencies_;
+    bool hasRequired_;
+    bool hasSchemaDependencies_;
+
+    const SchemaType* additionalItemsSchema_;
+    const SchemaType* itemsList_; // "items" given as a single schema (list validation)
+    const SchemaType** itemsTuple_; // "items" given as an array of schemas (tuple validation)
+    SizeType itemsTupleCount_;
+    SizeType minItems_;
+    SizeType maxItems_;
+    bool additionalItems_;
+    bool uniqueItems_;
+
+    RegexType* pattern_; // null when there is no usable pattern
+    SizeType minLength_;
+    SizeType maxLength_;
+
+    SValue minimum_; // null unless the schema supplied a number
+    SValue maximum_; // null unless the schema supplied a number
+    SValue multipleOf_; // null unless the schema supplied a number > 0
+    bool exclusiveMinimum_;
+    bool exclusiveMaximum_;
+};
+
+template<typename Stack, typename Ch>
+struct TokenHelper {
+    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) { // pushes '/' followed by index in decimal
+        *documentStack.template Push<Ch>() = '/';
+        char digits[21];
+        const char* const end = (sizeof(SizeType) == 4) ? u32toa(index, digits) : u64toa(index, digits);
+        for (const char* p = digits; p != end; ++p) // widen each digit to Ch while pushing
+            *documentStack.template Push<Ch>() = static_cast<Ch>(*p);
+    }
+};
+
+// Partial specialization for char: formats the index directly into the stack's storage, avoiding a temporary buffer copy.
+template <typename Stack>
+struct TokenHelper<Stack, char> {
+    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) {
+        if (sizeof(SizeType) == 4) {
+            char *buffer = documentStack.template Push<char>(1 + 10); // '/' + uint
+            *buffer++ = '/';
+            const char* end = internal::u32toa(index, buffer);
+             documentStack.template Pop<char>(static_cast<size_t>(10 - (end - buffer))); // Give back the reserved characters that were not used.
+        }
+        else {
+            char *buffer = documentStack.template Push<char>(1 + 20); // '/' + uint64
+            *buffer++ = '/';
+            const char* end = internal::u64toa(index, buffer);
+            documentStack.template Pop<char>(static_cast<size_t>(20 - (end - buffer))); // Give back the reserved characters that were not used.
+        }
+    }
+};
+
+} // namespace internal
+
+///////////////////////////////////////////////////////////////////////////////
+// IGenericRemoteSchemaDocumentProvider
+
+template <typename SchemaDocumentType>
+class IGenericRemoteSchemaDocumentProvider { // Abstract interface that supplies schema documents identified by a URI.
+public:
+    typedef typename SchemaDocumentType::Ch Ch;
+
+    virtual ~IGenericRemoteSchemaDocumentProvider() {}
+    virtual const SchemaDocumentType* GetRemoteDocument(const Ch* uri, SizeType length) = 0; // uri comes with an explicit length; HandleRefSchema() treats a null result as "not found".
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericSchemaDocument
+
+//! JSON schema document.
+/*!
+    A JSON schema document is a compiled version of a JSON schema.
+    It is basically a tree of internal::Schema.
+
+    \note This is an immutable class (i.e. its instance cannot be modified after construction).
+    \tparam ValueT Type of JSON value (e.g. \c Value ), which also determines the encoding.
+    \tparam Allocator Allocator type for allocating memory of this document.
+*/
+template <typename ValueT, typename Allocator = CrtAllocator>
+class GenericSchemaDocument {
+public:
+    typedef ValueT ValueType; // JSON value type of the source document.
+    typedef IGenericRemoteSchemaDocumentProvider<GenericSchemaDocument> IRemoteSchemaDocumentProviderType; // Interface queried for remote $ref targets.
+    typedef Allocator AllocatorType;
+    typedef typename ValueType::EncodingType EncodingType; // Encoding is taken from the value type.
+    typedef typename EncodingType::Ch Ch;
+    typedef internal::Schema<GenericSchemaDocument> SchemaType; // Node type of the compiled schema tree.
+    typedef GenericPointer<ValueType, Allocator> PointerType; // JSON Pointer type used as the key of each schema.
+    friend class internal::Schema<GenericSchemaDocument>;
+    template <typename, typename, typename>
+    friend class GenericSchemaValidator; // The validator calls the private GetPointer().
+
+    //! Constructor.
+    /*!
+        Compile a JSON document into a schema document.
+
+        \param document A JSON document as source.
+        \param remoteProvider An optional remote schema document provider for resolving remote reference. Can be null.
+        \param allocator An optional allocator instance for allocating memory. Can be null, in which case the document creates and owns one.
+    */
+    explicit GenericSchemaDocument(const ValueType& document, IRemoteSchemaDocumentProviderType* remoteProvider = 0, Allocator* allocator = 0) :
+        remoteProvider_(remoteProvider),
+        allocator_(allocator),
+        ownAllocator_(),
+        root_(),
+        typeless_(),
+        schemaMap_(allocator, kInitialSchemaMapSize),
+        schemaRef_(allocator, kInitialSchemaRefSize)
+    {
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); // No allocator supplied: create one that the destructor deletes.
+
+        typeless_ = static_cast<SchemaType*>(allocator_->Malloc(sizeof(SchemaType))); // Fallback schema, compiled from an empty object value.
+        new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), 0);
+
+        // Generate root schema, it will call CreateSchema() to create sub-schemas,
+        // and HandleRefSchema() to handle any $ref (local ones are queued in schemaRef_).
+        CreateSchemaRecursive(&root_, PointerType(), document, document);
+
+        // Resolve $ref
+        while (!schemaRef_.Empty()) {
+            SchemaRefEntry* refEntry = schemaRef_.template Pop<SchemaRefEntry>(1);
+            if (const SchemaType* s = GetSchema(refEntry->target)) { // Target has a compiled schema: patch the waiting slot.
+                if (refEntry->schema)
+                    *refEntry->schema = s;
+
+                // Create entry in map if not exist
+                if (!GetSchema(refEntry->source)) {
+                    new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(refEntry->source, const_cast<SchemaType*>(s), false, allocator_); // owned == false: the schema is destroyed by its original entry only.
+                }
+            }
+            else if (refEntry->schema)
+                *refEntry->schema = typeless_; // Unresolvable target: fall back to the typeless schema.
+
+            refEntry->~SchemaRefEntry(); // Entries are placement-constructed on the stack, so destroy them explicitly.
+        }
+
+        RAPIDJSON_ASSERT(root_ != 0);
+
+        schemaRef_.ShrinkToFit(); // Deallocate all memory for ref
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move constructor in C++11: takes over rhs's allocator, schemas and stacks.
+    GenericSchemaDocument(GenericSchemaDocument&& rhs) RAPIDJSON_NOEXCEPT :
+        remoteProvider_(rhs.remoteProvider_),
+        allocator_(rhs.allocator_),
+        ownAllocator_(rhs.ownAllocator_),
+        root_(rhs.root_), // NOTE(review): rhs.root_ is not reset below, so rhs.GetRoot() still refers to the moved schema.
+        typeless_(rhs.typeless_),
+        schemaMap_(std::move(rhs.schemaMap_)),
+        schemaRef_(std::move(rhs.schemaRef_))
+    {
+        rhs.remoteProvider_ = 0;
+        rhs.allocator_ = 0;
+        rhs.ownAllocator_ = 0; // Keeps rhs's destructor from deleting the allocator now held by this object.
+        rhs.typeless_ = 0; // Keeps rhs's destructor from destroying the typeless schema.
+    }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+
+    //! Destructor: destroys the map entries, the typeless schema and the owned allocator (if any).
+    ~GenericSchemaDocument() {
+        while (!schemaMap_.Empty())
+            schemaMap_.template Pop<SchemaEntry>(1)->~SchemaEntry(); // ~SchemaEntry destroys and frees the schema only when the entry owns it.
+
+        if (typeless_) { // Null after this object has been moved from.
+            typeless_->~SchemaType();
+            Allocator::Free(typeless_);
+        }
+
+        RAPIDJSON_DELETE(ownAllocator_); // Non-null only when the constructor created the allocator itself.
+    }
+
+    //! Get the root schema (the constructor asserts that it is non-null).
+    const SchemaType& GetRoot() const { return *root_; }
+
+private:
+    //! Prohibit copying (declared private, no definition in this class body)
+    GenericSchemaDocument(const GenericSchemaDocument&);
+    //! Prohibit assignment (declared private, no definition in this class body)
+    GenericSchemaDocument& operator=(const GenericSchemaDocument&);
+
+    struct SchemaRefEntry { // A pending local $ref: where it occurred, what it points to, and the slot to patch.
+        SchemaRefEntry(const PointerType& s, const PointerType& t, const SchemaType** outSchema, Allocator *allocator) : source(s, allocator), target(t, allocator), schema(outSchema) {}
+        PointerType source; // Pointer of the schema that contains the $ref.
+        PointerType target; // Pointer parsed from the $ref string.
+        const SchemaType** schema; // Slot that receives the resolved schema; may be null.
+    };
+
+    struct SchemaEntry { // Maps a JSON Pointer to a compiled schema.
+        SchemaEntry(const PointerType& p, SchemaType* s, bool o, Allocator* allocator) : pointer(p, allocator), schema(s), owned(o) {}
+        ~SchemaEntry() {
+            if (owned) { // Entries added while resolving $ref share a schema and are created with owned == false.
+                schema->~SchemaType();
+                Allocator::Free(schema);
+            }
+        }
+        PointerType pointer;
+        SchemaType* schema;
+        bool owned; // True when this entry is responsible for destroying the schema.
+    };
+
+    void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) { // Walks v recursively and compiles every object value that has no schema registered under its pointer yet.
+        if (schema)
+            *schema = typeless_; // Default until CreateSchema()/HandleRefSchema() or the later $ref resolution overwrites it.
+
+        if (v.GetType() == kObjectType) {
+            const SchemaType* s = GetSchema(pointer);
+            if (!s)
+                CreateSchema(schema, pointer, v, document);
+
+            for (typename ValueType::ConstMemberIterator itr = v.MemberBegin(); itr != v.MemberEnd(); ++itr)
+                CreateSchemaRecursive(0, pointer.Append(itr->name, allocator_), itr->value, document); // Members are visited with a null output slot.
+        }
+        else if (v.GetType() == kArrayType)
+            for (SizeType i = 0; i < v.Size(); i++)
+                CreateSchemaRecursive(0, pointer.Append(i, allocator_), v[i], document); // Array elements are visited the same way, keyed by index.
+    }
+
+    void CreateSchema(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) { // Compiles the object v into a schema registered under pointer, unless HandleRefSchema() takes it as a $ref.
+        RAPIDJSON_ASSERT(pointer.IsValid());
+        if (v.IsObject()) {
+            if (!HandleRefSchema(pointer, schema, v, document)) {
+                SchemaType* s = new (allocator_->Malloc(sizeof(SchemaType))) SchemaType(this, pointer, v, document, allocator_); // Placement-new into memory obtained from allocator_.
+                new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(pointer, s, true, allocator_); // owned == true: this map entry destroys the schema.
+                if (schema)
+                    *schema = s;
+            }
+        }
+    }
+
+    bool HandleRefSchema(const PointerType& source, const SchemaType** schema, const ValueType& v, const ValueType& document) { // Returns true when v has a "$ref" that was resolved or queued; false when the caller should compile v itself.
+        static const Ch kRefString[] = { '$', 'r', 'e', 'f', '\0' };
+        static const ValueType kRefValue(kRefString, 4); // Length 4 excludes the terminating '\0'.
+
+        typename ValueType::ConstMemberIterator itr = v.FindMember(kRefValue);
+        if (itr == v.MemberEnd())
+            return false;
+
+        if (itr->value.IsString()) {
+            SizeType len = itr->value.GetStringLength();
+            if (len > 0) {
+                const Ch* s = itr->value.GetString();
+                SizeType i = 0;
+                while (i < len && s[i] != '#') // Find the first #
+                    i++;
+
+                if (i > 0) { // Remote reference, resolve immediately
+                    if (remoteProvider_) {
+                        if (const GenericSchemaDocument* remoteDocument = remoteProvider_->GetRemoteDocument(s, i)) { // Only the part before '#' is passed as the URI.
+                            PointerType pointer(&s[i], len - i, allocator_); // Everything from the first '#' on (empty when there is none).
+                            if (pointer.IsValid()) {
+                                if (const SchemaType* sc = remoteDocument->GetSchema(pointer)) {
+                                    if (schema)
+                                        *schema = sc;
+                                    return true;
+                                }
+                            }
+                        }
+                    }
+                }
+                else if (s[i] == '#') { // Local reference, defer resolution
+                    PointerType pointer(&s[i], len - i, allocator_);
+                    if (pointer.IsValid()) {
+                        if (const ValueType* nv = pointer.Get(document)) // If the target value itself carries a $ref, try to handle that one first.
+                            if (HandleRefSchema(source, schema, *nv, document))
+                                return true;
+
+                        new (schemaRef_.template Push<SchemaRefEntry>()) SchemaRefEntry(source, pointer, schema, allocator_); // Otherwise queue it; the constructor drains schemaRef_ after all schemas are created.
+                        return true;
+                    }
+                }
+            }
+        }
+        return false; // No usable $ref (including an unresolved remote one): CreateSchema() compiles v as an ordinary schema.
+    }
+
+    const SchemaType* GetSchema(const PointerType& pointer) const { // Linear search of schemaMap_ by pointer; returns null when no entry matches.
+        for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target)
+            if (pointer == target->pointer)
+                return target->schema;
+        return 0;
+    }
+
+    PointerType GetPointer(const SchemaType* schema) const { // Reverse lookup: pointer of the first entry holding this schema; a default-constructed pointer when none matches.
+        for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target)
+            if (schema == target->schema)
+                return target->pointer;
+        return PointerType();
+    }
+
+    const SchemaType* GetTypeless() const { return typeless_; } // Fallback schema created in the constructor.
+
+    static const size_t kInitialSchemaMapSize = 64; // Passed to schemaMap_'s constructor (presumably its initial capacity - confirm in internal::Stack).
+    static const size_t kInitialSchemaRefSize = 64; // Passed to schemaRef_'s constructor (presumably its initial capacity - confirm in internal::Stack).
+
+    IRemoteSchemaDocumentProviderType* remoteProvider_; // May be null; never deleted by this class.
+    Allocator *allocator_; // Allocator in use: either supplied by the caller or the owned one.
+    Allocator *ownAllocator_; // Non-null only when this object created the allocator; deleted in the destructor.
+    const SchemaType* root_;                //!< Root schema.
+    SchemaType* typeless_; // Fallback schema, created in the constructor and destroyed in the destructor.
+    internal::Stack<Allocator> schemaMap_;  // Stores created Pointer -> Schemas
+    internal::Stack<Allocator> schemaRef_;  // Stores Pointer from $ref and schema which holds the $ref
+};
+
+//! GenericSchemaDocument using Value type and the default allocator (CrtAllocator).
+typedef GenericSchemaDocument<Value> SchemaDocument;
+//! IGenericRemoteSchemaDocumentProvider using SchemaDocument.
+typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocumentProvider;
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericSchemaValidator
+
+//! JSON Schema Validator.
+/*!
+    A SAX style JSON schema validator.
+    It uses a \c GenericSchemaDocument to validate SAX events.
+    It delegates the incoming SAX events to an output handler.
+    The default output handler does nothing.
+    It can be reused multiple times by calling \c Reset().
+
+    \tparam SchemaDocumentType Type of schema document.
+    \tparam OutputHandler Type of output handler. Default handler does nothing.
+    \tparam StateAllocator Allocator for storing the internal validation states.
+*/
+template <
+    typename SchemaDocumentType,
+    typename OutputHandler = BaseReaderHandler<typename SchemaDocumentType::SchemaType::EncodingType>,
+    typename StateAllocator = CrtAllocator>
+class GenericSchemaValidator :
+    public internal::ISchemaStateFactory<typename SchemaDocumentType::SchemaType>, 
+    public internal::ISchemaValidator
+{
+public:
+    typedef typename SchemaDocumentType::SchemaType SchemaType; // Schema node type of the document being validated against.
+    typedef typename SchemaDocumentType::PointerType PointerType; // JSON Pointer type returned by the GetInvalid*Pointer() accessors.
+    typedef typename SchemaType::EncodingType EncodingType;
+    typedef typename EncodingType::Ch Ch; // Character type of keys, strings and the document path.
+
+    //! Constructor without output handler.
+    /*!
+        \param schemaDocument The schema document to conform to.
+        \param allocator Optional allocator for storing internal validation states. Can be null; GetStateAllocator() then creates one on first use.
+        \param schemaStackCapacity Optional initial capacity of schema path stack.
+        \param documentStackCapacity Optional initial capacity of document path stack.
+    */
+    GenericSchemaValidator(
+        const SchemaDocumentType& schemaDocument,
+        StateAllocator* allocator = 0, 
+        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
+        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        :
+        schemaDocument_(&schemaDocument), // Stored by address: the schema document has to outlive the validator.
+        root_(schemaDocument.GetRoot()),
+        stateAllocator_(allocator),
+        ownStateAllocator_(0),
+        schemaStack_(allocator, schemaStackCapacity),
+        documentStack_(allocator, documentStackCapacity),
+        outputHandler_(0), // No handler: events are validated but not forwarded.
+        valid_(true)
+#if RAPIDJSON_SCHEMA_VERBOSE
+        , depth_(0)
+#endif
+    {
+    }
+
+    //! Constructor with output handler; \c outputHandler is called for each event after the schema checks for that event have passed.
+    /*!
+        \param schemaDocument The schema document to conform to.
+        \param allocator Optional allocator for storing internal validation states. Can be null; GetStateAllocator() then creates one on first use.
+        \param schemaStackCapacity Optional initial capacity of schema path stack.
+        \param documentStackCapacity Optional initial capacity of document path stack.
+    */
+    GenericSchemaValidator(
+        const SchemaDocumentType& schemaDocument,
+        OutputHandler& outputHandler,
+        StateAllocator* allocator = 0, 
+        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
+        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        :
+        schemaDocument_(&schemaDocument), // Stored by address: the schema document has to outlive the validator.
+        root_(schemaDocument.GetRoot()),
+        stateAllocator_(allocator),
+        ownStateAllocator_(0),
+        schemaStack_(allocator, schemaStackCapacity),
+        documentStack_(allocator, documentStackCapacity),
+        outputHandler_(&outputHandler), // Stored by address: the handler has to outlive the validator.
+        valid_(true)
+#if RAPIDJSON_SCHEMA_VERBOSE
+        , depth_(0)
+#endif
+    {
+    }
+
+    //! Destructor.
+    ~GenericSchemaValidator() {
+        Reset(); // Pops and destroys any contexts still on the schema stack.
+        RAPIDJSON_DELETE(ownStateAllocator_); // Non-null only when GetStateAllocator() had to create the allocator.
+    }
+
+    //! Reset the internal states so the validator can be used for another document.
+    void Reset() {
+        while (!schemaStack_.Empty())
+            PopSchema(); // Destroys each context (and its hash-code array, if any).
+        documentStack_.Clear();
+        valid_ = true;
+    }
+
+    //! Checks whether the current state is valid.
+    // Implementation of ISchemaValidator
+    virtual bool IsValid() const { return valid_; } // False once a handler has rejected an event; Reset() sets it back to true.
+
+    //! Gets the JSON pointer pointing to the invalid schema (a default-constructed pointer when the schema stack is empty).
+    PointerType GetInvalidSchemaPointer() const {
+        return schemaStack_.Empty() ? PointerType() : schemaDocument_->GetPointer(&CurrentSchema());
+    }
+
+    //! Gets the keyword of the invalid schema (null when the schema stack is empty).
+    const Ch* GetInvalidSchemaKeyword() const {
+        return schemaStack_.Empty() ? 0 : CurrentContext().invalidKeyword;
+    }
+
+    //! Gets the JSON pointer pointing to the invalid value (a default-constructed pointer when the document stack is empty).
+    PointerType GetInvalidDocumentPointer() const {
+        return documentStack_.Empty() ? PointerType() : PointerType(documentStack_.template Bottom<Ch>(), documentStack_.GetSize() / sizeof(Ch)); // Stack size is in bytes; divide to get the length in Ch units.
+    }
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_() \
+RAPIDJSON_MULTILINEMACRO_BEGIN\
+    *documentStack_.template Push<Ch>() = '\0';\
+    documentStack_.template Pop<Ch>(1);\
+    internal::PrintInvalidDocument(documentStack_.template Bottom<Ch>());\
+RAPIDJSON_MULTILINEMACRO_END // Writes a '\0' after the document path (push, then pop) so the path can be printed as a C string.
+#else // !RAPIDJSON_SCHEMA_VERBOSE
+#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_() // Expands to nothing when verbose output is disabled.
+#endif // RAPIDJSON_SCHEMA_VERBOSE
+
+#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_(method, arg1)\
+    if (!valid_) return false; \
+    if (!BeginValue() || !CurrentSchema().method arg1) {\
+        RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_();\
+        return valid_ = false;\
+    } // Rejects the event if the validator is already invalid, or if BeginValue() or the schema's own handler fails.
+
+#define RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2)\
+    for (Context* context = schemaStack_.template Bottom<Context>(); context != schemaStack_.template End<Context>(); context++) {\
+        if (context->hasher)\
+            static_cast<HasherType*>(context->hasher)->method arg2;\
+        if (context->validators)\
+            for (SizeType i_ = 0; i_ < context->validatorCount; i_++)\
+                static_cast<GenericSchemaValidator*>(context->validators[i_])->method arg2;\
+        if (context->patternPropertiesValidators)\
+            for (SizeType i_ = 0; i_ < context->patternPropertiesValidatorCount; i_++)\
+                static_cast<GenericSchemaValidator*>(context->patternPropertiesValidators[i_])->method arg2;\
+    } // Forwards the event to the hasher and the sub-validators of every context on the schema stack.
+
+#define RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)\
+    return valid_ = EndValue() && (!outputHandler_ || outputHandler_->method arg2) // No trailing semicolon here: the use site supplies it.
+
+#define RAPIDJSON_SCHEMA_HANDLE_VALUE_(method, arg1, arg2) \
+    RAPIDJSON_SCHEMA_HANDLE_BEGIN_   (method, arg1);\
+    RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2);\
+    RAPIDJSON_SCHEMA_HANDLE_END_     (method, arg2) // Full sequence for a scalar value: BEGIN, then PARALLEL, then END.
+
+    bool Null()             { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Null,   (CurrentContext()   ), ( )); } // Each scalar handler runs the BEGIN / PARALLEL / END sequence for its event.
+    bool Bool(bool b)       { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Bool,   (CurrentContext(), b), (b)); }
+    bool Int(int i)         { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int,    (CurrentContext(), i), (i)); }
+    bool Uint(unsigned u)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint,   (CurrentContext(), u), (u)); }
+    bool Int64(int64_t i)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int64,  (CurrentContext(), i), (i)); }
+    bool Uint64(uint64_t u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint64, (CurrentContext(), u), (u)); }
+    bool Double(double d)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Double, (CurrentContext(), d), (d)); }
+    bool RawNumber(const Ch* str, SizeType length, bool copy) // Handled exactly like String(): validated and forwarded through the String event.
+                                    { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); }
+    bool String(const Ch* str, SizeType length, bool copy)
+                                    { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); }
+
+    bool StartObject() { // Opens an object value; EndValue() is not called here but from EndObject().
+        RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartObject, (CurrentContext()));
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartObject, ());
+        return valid_ = !outputHandler_ || outputHandler_->StartObject();
+    }
+    
+    bool Key(const Ch* str, SizeType len, bool copy) { // Validates a member name and forwards it to the parallel validators and the output handler.
+        if (!valid_) return false;
+        AppendToken(str, len); // Extend the document path with the escaped member name.
+        if (!CurrentSchema().Key(CurrentContext(), str, len, copy)) return valid_ = false;
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(Key, (str, len, copy));
+        return valid_ = !outputHandler_ || outputHandler_->Key(str, len, copy);
+    }
+    
+    bool EndObject(SizeType memberCount) { 
+        if (!valid_) return false;
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndObject, (memberCount)); // Parallel validators see the event before the schema's own EndObject() check.
+        if (!CurrentSchema().EndObject(CurrentContext(), memberCount)) return valid_ = false;
+        RAPIDJSON_SCHEMA_HANDLE_END_(EndObject, (memberCount)); // Closes the value via EndValue() and forwards the event to the output handler.
+    }
+
+    bool StartArray() { // Opens an array value; EndValue() is not called here but from EndArray().
+        RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartArray, (CurrentContext()));
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartArray, ());
+        return valid_ = !outputHandler_ || outputHandler_->StartArray();
+    }
+    
+    bool EndArray(SizeType elementCount) {
+        if (!valid_) return false;
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndArray, (elementCount)); // Parallel validators see the event before the schema's own EndArray() check.
+        if (!CurrentSchema().EndArray(CurrentContext(), elementCount)) return valid_ = false;
+        RAPIDJSON_SCHEMA_HANDLE_END_(EndArray, (elementCount)); // Closes the value via EndValue() and forwards the event to the output handler.
+    }
+
+#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_
+#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_
+#undef RAPIDJSON_SCHEMA_HANDLE_PARALLEL_
+#undef RAPIDJSON_SCHEMA_HANDLE_END_ // Also a private helper of the handlers above; without this it leaks to every includer.
+#undef RAPIDJSON_SCHEMA_HANDLE_VALUE_
+    // Implementation of ISchemaStateFactory<SchemaType>
+    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType& root) { // Creates a sub-validator rooted at the given schema, using the private constructor.
+        return new (GetStateAllocator().Malloc(sizeof(GenericSchemaValidator))) GenericSchemaValidator(*schemaDocument_, root,
+#if RAPIDJSON_SCHEMA_VERBOSE
+        depth_ + 1,
+#endif
+        &GetStateAllocator()); // The sub-validator shares this validator's state allocator.
+    }
+
+    virtual void DestroySchemaValidator(ISchemaValidator* validator) { // Counterpart of CreateSchemaValidator(): explicit destructor call, then free the storage.
+        GenericSchemaValidator* v = static_cast<GenericSchemaValidator*>(validator);
+        v->~GenericSchemaValidator();
+        StateAllocator::Free(v);
+    }
+
+    virtual void* CreateHasher() { // Returns a HasherType placement-constructed in state-allocator memory, as an opaque pointer.
+        return new (GetStateAllocator().Malloc(sizeof(HasherType))) HasherType(&GetStateAllocator());
+    }
+
+    virtual uint64_t GetHashCode(void* hasher) { // hasher must be a pointer obtained from CreateHasher().
+        return static_cast<HasherType*>(hasher)->GetHashCode();
+    }
+
+    virtual void DestroryHasher(void* hasher) { // NOTE(review): the "Destrory" spelling presumably matches the ISchemaStateFactory declaration outside this view; do not rename here alone.
+        HasherType* h = static_cast<HasherType*>(hasher);
+        h->~HasherType(); // Counterpart of CreateHasher(): explicit destructor call, then free the storage.
+        StateAllocator::Free(h);
+    }
+
+    virtual void* MallocState(size_t size) { // Raw allocation from the state allocator (created on first use if none was supplied).
+        return GetStateAllocator().Malloc(size);
+    }
+
+    virtual void FreeState(void* p) { // Releases memory through StateAllocator's static Free().
+        StateAllocator::Free(p);
+    }
+
+private:
+    typedef typename SchemaType::Context Context; // Per-value validation state stored on schemaStack_.
+    typedef GenericValue<UTF8<>, StateAllocator> HashCodeArray; // JSON array holding the uint64 hash codes of array elements seen so far (see EndValue()).
+    typedef internal::Hasher<EncodingType, StateAllocator> HasherType; // Concrete type behind the opaque hasher pointers.
+
+    GenericSchemaValidator( // Private constructor used by CreateSchemaValidator(): validates against an explicit root schema.
+        const SchemaDocumentType& schemaDocument,
+        const SchemaType& root,
+#if RAPIDJSON_SCHEMA_VERBOSE
+        unsigned depth,
+#endif
+        StateAllocator* allocator = 0,
+        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
+        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        :
+        schemaDocument_(&schemaDocument),
+        root_(root), // Unlike the public constructors, the root is the given schema rather than schemaDocument.GetRoot().
+        stateAllocator_(allocator),
+        ownStateAllocator_(0),
+        schemaStack_(allocator, schemaStackCapacity),
+        documentStack_(allocator, documentStackCapacity),
+        outputHandler_(0), // Validators created this way never forward events to an output handler.
+        valid_(true)
+#if RAPIDJSON_SCHEMA_VERBOSE
+        , depth_(depth)
+#endif
+    {
+    }
+
+    StateAllocator& GetStateAllocator() { // Returns the state allocator, creating an owned one on first use when none was supplied.
+        if (!stateAllocator_)
+            stateAllocator_ = ownStateAllocator_ = RAPIDJSON_NEW(StateAllocator)(); // Deleted again in the destructor.
+        return *stateAllocator_;
+    }
+
+    bool BeginValue() { // Pushes the context for the value that is about to start; returns false if the parent schema rejects it.
+        if (schemaStack_.Empty())
+            PushSchema(root_); // First value of the document: start at the root schema.
+        else {
+            if (CurrentContext().inArray)
+                internal::TokenHelper<internal::Stack<StateAllocator>, Ch>::AppendIndexToken(documentStack_, CurrentContext().arrayElementIndex); // Extend the document path with "/<index>".
+
+            if (!CurrentSchema().BeginValue(CurrentContext()))
+                return false;
+
+            SizeType count = CurrentContext().patternPropertiesSchemaCount; // Copy what is needed from the parent context before PushSchema() changes the current context.
+            const SchemaType** sa = CurrentContext().patternPropertiesSchemas;
+            typename Context::PatternValidatorType patternValidatorType = CurrentContext().valuePatternValidatorType;
+            bool valueUniqueness = CurrentContext().valueUniqueness;
+            RAPIDJSON_ASSERT(CurrentContext().valueSchema);
+            PushSchema(*CurrentContext().valueSchema);
+
+            if (count > 0) {
+                CurrentContext().objectPatternValidatorType = patternValidatorType;
+                ISchemaValidator**& va = CurrentContext().patternPropertiesValidators;
+                SizeType& validatorCount = CurrentContext().patternPropertiesValidatorCount;
+                va = static_cast<ISchemaValidator**>(MallocState(sizeof(ISchemaValidator*) * count));
+                for (SizeType i = 0; i < count; i++)
+                    va[validatorCount++] = CreateSchemaValidator(*sa[i]); // One sub-validator per schema in the parent's patternPropertiesSchemas.
+            }
+
+            CurrentContext().arrayUniqueness = valueUniqueness; // The new context inherits the parent's valueUniqueness flag.
+        }
+        return true;
+    }
+
+    bool EndValue() { // Finishes the current value: schema check, context pop, uniqueness check in the parent, document path trim.
+        if (!CurrentSchema().EndValue(CurrentContext()))
+            return false;
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+        GenericStringBuffer<EncodingType> sb;
+        schemaDocument_->GetPointer(&CurrentSchema()).Stringify(sb);
+
+        *documentStack_.template Push<Ch>() = '\0';
+        documentStack_.template Pop<Ch>(1);
+        internal::PrintValidatorPointers(depth_, sb.GetString(), documentStack_.template Bottom<Ch>());
+#endif // RAPIDJSON_SCHEMA_VERBOSE
+
+        uint64_t h = CurrentContext().arrayUniqueness ? static_cast<HasherType*>(CurrentContext().hasher)->GetHashCode() : 0; // Read the hash before the context is popped.
+        
+        PopSchema();
+
+        if (!schemaStack_.Empty()) {
+            Context& context = CurrentContext(); // Now the parent of the value that just ended.
+            if (context.valueUniqueness) {
+                HashCodeArray* a = static_cast<HashCodeArray*>(context.arrayElementHashCodes);
+                if (!a)
+                    CurrentContext().arrayElementHashCodes = a = new (GetStateAllocator().Malloc(sizeof(HashCodeArray))) HashCodeArray(kArrayType); // Created lazily; PopSchema() destroys it.
+                for (typename HashCodeArray::ConstValueIterator itr = a->Begin(); itr != a->End(); ++itr)
+                    if (itr->GetUint64() == h)
+                        RAPIDJSON_INVALID_KEYWORD_RETURN(SchemaType::GetUniqueItemsString()); // Same hash as an earlier element (macro is defined outside this view).
+                a->PushBack(h, GetStateAllocator());
+            }
+        }
+
+        // Remove the last token of document pointer
+        while (!documentStack_.Empty() && *documentStack_.template Pop<Ch>(1) != '/')
+            ;
+
+        return true;
+    }
+
+    void AppendToken(const Ch* str, SizeType len) { // Appends "/<escaped name>" to the document path.
+        documentStack_.template Reserve<Ch>(1 + len * 2); // worst case all characters are escaped as two characters
+        *documentStack_.template PushUnsafe<Ch>() = '/';
+        for (SizeType i = 0; i < len; i++) {
+            if (str[i] == '~') { // '~' is written as "~0".
+                *documentStack_.template PushUnsafe<Ch>() = '~';
+                *documentStack_.template PushUnsafe<Ch>() = '0';
+            }
+            else if (str[i] == '/') { // '/' is written as "~1".
+                *documentStack_.template PushUnsafe<Ch>() = '~';
+                *documentStack_.template PushUnsafe<Ch>() = '1';
+            }
+            else
+                *documentStack_.template PushUnsafe<Ch>() = str[i];
+        }
+    }
+
+    RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, &schema); } // Placement-constructs a Context for schema on top of the schema stack.
+    
+    RAPIDJSON_FORCEINLINE void PopSchema() { // Removes the top context and destroys it together with its hash-code array.
+        Context* c = schemaStack_.template Pop<Context>(1);
+        if (HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes)) { // Allocated lazily in EndValue().
+            a->~HashCodeArray();
+            StateAllocator::Free(a);
+        }
+        c->~Context(); // Contexts are placement-constructed by PushSchema(), so destroy them explicitly.
+    }
+
+    const SchemaType& CurrentSchema() const { return *schemaStack_.template Top<Context>()->schema; } // Schema of the top context; callers check or ensure the stack is not empty.
+    Context& CurrentContext() { return *schemaStack_.template Top<Context>(); } // Top of the schema stack.
+    const Context& CurrentContext() const { return *schemaStack_.template Top<Context>(); }
+
+    static const size_t kDefaultSchemaStackCapacity = 1024; // Default for the schemaStackCapacity constructor argument.
+    static const size_t kDefaultDocumentStackCapacity = 256; // Default for the documentStackCapacity constructor argument.
+    const SchemaDocumentType* schemaDocument_; // Not owned.
+    const SchemaType& root_; // Schema pushed for the first value (see BeginValue()).
+    StateAllocator* stateAllocator_; // Supplied by the caller, or created lazily by GetStateAllocator().
+    StateAllocator* ownStateAllocator_; // Non-null only when GetStateAllocator() created the allocator; deleted in the destructor.
+    internal::Stack<StateAllocator> schemaStack_;    //!< stack of Context objects, one per value currently open on the schema path
+    internal::Stack<StateAllocator> documentStack_;  //!< stack to store the current path of validating document (Ch)
+    OutputHandler* outputHandler_; // May be null; not owned.
+    bool valid_;
+#if RAPIDJSON_SCHEMA_VERBOSE
+    unsigned depth_; // Nesting depth of this validator; sub-validators get depth_ + 1.
+#endif
+};
+
+typedef GenericSchemaValidator<SchemaDocument> SchemaValidator; // Validator for SchemaDocument with the default output handler and state allocator.
+
+///////////////////////////////////////////////////////////////////////////////
+// SchemaValidatingReader
+
+//! A helper class for parsing with validation.
+/*!
+    This helper class is a functor, designed as a parameter of \ref GenericDocument::Populate().
+
+    \tparam parseFlags Combination of \ref ParseFlag.
+    \tparam InputStream Type of input stream, implementing Stream concept.
+    \tparam SourceEncoding Encoding of the input stream.
+    \tparam SchemaDocumentType Type of schema document.
+    \tparam StackAllocator Allocator type for stack.
+*/
+template <
+    unsigned parseFlags,
+    typename InputStream,
+    typename SourceEncoding,
+    typename SchemaDocumentType = SchemaDocument,
+    typename StackAllocator = CrtAllocator>
+class SchemaValidatingReader {
+public:
+    typedef typename SchemaDocumentType::PointerType PointerType;
+    typedef typename InputStream::Ch Ch; // NOTE(review): operator() stores the validator's keyword pointer as this Ch, which assumes the stream and schema character types match.
+
+    //! Constructor
+    /*!
+        \param is Input stream. Held by reference, so it has to outlive this reader.
+        \param sd Schema document. Held by reference, so it has to outlive this reader.
+    */
+    SchemaValidatingReader(InputStream& is, const SchemaDocumentType& sd) : is_(is), sd_(sd), invalidSchemaKeyword_(), isValid_(true) {}
+
+    template <typename Handler>
+    bool operator()(Handler& handler) { // Parses is_ through a validator that forwards events to handler, then records the outcome.
+        GenericReader<SourceEncoding, typename SchemaDocumentType::EncodingType, StackAllocator> reader;
+        GenericSchemaValidator<SchemaDocumentType, Handler> validator(sd_, handler);
+        parseResult_ = reader.template Parse<parseFlags>(is_, validator);
+
+        isValid_ = validator.IsValid();
+        if (isValid_) { // Clear any failure details left from a previous call.
+            invalidSchemaPointer_ = PointerType();
+            invalidSchemaKeyword_ = 0;
+            invalidDocumentPointer_ = PointerType();
+        }
+        else { // Copy the failure details out before the local validator is destroyed.
+            invalidSchemaPointer_ = validator.GetInvalidSchemaPointer();
+            invalidSchemaKeyword_ = validator.GetInvalidSchemaKeyword();
+            invalidDocumentPointer_ = validator.GetInvalidDocumentPointer();
+        }
+
+        return parseResult_; // ParseResult is converted to bool here.
+    }
+
+    const ParseResult& GetParseResult() const { return parseResult_; }
+    bool IsValid() const { return isValid_; } // Validation outcome of the last operator() call; true before any call.
+    const PointerType& GetInvalidSchemaPointer() const { return invalidSchemaPointer_; }
+    const Ch* GetInvalidSchemaKeyword() const { return invalidSchemaKeyword_; } // Null when the last run was valid.
+    const PointerType& GetInvalidDocumentPointer() const { return invalidDocumentPointer_; }
+
+private:
+    InputStream& is_;
+    const SchemaDocumentType& sd_;
+
+    ParseResult parseResult_;
+    PointerType invalidSchemaPointer_;
+    const Ch* invalidSchemaKeyword_;
+    PointerType invalidDocumentPointer_;
+    bool isValid_;
+};
+
+RAPIDJSON_NAMESPACE_END
+RAPIDJSON_DIAG_POP
+
+#endif // RAPIDJSON_SCHEMA_H_
diff --git a/rapidjson/stream.h b/rapidjson/stream.h
new file mode 100644
index 0000000..fef82c2
--- /dev/null
+++ b/rapidjson/stream.h
@@ -0,0 +1,179 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#include "rapidjson.h"
+
+#ifndef RAPIDJSON_STREAM_H_
+#define RAPIDJSON_STREAM_H_
+
+#include "encodings.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+//  Stream
+
+/*! \class rapidjson::Stream
+    \brief Concept for reading and writing characters.
+
+    For read-only stream, no need to implement PutBegin(), Put(), Flush() and PutEnd().
+
+    For write-only stream, only need to implement Put() and Flush().
+
+\code
+concept Stream {
+    typename Ch;    //!< Character type of the stream.
+
+    //! Read the current character from stream without moving the read cursor.
+    Ch Peek() const;
+
+    //! Read the current character from stream and move the read cursor to the next character.
+    Ch Take();
+
+    //! Get the current read cursor.
+    //! \return Number of characters read from start.
+    size_t Tell();
+
+    //! Begin writing operation at the current read pointer.
+    //! \return The begin write pointer.
+    Ch* PutBegin();
+
+    //! Write a character.
+    void Put(Ch c);
+
+    //! Flush the buffer.
+    void Flush();
+
+    //! End the writing operation.
+    //! \param begin The begin write pointer returned by PutBegin().
+    //! \return Number of characters written.
+    size_t PutEnd(Ch* begin);
+}
+\endcode
+*/
+
+//! Provides additional information for stream.
+/*!
+    By using traits pattern, this type provides a default configuration for stream.
+    For custom stream, this type can be specialized for other configuration.
+    See TEST(Reader, CustomStringStream) in readertest.cpp for example.
+*/
+template<typename Stream>
+struct StreamTraits {
+    //! Whether to make a local copy of the stream for optimization during parsing.
+    /*!
+        By default, for safety, streams do not use local copy optimization.
+        A stream that can be copied fast should specialize this trait, as StreamTraits<GenericStringStream<Encoding> > does.
+    */
+    enum { copyOptimization = 0 }; // 0 = off (primary-template default); specializations set 1 to opt in
+};
+
+//! Reserve n characters for writing to a stream. This generic version is a no-op; streams able to pre-allocate supply their own overload.
+template<typename Stream>
+inline void PutReserve(Stream& stream, size_t count) {
+    (void)stream;   // intentionally unused in the generic version
+    (void)count;    // intentionally unused in the generic version
+}
+
+//! Write character to a stream, presuming buffer is reserved. This generic version simply forwards to Stream::Put().
+template<typename Stream>
+inline void PutUnsafe(Stream& stream, typename Stream::Ch c) {
+    stream.Put(c);  // no separate unchecked path here: the ordinary Put() is used
+}
+
+//! Put N copies of a character to a stream: one PutReserve(stream, n) followed by n PutUnsafe() calls.
+template<typename Stream, typename Ch>
+inline void PutN(Stream& stream, Ch c, size_t n) {
+    PutReserve(stream, n);          // reserve room for all n characters up front
+    for (size_t i = 0; i < n; i++)
+        PutUnsafe(stream, c);       // relies on the reservation made above
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// StringStream
+
+//! Read-only string stream over a caller-supplied character buffer.
+/*! \note implements Stream concept. Only pointers are stored; the buffer is neither copied nor bounds-checked here.
+*/
+template <typename Encoding>
+struct GenericStringStream {
+    typedef typename Encoding::Ch Ch;
+    //! Begin reading at src; the same pointer is remembered as the head used by Tell().
+    GenericStringStream(const Ch *src) : src_(src), head_(src) {}
+    // Read operations.
+    Ch Peek() const { return *src_; }   // current character; cursor unchanged
+    Ch Take() { return *src_++; }       // current character, then advance the cursor
+    size_t Tell() const { return static_cast<size_t>(src_ - head_); }  // characters consumed since construction
+    // Write operations are not supported by this read-only stream: each one asserts.
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+    const Ch* src_;     //!< Current read position.
+    const Ch* head_;    //!< Original head of the string.
+};
+
+template <typename Encoding>
+struct StreamTraits<GenericStringStream<Encoding> > {
+    enum { copyOptimization = 1 }; // opt in to local-copy optimization: the stream consists of just two pointers (src_, head_)
+};
+
+//! String stream with UTF8 encoding.
+typedef GenericStringStream<UTF8<> > StringStream;
+
+///////////////////////////////////////////////////////////////////////////////
+// InsituStringStream
+
+//! A read-write string stream.
+/*! This string stream is particularly designed for in-situ parsing: reads and writes go through the same caller-supplied buffer.
+    \note implements Stream concept
+*/
+template <typename Encoding>
+struct GenericInsituStringStream {
+    typedef typename Encoding::Ch Ch;
+    //! Begin reading at src; no write position exists (dst_ is 0) until PutBegin() is called.
+    GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {}
+
+    // Read
+    Ch Peek() { return *src_; }     // current character; cursor unchanged
+    Ch Take() { return *src_++; }   // current character, then advance the read cursor
+    size_t Tell() { return static_cast<size_t>(src_ - head_); }    // characters consumed since construction
+
+    // Write
+    void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; }   // asserts that PutBegin() has set a write position
+
+    Ch* PutBegin() { return dst_ = src_; }  // start writing at the current read position
+    size_t PutEnd(Ch* begin) { return static_cast<size_t>(dst_ - begin); }  // characters between begin and the write cursor
+    void Flush() {}     // nothing to flush
+    // Push()/Pop() move the write cursor without storing any character.
+    Ch* Push(size_t count) { Ch* begin = dst_; dst_ += count; return begin; }   // returns the cursor value before advancing
+    void Pop(size_t count) { dst_ -= count; }
+
+    Ch* src_;   //!< Current read position.
+    Ch* dst_;   //!< Current write position; 0 until PutBegin().
+    Ch* head_;  //!< Start of the buffer, used by Tell().
+};
+
+template <typename Encoding>
+struct StreamTraits<GenericInsituStringStream<Encoding> > {
+    enum { copyOptimization = 1 }; // opt in to local-copy optimization: the stream consists of three raw pointers
+};
+
+//! Insitu string stream with UTF8 encoding.
+typedef GenericInsituStringStream<UTF8<> > InsituStringStream;
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_STREAM_H_
diff --git a/rapidjson/stringbuffer.h b/rapidjson/stringbuffer.h
index 1c9c80b..4e38b82 100644
--- a/rapidjson/stringbuffer.h
+++ b/rapidjson/stringbuffer.h
@@ -15,7 +15,8 @@
 #ifndef RAPIDJSON_STRINGBUFFER_H_
 #define RAPIDJSON_STRINGBUFFER_H_
 
-#include "rapidjson.h"
+#include "stream.h"
+#include "internal/stack.h"
 
 #if RAPIDJSON_HAS_CXX11_RVALUE_REFS
 #include <utility> // std::move
@@ -23,6 +24,11 @@
 
 #include "internal/stack.h"
 
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
 RAPIDJSON_NAMESPACE_BEGIN
 
 //! Represents an in-memory output stream.
@@ -48,6 +54,7 @@ class GenericStringBuffer {
 #endif
 
     void Put(Ch c) { *stack_.template Push<Ch>() = c; }
+    void PutUnsafe(Ch c) { *stack_.template PushUnsafe<Ch>() = c; } // like Put(), but via the stack's PushUnsafe(); presumably requires a prior Reserve() — confirm
     void Flush() {}
 
     void Clear() { stack_.Clear(); }
@@ -57,7 +64,10 @@ class GenericStringBuffer {
         stack_.ShrinkToFit();
         stack_.template Pop<Ch>(1);
     }
+
+    void Reserve(size_t count) { stack_.template Reserve<Ch>(count); } // forwards to the underlying stack's Reserve<Ch>()
     Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
+    Ch* PushUnsafe(size_t count) { return stack_.template PushUnsafe<Ch>(count); } // PushUnsafe counterpart of Push(); forwards to the underlying stack
     void Pop(size_t count) { stack_.template Pop<Ch>(count); }
 
     const Ch* GetString() const {
@@ -68,8 +78,12 @@ class GenericStringBuffer {
         return stack_.template Bottom<Ch>();
     }
 
+    //! Get the size of string in bytes in the string buffer.
     size_t GetSize() const { return stack_.GetSize(); }
 
+    //! Get the length of string in Ch in the string buffer (the stack's byte size divided by sizeof(Ch)).
+    size_t GetLength() const { return stack_.GetSize() / sizeof(Ch); }
+
     static const size_t kDefaultCapacity = 256;
     mutable internal::Stack<Allocator> stack_;
 
@@ -82,6 +96,16 @@ class GenericStringBuffer {
 //! String buffer with UTF8 encoding
 typedef GenericStringBuffer<UTF8<> > StringBuffer;
 
+template<typename Encoding, typename Allocator>
+inline void PutReserve(GenericStringBuffer<Encoding, Allocator>& stream, size_t count) {
+    stream.Reserve(count); // GenericStringBuffer overload: forwards to the buffer's own Reserve()
+}
+
+template<typename Encoding, typename Allocator>
+inline void PutUnsafe(GenericStringBuffer<Encoding, Allocator>& stream, typename Encoding::Ch c) {
+    stream.PutUnsafe(c); // GenericStringBuffer overload: forwards to the buffer's own PutUnsafe()
+}
+
 //! Implement specialized version of PutN() with memset() for better performance.
 template<>
 inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) {
@@ -90,4 +114,8 @@ inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) {
 
 RAPIDJSON_NAMESPACE_END
 
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
 #endif // RAPIDJSON_STRINGBUFFER_H_
diff --git a/rapidjson/writer.h b/rapidjson/writer.h
index e1eea38..e610ebb 100644
--- a/rapidjson/writer.h
+++ b/rapidjson/writer.h
@@ -15,7 +15,8 @@
 #ifndef RAPIDJSON_WRITER_H_
 #define RAPIDJSON_WRITER_H_
 
-#include "rapidjson.h"
+#include "stream.h"
+#include "internal/meta.h"
 #include "internal/stack.h"
 #include "internal/strfunc.h"
 #include "internal/dtoa.h"
@@ -23,8 +24,16 @@
 #include "stringbuffer.h"
 #include <new>      // placement new
 
-#if RAPIDJSON_HAS_STDSTRING
-#include <string>
+#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanForward)
+#endif
+#ifdef RAPIDJSON_SSE42
+#include <nmmintrin.h>
+#elif defined(RAPIDJSON_SSE2)
+#include <emmintrin.h>
+#elif defined(RAPIDJSON_NEON)
+#include <arm_neon.h>
 #endif
 
 #ifdef _MSC_VER
@@ -32,8 +41,36 @@ RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
 #endif
 
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(unreachable-code)
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
 RAPIDJSON_NAMESPACE_BEGIN
 
+///////////////////////////////////////////////////////////////////////////////
+// WriteFlag
+
+/*! \def RAPIDJSON_WRITE_DEFAULT_FLAGS 
+    \ingroup RAPIDJSON_CONFIG
+    \brief User-defined kWriteDefaultFlags definition.
+
+    User can define this as any \c WriteFlag combinations.
+*/
+#ifndef RAPIDJSON_WRITE_DEFAULT_FLAGS
+#define RAPIDJSON_WRITE_DEFAULT_FLAGS kWriteNoFlags
+#endif
+
+//! Bit flags for the Writer's writeFlags template argument; each flag is a distinct bit and is tested with bitwise AND.
+enum WriteFlag {
+    kWriteNoFlags = 0,              //!< No flags are set.
+    kWriteValidateEncodingFlag = 1, //!< Validate encoding of JSON strings.
+    kWriteNanAndInfFlag = 2,        //!< Allow writing of Infinity, -Infinity and NaN; without it WriteDouble() returns false for such values.
+    kWriteDefaultFlags = RAPIDJSON_WRITE_DEFAULT_FLAGS  //!< Default write flags. Can be customized by defining RAPIDJSON_WRITE_DEFAULT_FLAGS
+};
+
 //! JSON writer
 /*! Writer implements the concept Handler.
     It generates JSON text by events to an output os.
@@ -50,11 +87,13 @@ RAPIDJSON_NAMESPACE_BEGIN
     \tparam StackAllocator Type of allocator for allocating memory of stack.
     \note implements Handler concept
 */
-template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator>
+template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
 class Writer {
 public:
     typedef typename SourceEncoding::Ch Ch;
 
+    static const int kDefaultMaxDecimalPlaces = 324;
+
     //! Constructor
     /*! \param os Output stream.
         \param stackAllocator User supplied allocator. If it is null, it will create a private one.
@@ -62,11 +101,18 @@ class Writer {
     */
     explicit
     Writer(OutputStream& os, StackAllocator* stackAllocator = 0, size_t levelDepth = kDefaultLevelDepth) : 
-        os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), hasRoot_(false) {}
+        os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {}
 
     explicit
     Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) :
-        os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), hasRoot_(false) {}
+        os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {}
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    Writer(Writer&& rhs) : // move constructor: takes over rhs's stream pointer, level stack and settings
+        os_(rhs.os_), level_stack_(std::move(rhs.level_stack_)), maxDecimalPlaces_(rhs.maxDecimalPlaces_), hasRoot_(rhs.hasRoot_) {
+        rhs.os_ = 0; // detach the moved-from writer from the output stream
+    }
+#endif
 
     //! Reset the writer with a new stream.
     /*!
@@ -100,29 +146,66 @@ class Writer {
         return hasRoot_ && level_stack_.Empty();
     }
 
+    int GetMaxDecimalPlaces() const { // current limit, as set by the constructor or SetMaxDecimalPlaces()
+        return maxDecimalPlaces_;
+    }
+
+    //! Sets the maximum number of decimal places for double output.
+    /*!
+        This setting truncates the output to the specified number of decimal places.
+
+        For example,
+
+        \code
+        writer.SetMaxDecimalPlaces(3);
+        writer.StartArray();
+        writer.Double(0.12345);                 // "0.123"
+        writer.Double(0.0001);                  // "0.0"
+        writer.Double(1.234567890123456e30);    // "1.234567890123456e30" (do not truncate significand for positive exponent)
+        writer.Double(1.23e-4);                 // "0.0"                  (do truncate significand for negative exponent)
+        writer.EndArray();
+        \endcode
+
+        The default setting does not truncate any decimal places. You can restore this setting by calling
+        \code
+        writer.SetMaxDecimalPlaces(Writer::kDefaultMaxDecimalPlaces);
+        \endcode
+    */
+    void SetMaxDecimalPlaces(int maxDecimalPlaces) {
+        maxDecimalPlaces_ = maxDecimalPlaces; // consumed by WriteDouble(), which passes it to internal::dtoa()
+    }
+
     /*!@name Implementation of Handler
         \see Handler
     */
     //@{
 
-    bool Null()                 { Prefix(kNullType);   return WriteNull(); }
-    bool Bool(bool b)           { Prefix(b ? kTrueType : kFalseType); return WriteBool(b); }
-    bool Int(int i)             { Prefix(kNumberType); return WriteInt(i); }
-    bool Uint(unsigned u)       { Prefix(kNumberType); return WriteUint(u); }
-    bool Int64(int64_t i64)     { Prefix(kNumberType); return WriteInt64(i64); }
-    bool Uint64(uint64_t u64)   { Prefix(kNumberType); return WriteUint64(u64); }
+    bool Null()                 { Prefix(kNullType);   return EndValue(WriteNull()); }
+    bool Bool(bool b)           { Prefix(b ? kTrueType : kFalseType); return EndValue(WriteBool(b)); }
+    bool Int(int i)             { Prefix(kNumberType); return EndValue(WriteInt(i)); }
+    bool Uint(unsigned u)       { Prefix(kNumberType); return EndValue(WriteUint(u)); }
+    bool Int64(int64_t i64)     { Prefix(kNumberType); return EndValue(WriteInt64(i64)); }
+    bool Uint64(uint64_t u64)   { Prefix(kNumberType); return EndValue(WriteUint64(u64)); }
 
     //! Writes the given \c double value to the stream
     /*!
         \param d The value to be written.
         \return Whether it is succeed.
     */
-    bool Double(double d)       { Prefix(kNumberType); return WriteDouble(d); }
+    bool Double(double d)       { Prefix(kNumberType); return EndValue(WriteDouble(d)); }
+
+    bool RawNumber(const Ch* str, SizeType length, bool copy = false) { // number supplied as text; str must be non-null
+        RAPIDJSON_ASSERT(str != 0);
+        (void)copy; // unused by this writer
+        Prefix(kNumberType);
+        return EndValue(WriteString(str, length)); // NOTE(review): same WriteString() path as String(), which writes surrounding quotes — confirm this is intended for numbers
+    }
 
     bool String(const Ch* str, SizeType length, bool copy = false) {
+        RAPIDJSON_ASSERT(str != 0);
         (void)copy;
         Prefix(kStringType);
-        return WriteString(str, length);
+        return EndValue(WriteString(str, length));
     }
 
 #if RAPIDJSON_HAS_STDSTRING
@@ -138,16 +221,21 @@ class Writer {
     }
 
     bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool Key(const std::basic_string<Ch>& str) // std::basic_string convenience overload
+    {
+      return Key(str.data(), SizeType(str.size())); // forwards to the pointer/length overload using the string's own size
+    }
+#endif
 	
     bool EndObject(SizeType memberCount = 0) {
         (void)memberCount;
-        RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
-        RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray);
+        RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object
+        RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == level_stack_.template Top<Level>()->valueCount % 2); // Object has a Key without a Value
         level_stack_.template Pop<Level>(1);
-        bool ret = WriteEndObject();
-        if (level_stack_.Empty())   // end of json text
-            os_->Flush();
-        return ret;
+        return EndValue(WriteEndObject());
     }
 
     bool StartArray() {
@@ -161,10 +249,7 @@ class Writer {
         RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
         RAPIDJSON_ASSERT(level_stack_.template Top<Level>()->inArray);
         level_stack_.template Pop<Level>(1);
-        bool ret = WriteEndArray();
-        if (level_stack_.Empty())   // end of json text
-            os_->Flush();
-        return ret;
+        return EndValue(WriteEndArray());
     }
     //@}
 
@@ -172,11 +257,33 @@ class Writer {
     //@{
 
     //! Simpler but slower overload.
-    bool String(const Ch* str) { return String(str, internal::StrLen(str)); }
-    bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); }
-
+    bool String(const Ch* const& str) { return String(str, internal::StrLen(str)); }
+    bool Key(const Ch* const& str) { return Key(str, internal::StrLen(str)); }
+    
     //@}
 
+    //! Write a raw JSON value.
+    /*!
+        Lets the user write an already-stringified JSON text as a value; it is copied to the output without escaping.
+
+        \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range.
+        \param length Length of the json.
+        \param type Type of the root of json.
+    */
+    bool RawValue(const Ch* json, size_t length, Type type) {
+        RAPIDJSON_ASSERT(json != 0);
+        Prefix(type); // same prefix handling as for any other value of this type
+        return EndValue(WriteRawValue(json, length)); // EndValue() flushes if this was the top-level value
+    }
+
+    //! Flush the output stream.
+    /*!
+        Allows the user to flush the output stream immediately.
+     */
+    void Flush() {
+        os_->Flush(); // os_ is dereferenced unconditionally: an output stream must be attached
+    }
+
 protected:
     //! Information for each nested level
     struct Level {
@@ -188,15 +295,18 @@ class Writer {
     static const size_t kDefaultLevelDepth = 32;
 
     bool WriteNull()  {
-        os_->Put('n'); os_->Put('u'); os_->Put('l'); os_->Put('l'); return true;
+        PutReserve(*os_, 4);
+        PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l'); return true;
     }
 
     bool WriteBool(bool b)  {
         if (b) {
-            os_->Put('t'); os_->Put('r'); os_->Put('u'); os_->Put('e');
+            PutReserve(*os_, 4);
+            PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'r'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'e');
         }
         else {
-            os_->Put('f'); os_->Put('a'); os_->Put('l'); os_->Put('s'); os_->Put('e');
+            PutReserve(*os_, 5);
+            PutUnsafe(*os_, 'f'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 's'); PutUnsafe(*os_, 'e');
         }
         return true;
     }
@@ -204,45 +314,69 @@ class Writer {
     bool WriteInt(int i) {
         char buffer[11];
         const char* end = internal::i32toa(i, buffer);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (const char* p = buffer; p != end; ++p)
-            os_->Put(*p);
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
     bool WriteUint(unsigned u) {
         char buffer[10];
         const char* end = internal::u32toa(u, buffer);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (const char* p = buffer; p != end; ++p)
-            os_->Put(*p);
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
     bool WriteInt64(int64_t i64) {
         char buffer[21];
         const char* end = internal::i64toa(i64, buffer);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (const char* p = buffer; p != end; ++p)
-            os_->Put(*p);
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
     bool WriteUint64(uint64_t u64) {
         char buffer[20];
         char* end = internal::u64toa(u64, buffer);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (char* p = buffer; p != end; ++p)
-            os_->Put(*p);
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
     bool WriteDouble(double d) {
+        if (internal::Double(d).IsNanOrInf()) {
+            if (!(writeFlags & kWriteNanAndInfFlag))
+                return false;
+            if (internal::Double(d).IsNan()) {
+                PutReserve(*os_, 3);
+                PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+                return true;
+            }
+            if (internal::Double(d).Sign()) {
+                PutReserve(*os_, 9);
+                PutUnsafe(*os_, '-');
+            }
+            else
+                PutReserve(*os_, 8);
+            PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
+            PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+            return true;
+        }
+
         char buffer[25];
-        char* end = internal::dtoa(d, buffer);
+        char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
         for (char* p = buffer; p != end; ++p)
-            os_->Put(*p);
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
         return true;
     }
 
     bool WriteString(const Ch* str, SizeType length)  {
-        static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+        static const typename OutputStream::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
         static const char escape[256] = {
 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
             //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
@@ -255,22 +389,27 @@ class Writer {
 #undef Z16
         };
 
-        os_->Put('\"');
+        if (TargetEncoding::supportUnicode)
+            PutReserve(*os_, 2 + length * 6); // "\uxxxx..."
+        else
+            PutReserve(*os_, 2 + length * 12);  // "\uxxxx\uyyyy..."
+
+        PutUnsafe(*os_, '\"');
         GenericStringStream<SourceEncoding> is(str);
-        while (is.Tell() < length) {
+        while (ScanWriteUnescapedString(is, length)) {
             const Ch c = is.Peek();
-            if (!TargetEncoding::supportUnicode && (unsigned)c >= 0x80) {
+            if (!TargetEncoding::supportUnicode && static_cast<unsigned>(c) >= 0x80) {
                 // Unicode escaping
                 unsigned codepoint;
-                if (!SourceEncoding::Decode(is, &codepoint))
+                if (RAPIDJSON_UNLIKELY(!SourceEncoding::Decode(is, &codepoint)))
                     return false;
-                os_->Put('\\');
-                os_->Put('u');
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, 'u');
                 if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
-                    os_->Put(hexDigits[(codepoint >> 12) & 15]);
-                    os_->Put(hexDigits[(codepoint >>  8) & 15]);
-                    os_->Put(hexDigits[(codepoint >>  4) & 15]);
-                    os_->Put(hexDigits[(codepoint      ) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint      ) & 15]);
                 }
                 else {
                     RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF);
@@ -278,45 +417,59 @@ class Writer {
                     unsigned s = codepoint - 0x010000;
                     unsigned lead = (s >> 10) + 0xD800;
                     unsigned trail = (s & 0x3FF) + 0xDC00;
-                    os_->Put(hexDigits[(lead >> 12) & 15]);
-                    os_->Put(hexDigits[(lead >>  8) & 15]);
-                    os_->Put(hexDigits[(lead >>  4) & 15]);
-                    os_->Put(hexDigits[(lead      ) & 15]);
-                    os_->Put('\\');
-                    os_->Put('u');
-                    os_->Put(hexDigits[(trail >> 12) & 15]);
-                    os_->Put(hexDigits[(trail >>  8) & 15]);
-                    os_->Put(hexDigits[(trail >>  4) & 15]);
-                    os_->Put(hexDigits[(trail      ) & 15]);                    
+                    PutUnsafe(*os_, hexDigits[(lead >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead      ) & 15]);
+                    PutUnsafe(*os_, '\\');
+                    PutUnsafe(*os_, 'u');
+                    PutUnsafe(*os_, hexDigits[(trail >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail      ) & 15]);                    
                 }
             }
-            else if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c])  {
+            else if ((sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) && RAPIDJSON_UNLIKELY(escape[static_cast<unsigned char>(c)]))  {
                 is.Take();
-                os_->Put('\\');
-                os_->Put(escape[(unsigned char)c]);
-                if (escape[(unsigned char)c] == 'u') {
-                    os_->Put('0');
-                    os_->Put('0');
-                    os_->Put(hexDigits[(unsigned char)c >> 4]);
-                    os_->Put(hexDigits[(unsigned char)c & 0xF]);
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(escape[static_cast<unsigned char>(c)]));
+                if (escape[static_cast<unsigned char>(c)] == 'u') {
+                    PutUnsafe(*os_, '0');
+                    PutUnsafe(*os_, '0');
+                    PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) >> 4]);
+                    PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) & 0xF]);
                 }
             }
-            else
-                if (!Transcoder<SourceEncoding, TargetEncoding>::Transcode(is, *os_))
-                    return false;
+            else if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ? 
+                Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) :
+                Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
+                return false;
         }
-        os_->Put('\"');
+        PutUnsafe(*os_, '\"');
         return true;
     }
 
+    bool ScanWriteUnescapedString(GenericStringStream<SourceEncoding>& is, size_t length) { // loop condition of WriteString()
+        return RAPIDJSON_LIKELY(is.Tell() < length); // generic version writes nothing itself; true while input remains
+    }
+
     bool WriteStartObject() { os_->Put('{'); return true; }
     bool WriteEndObject()   { os_->Put('}'); return true; }
     bool WriteStartArray()  { os_->Put('['); return true; }
     bool WriteEndArray()    { os_->Put(']'); return true; }
 
+    bool WriteRawValue(const Ch* json, size_t length) { // copies length characters of json to the stream unchanged
+        PutReserve(*os_, length); // one reservation for the whole value
+        for (size_t i = 0; i < length; i++) {
+            RAPIDJSON_ASSERT(json[i] != '\0'); // embedded null characters are not allowed
+            PutUnsafe(*os_, json[i]);
+        }
+        return true; // always reports success
+    }
+
     void Prefix(Type type) {
         (void)type;
-        if (level_stack_.GetSize() != 0) { // this value is not at root
+        if (RAPIDJSON_LIKELY(level_stack_.GetSize() != 0)) { // this value is not at root
             Level* level = level_stack_.template Top<Level>();
             if (level->valueCount > 0) {
                 if (level->inArray) 
@@ -334,8 +487,16 @@ class Writer {
         }
     }
 
+    // Flush the stream once a top-level value is complete (level stack empty); ret is passed through unchanged.
+    bool EndValue(bool ret) {
+        if (RAPIDJSON_UNLIKELY(level_stack_.Empty()))   // end of json text
+            Flush();
+        return ret;
+    }
+
     OutputStream* os_;
     internal::Stack<StackAllocator> level_stack_;
+    int maxDecimalPlaces_;
     bool hasRoot_;
 
 private:
@@ -350,7 +511,7 @@ template<>
 inline bool Writer<StringBuffer>::WriteInt(int i) {
     char *buffer = os_->Push(11);
     const char* end = internal::i32toa(i, buffer);
-    os_->Pop(11 - (end - buffer));
+    os_->Pop(static_cast<size_t>(11 - (end - buffer)));
     return true;
 }
 
@@ -358,7 +519,7 @@ template<>
 inline bool Writer<StringBuffer>::WriteUint(unsigned u) {
     char *buffer = os_->Push(10);
     const char* end = internal::u32toa(u, buffer);
-    os_->Pop(10 - (end - buffer));
+    os_->Pop(static_cast<size_t>(10 - (end - buffer)));
     return true;
 }
 
@@ -366,7 +527,7 @@ template<>
 inline bool Writer<StringBuffer>::WriteInt64(int64_t i64) {
     char *buffer = os_->Push(21);
     const char* end = internal::i64toa(i64, buffer);
-    os_->Pop(21 - (end - buffer));
+    os_->Pop(static_cast<size_t>(21 - (end - buffer)));
     return true;
 }
 
@@ -374,22 +535,177 @@ template<>
 inline bool Writer<StringBuffer>::WriteUint64(uint64_t u) {
     char *buffer = os_->Push(20);
     const char* end = internal::u64toa(u, buffer);
-    os_->Pop(20 - (end - buffer));
+    os_->Pop(static_cast<size_t>(20 - (end - buffer)));
     return true;
 }
 
 template<>
 inline bool Writer<StringBuffer>::WriteDouble(double d) {
+    if (internal::Double(d).IsNanOrInf()) {
+        // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS & kWriteNanAndInfFlag).
+        if (!(kWriteDefaultFlags & kWriteNanAndInfFlag))
+            return false;
+        if (internal::Double(d).IsNan()) {
+            PutReserve(*os_, 3);
+            PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+            return true;
+        }
+        if (internal::Double(d).Sign()) {
+            PutReserve(*os_, 9);
+            PutUnsafe(*os_, '-');
+        }
+        else
+            PutReserve(*os_, 8);
+        PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
+        PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+        return true;
+    }
+    
     char *buffer = os_->Push(25);
-    char* end = internal::dtoa(d, buffer);
-    os_->Pop(25 - (end - buffer));
+    char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
+    os_->Pop(static_cast<size_t>(25 - (end - buffer)));
     return true;
 }
 
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if (nextAligned > end)
+        return true;
+
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // Process the rest of the string using SIMD
+    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+    static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+    for (; p != endAligned; p += 16) {
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+        if (RAPIDJSON_UNLIKELY(r != 0)) {   // at least one character needs escaping
+            SizeType len;
+#ifdef _MSC_VER         // Find the index of the first character that needs escaping
+            unsigned long offset;
+            _BitScanForward(&offset, r);
+            len = offset;
+#else
+            len = static_cast<SizeType>(__builtin_ffs(r) - 1);
+#endif
+            char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(os_->PushUnsafe(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#elif defined(RAPIDJSON_NEON)
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if (nextAligned > end)
+        return true;
+
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // Process the rest of the string using SIMD
+    const uint8x16_t s0 = vmovq_n_u8('"');
+    const uint8x16_t s1 = vmovq_n_u8('\\');
+    const uint8x16_t s2 = vmovq_n_u8('\b');
+    const uint8x16_t s3 = vmovq_n_u8(32);
+
+    for (; p != endAligned; p += 16) {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, s0);
+        x = vorrq_u8(x, vceqq_u8(s, s1));
+        x = vorrq_u8(x, vceqq_u8(s, s2));
+        x = vorrq_u8(x, vcltq_u8(s, s3));
+
+        x = vrev64q_u8(x);                     // Reverse byte order within each 64-bit half
+        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+        SizeType len = 0;
+        bool escaped = false;
+        if (low == 0) {
+            if (high != 0) {
+                unsigned lz = (unsigned)__builtin_clzll(high);
+                len = 8 + (lz >> 3);
+                escaped = true;
+            }
+        } else {
+            unsigned lz = (unsigned)__builtin_clzll(low);
+            len = lz >> 3;
+            escaped = true;
+        }
+        if (RAPIDJSON_UNLIKELY(escaped)) {   // at least one character needs escaping
+            char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        vst1q_u8(reinterpret_cast<uint8_t *>(os_->PushUnsafe(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#endif // RAPIDJSON_NEON
+
 RAPIDJSON_NAMESPACE_END
 
 #ifdef _MSC_VER
 RAPIDJSON_DIAG_POP
 #endif
 
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
 #endif // RAPIDJSON_RAPIDJSON_H_