]> git.lizzy.rs Git - dragonfireclient.git/blob - src/util/serialize.cpp
Optimize JSON string (de)serialization routines
[dragonfireclient.git] / src / util / serialize.cpp
1 /*
2 Minetest
3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "serialize.h"
21 #include "porting.h"
22 #include "util/string.h"
23 #include "util/hex.h"
24 #include "exceptions.h"
25 #include "irrlichttypes.h"
26
27 #include <iostream>
28 #include <cassert>
29
30 FloatType g_serialize_f32_type = FLOATTYPE_UNKNOWN; // Starts out as "unknown"; presumably set to the detected f32 format elsewhere -- TODO confirm against callers
31
32
33 ////
34 //// String
35 ////
36
37 std::string serializeString16(const std::string &plain)
38 {
39         std::string s;
40         char buf[2];
41
42         if (plain.size() > STRING_MAX_LEN)
43                 throw SerializationError("String too long for serializeString16");
44         s.reserve(2 + plain.size());
45
46         writeU16((u8 *)&buf[0], plain.size());
47         s.append(buf, 2);
48
49         s.append(plain);
50         return s;
51 }
52
53 std::string deSerializeString16(std::istream &is)
54 {
55         std::string s;
56         char buf[2];
57
58         is.read(buf, 2);
59         if (is.gcount() != 2)
60                 throw SerializationError("deSerializeString16: size not read");
61
62         u16 s_size = readU16((u8 *)buf);
63         if (s_size == 0)
64                 return s;
65
66         s.resize(s_size);
67         is.read(&s[0], s_size);
68         if (is.gcount() != s_size)
69                 throw SerializationError("deSerializeString16: couldn't read all chars");
70
71         return s;
72 }
73
74
75 ////
76 //// Long String
77 ////
78
79 std::string serializeString32(const std::string &plain)
80 {
81         std::string s;
82         char buf[4];
83
84         if (plain.size() > LONG_STRING_MAX_LEN)
85                 throw SerializationError("String too long for serializeLongString");
86         s.reserve(4 + plain.size());
87
88         writeU32((u8*)&buf[0], plain.size());
89         s.append(buf, 4);
90         s.append(plain);
91         return s;
92 }
93
94 std::string deSerializeString32(std::istream &is)
95 {
96         std::string s;
97         char buf[4];
98
99         is.read(buf, 4);
100         if (is.gcount() != 4)
101                 throw SerializationError("deSerializeLongString: size not read");
102
103         u32 s_size = readU32((u8 *)buf);
104         if (s_size == 0)
105                 return s;
106
107         // We don't really want a remote attacker to force us to allocate 4GB...
108         if (s_size > LONG_STRING_MAX_LEN) {
109                 throw SerializationError("deSerializeLongString: "
110                         "string too long: " + itos(s_size) + " bytes");
111         }
112
113         s.resize(s_size);
114         is.read(&s[0], s_size);
115         if ((u32)is.gcount() != s_size)
116                 throw SerializationError("deSerializeLongString: couldn't read all chars");
117
118         return s;
119 }
120
121 ////
122 //// JSON-like strings
123 ////
124
125 std::string serializeJsonString(const std::string &plain)
126 {
127         std::string tmp;
128
129         tmp.reserve(plain.size() + 2);
130         tmp.push_back('"');
131
132         for (char c : plain) {
133                 switch (c) {
134                         case '"':
135                                 tmp.append("\\\"");
136                                 break;
137                         case '\\':
138                                 tmp.append("\\\\");
139                                 break;
140                         case '\b':
141                                 tmp.append("\\b");
142                                 break;
143                         case '\f':
144                                 tmp.append("\\f");
145                                 break;
146                         case '\n':
147                                 tmp.append("\\n");
148                                 break;
149                         case '\r':
150                                 tmp.append("\\r");
151                                 break;
152                         case '\t':
153                                 tmp.append("\\t");
154                                 break;
155                         default: {
156                                 if (c >= 32 && c <= 126) {
157                                         tmp.push_back(c);
158                                 } else {
159                                         // We pretend that Unicode codepoints map to bytes (they don't)
160                                         u8 cnum = static_cast<u8>(c);
161                                         tmp.append("\\u00");
162                                         tmp.push_back(hex_chars[cnum >> 4]);
163                                         tmp.push_back(hex_chars[cnum & 0xf]);
164                                 }
165                                 break;
166                         }
167                 }
168         }
169
170         tmp.push_back('"');
171         return tmp;
172 }
173
174 static void deSerializeJsonString(std::string &s)
175 {
176         assert(s.size() >= 2);
177         assert(s.front() == '"' && s.back() == '"');
178
179         size_t w = 0; // write index
180         size_t i = 1; // read index
181         const size_t len = s.size() - 1; // string length with trailing quote removed
182
183         while (i < len) {
184                 char c = s[i++];
185                 assert(c != '"');
186
187                 if (c != '\\') {
188                         s[w++] = c;
189                         continue;
190                 }
191
192                 if (i >= len)
193                         throw SerializationError("JSON string ended prematurely");
194                 char c2 = s[i++];
195                 switch (c2) {
196                         case 'b':
197                                 s[w++] = '\b';
198                                 break;
199                         case 'f':
200                                 s[w++] = '\f';
201                                 break;
202                         case 'n':
203                                 s[w++] = '\n';
204                                 break;
205                         case 'r':
206                                 s[w++] = '\r';
207                                 break;
208                         case 't':
209                                 s[w++] = '\t';
210                                 break;
211                         case 'u': {
212                                 if (i + 3 >= len)
213                                         throw SerializationError("JSON string ended prematurely");
214                                 unsigned char v[4] = {};
215                                 for (int j = 0; j < 4; j++)
216                                         hex_digit_decode(s[i+j], v[j]);
217                                 i += 4;
218                                 u32 hexnumber = (v[0] << 12) | (v[1] << 8) | (v[2] << 4) | v[3];
219                                 // Note that this does not work for anything other than ASCII
220                                 // but these functions do not actually interact with real JSON input.
221                                 s[w++] = (int) hexnumber;
222                                 break;
223                         }
224                         default:
225                                 s[w++] = c2;
226                                 break;
227                 }
228         }
229
230         assert(w <= i && i <= len);
231         // Truncate string to current write index
232         s.resize(w);
233 }
234
235 std::string deSerializeJsonString(std::istream &is)
236 {
237         std::string tmp;
238         char c;
239         bool was_backslash = false;
240
241         // Parse initial doublequote
242         c = is.get();
243         if (c != '"')
244                 throw SerializationError("JSON string must start with doublequote");
245         tmp.push_back(c);
246
247         // Grab the entire json string
248         for (;;) {
249                 c = is.get();
250                 if (is.eof())
251                         throw SerializationError("JSON string ended prematurely");
252
253                 tmp.push_back(c);
254                 if (was_backslash)
255                         was_backslash = false;
256                 else if (c == '\\')
257                         was_backslash = true;
258                 else if (c == '"')
259                         break; // found end of string
260         }
261
262         deSerializeJsonString(tmp);
263         return tmp;
264 }
265
266 std::string serializeJsonStringIfNeeded(const std::string &s)
267 {
268         for (size_t i = 0; i < s.size(); ++i) {
269                 if (s[i] <= 0x1f || s[i] >= 0x7f || s[i] == ' ' || s[i] == '\"')
270                         return serializeJsonString(s);
271         }
272         return s;
273 }
274
275 std::string deSerializeJsonStringIfNeeded(std::istream &is)
276 {
277         // Check for initial quote
278         char c = is.peek();
279         if (is.eof())
280                 return "";
281
282         if (c == '"') {
283                 // json string: defer to the right implementation
284                 return deSerializeJsonString(is);
285         }
286
287         // not a json string:
288         std::string tmp;
289         std::getline(is, tmp, ' ');
290         if (!is.eof())
291                 is.unget(); // we hit a space, put it back
292         return tmp;
293 }
294