]> git.lizzy.rs Git - dragonfireclient.git/blob - src/util/string.cpp
b805b2f78c19b28be4420b6784ea63f7d18aabdd
[dragonfireclient.git] / src / util / string.cpp
1 /*
2 Minetest
3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "string.h"
21 #include "pointer.h"
22 #include "numeric.h"
23 #include "log.h"
24
25 #include "hex.h"
26 #include "porting.h"
27 #include "translation.h"
28
29 #include <algorithm>
30 #include <array>
31 #include <sstream>
32 #include <iomanip>
33 #include <unordered_map>
34
35 #ifndef _WIN32
36         #include <iconv.h>
37 #else
38         #define _WIN32_WINNT 0x0501
39         #include <windows.h>
40 #endif
41
42 #ifdef __NetBSD__
43         #include <sys/param.h>
44         #if __NetBSD_Version__ <= 999001500
45                 #define BSD_ICONV_USED
46         #endif
47 #elif defined(_ICONV_H_) && (defined(__FreeBSD__) || defined(__OpenBSD__) || \
48         defined(__DragonFly__))
49         #define BSD_ICONV_USED
50 #endif
51
52 #ifndef _WIN32
53
/**
 * Converts a byte buffer from one character encoding to another using iconv.
 *
 * @param to          name of the target encoding, passed to iconv_open()
 * @param from        name of the source encoding, passed to iconv_open()
 * @param outbuf      destination buffer
 * @param outbuf_size in: capacity of outbuf in bytes;
 *                    out (on success): number of bytes written to outbuf
 * @param inbuf       source bytes (does not need to be null-terminated)
 * @param inbuf_size  number of bytes in inbuf
 * @return true if the whole input was consumed, false if the conversion
 *         descriptor could not be opened or iconv() stopped making progress
 *         (invalid input, output buffer exhausted, ...).
 */
static bool convert(const char *to, const char *from, char *outbuf,
		size_t *outbuf_size, char *inbuf, size_t inbuf_size)
{
	// Nothing to convert: succeed with zero bytes written, without
	// needing a conversion descriptor at all.
	if (inbuf_size == 0) {
		*outbuf_size = 0;
		return true;
	}

	iconv_t cd = iconv_open(to, from);
	// iconv_open() signals failure with (iconv_t)-1; that value must never
	// be handed to iconv() or iconv_close().
	if (cd == (iconv_t) -1)
		return false;

	// Some BSD iconv() prototypes take a `const char **` input pointer.
#ifdef BSD_ICONV_USED
	const char *inbuf_ptr = inbuf;
#else
	char *inbuf_ptr = inbuf;
#endif
	char *outbuf_ptr = outbuf;

	const size_t old_outbuf_size = *outbuf_size;
	size_t old_size = inbuf_size;
	while (inbuf_size > 0) {
		// iconv() advances the pointers and decrements both remaining sizes.
		iconv(cd, &inbuf_ptr, &inbuf_size, &outbuf_ptr, outbuf_size);
		// No input consumed in this round: give up instead of spinning.
		if (inbuf_size == old_size) {
			iconv_close(cd);
			return false;
		}
		old_size = inbuf_size;
	}

	iconv_close(cd);
	// *outbuf_size currently holds the bytes left; turn it into bytes used.
	*outbuf_size = old_outbuf_size - *outbuf_size;
	return true;
}
83
// DEFAULT_ENCODING is the iconv encoding name used for the wchar_t side of
// the conversions done by utf8_to_wide() and wide_to_utf8() below.
// The generic choice is "WCHAR_T"; Android and NetBSD get an explicit UTF-32
// name instead (those two platforms are also the ones for which
// utf8_to_wide() checks that sizeof(wchar_t) == 4).
#ifdef __ANDROID__
// On Android iconv disagrees how big a wchar_t is for whatever reason
const char *DEFAULT_ENCODING = "UTF-32LE";
#elif defined(__NetBSD__)
	// NetBSD does not allow "WCHAR_T" as a charset input to iconv.
	// Pick the UTF-32 variant that matches the host byte order.
	#include <sys/endian.h>
	#if BYTE_ORDER == BIG_ENDIAN
	const char *DEFAULT_ENCODING = "UTF-32BE";
	#else
	const char *DEFAULT_ENCODING = "UTF-32LE";
	#endif
#else
const char *DEFAULT_ENCODING = "WCHAR_T";
#endif
98
99 std::wstring utf8_to_wide(const std::string &input)
100 {
101         const size_t inbuf_size = input.length();
102         // maximum possible size, every character is sizeof(wchar_t) bytes
103         size_t outbuf_size = input.length() * sizeof(wchar_t);
104
105         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
106         memcpy(inbuf, input.c_str(), inbuf_size);
107         std::wstring out;
108         out.resize(outbuf_size / sizeof(wchar_t));
109
110 #if defined(__ANDROID__) || defined(__NetBSD__)
111         SANITY_CHECK(sizeof(wchar_t) == 4);
112 #endif
113
114         char *outbuf = reinterpret_cast<char*>(&out[0]);
115         if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
116                 infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
117                         << " into wstring" << std::endl;
118                 delete[] inbuf;
119                 return L"<invalid UTF-8 string>";
120         }
121         delete[] inbuf;
122
123         out.resize(outbuf_size / sizeof(wchar_t));
124         return out;
125 }
126
127 std::string wide_to_utf8(const std::wstring &input)
128 {
129         const size_t inbuf_size = input.length() * sizeof(wchar_t);
130         // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
131         size_t outbuf_size = input.length() * 4;
132
133         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
134         memcpy(inbuf, input.c_str(), inbuf_size);
135         std::string out;
136         out.resize(outbuf_size);
137
138         if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
139                 infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
140                         << " into UTF-8 string" << std::endl;
141                 delete[] inbuf;
142                 return "<invalid wide string>";
143         }
144         delete[] inbuf;
145
146         out.resize(outbuf_size);
147         return out;
148 }
149
150 #else // _WIN32
151
152 std::wstring utf8_to_wide(const std::string &input)
153 {
154         size_t outbuf_size = input.size() + 1;
155         wchar_t *outbuf = new wchar_t[outbuf_size];
156         memset(outbuf, 0, outbuf_size * sizeof(wchar_t));
157         MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(),
158                 outbuf, outbuf_size);
159         std::wstring out(outbuf);
160         delete[] outbuf;
161         return out;
162 }
163
164 std::string wide_to_utf8(const std::wstring &input)
165 {
166         size_t outbuf_size = (input.size() + 1) * 6;
167         char *outbuf = new char[outbuf_size];
168         memset(outbuf, 0, outbuf_size);
169         WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(),
170                 outbuf, outbuf_size, NULL, NULL);
171         std::string out(outbuf);
172         delete[] outbuf;
173         return out;
174 }
175
176 #endif // _WIN32
177
178 wchar_t *utf8_to_wide_c(const char *str)
179 {
180         std::wstring ret = utf8_to_wide(std::string(str));
181         size_t len = ret.length();
182         wchar_t *ret_c = new wchar_t[len + 1];
183         memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t));
184         return ret_c;
185 }
186
187
/**
 * Percent-encodes a string for use in a URI.
 *
 * Every byte that is not an RFC 3986 (section 2.3) "unreserved" character,
 * i.e. ASCII letters, ASCII digits, '-', '.', '_' and '~', is replaced by
 * '%' followed by two uppercase hex digits.
 *
 * The character classification is done with explicit ASCII ranges rather
 * than isalnum(), whose result depends on the current C locale and may
 * classify bytes >= 0x80 as alphanumeric in single-byte locales.
 *
 * @param str raw bytes to encode
 * @return the percent-encoded string
 */
std::string urlencode(const std::string &str)
{
	static const char url_hex_chars[] = "0123456789ABCDEF";

	std::string encoded;
	encoded.reserve(str.size());
	for (unsigned char c : str) {
		const bool unreserved =
			(c >= 'A' && c <= 'Z') ||
			(c >= 'a' && c <= 'z') ||
			(c >= '0' && c <= '9') ||
			c == '-' || c == '.' || c == '_' || c == '~';

		if (unreserved) {
			encoded += static_cast<char>(c);
		} else {
			encoded += '%';
			encoded += url_hex_chars[(c & 0xf0) >> 4];
			encoded += url_hex_chars[c & 0x0f];
		}
	}
	return encoded;
}
205
206 std::string urldecode(const std::string &str)
207 {
208         // Inverse of urlencode
209         std::ostringstream oss(std::ios::binary);
210         for (u32 i = 0; i < str.size(); i++) {
211                 unsigned char highvalue, lowvalue;
212                 if (str[i] == '%' &&
213                                 hex_digit_decode(str[i+1], highvalue) &&
214                                 hex_digit_decode(str[i+2], lowvalue)) {
215                         oss << (char) ((highvalue << 4) | lowvalue);
216                         i += 2;
217                 } else {
218                         oss << str[i];
219                 }
220         }
221         return oss.str();
222 }
223
224 u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask)
225 {
226         u32 result = 0;
227         u32 mask = 0;
228         char *s = &str[0];
229         char *flagstr;
230         char *strpos = nullptr;
231
232         while ((flagstr = strtok_r(s, ",", &strpos))) {
233                 s = nullptr;
234
235                 while (*flagstr == ' ' || *flagstr == '\t')
236                         flagstr++;
237
238                 bool flagset = true;
239                 if (!strncasecmp(flagstr, "no", 2)) {
240                         flagset = false;
241                         flagstr += 2;
242                 }
243
244                 for (int i = 0; flagdesc[i].name; i++) {
245                         if (!strcasecmp(flagstr, flagdesc[i].name)) {
246                                 mask |= flagdesc[i].flag;
247                                 if (flagset)
248                                         result |= flagdesc[i].flag;
249                                 break;
250                         }
251                 }
252         }
253
254         if (flagmask)
255                 *flagmask = mask;
256
257         return result;
258 }
259
260 std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask)
261 {
262         std::string result;
263
264         for (int i = 0; flagdesc[i].name; i++) {
265                 if (flagmask & flagdesc[i].flag) {
266                         if (!(flags & flagdesc[i].flag))
267                                 result += "no";
268
269                         result += flagdesc[i].name;
270                         result += ", ";
271                 }
272         }
273
274         size_t len = result.length();
275         if (len >= 2)
276                 result.erase(len - 2, 2);
277
278         return result;
279 }
280
/**
 * Copies `src` into `dst`, writing at most `size` bytes and always
 * null-terminating the result when `size` is non-zero.
 *
 * @return strlen(src) + 1, i.e. the buffer size needed to hold all of `src`
 */
size_t mystrlcpy(char *dst, const char *src, size_t size)
{
	const size_t needed = strlen(src) + 1;
	const size_t to_copy = needed < size ? needed : size;

	// to_copy is only zero when size is zero: nothing may be written then.
	if (to_copy == 0)
		return needed;

	memcpy(dst, src, to_copy);
	// Overwrites the copied terminator, or truncates if src did not fit.
	dst[to_copy - 1] = '\0';
	return needed;
}
293
/**
 * Re-entrant string tokenizer with strtok_r-like calling convention.
 *
 * @param s     string to tokenize on the first call, null on later calls
 *              (the position stored in *lasts is used then)
 * @param sep   set of separator characters
 * @param lasts in/out: where tokenizing continues on the next call
 * @return pointer to the next token (null-terminated in place), or null if
 *         only separators or the end of the string remain
 */
char *mystrtok_r(char *s, const char *sep, char **lasts)
{
	char *token = s ? s : *lasts;

	// Skip any separators in front of the token
	token += strspn(token, sep);
	if (*token == '\0')
		return nullptr;

	// Find the end of the token; terminate it and step past the separator
	char *rest = token + strcspn(token, sep);
	if (*rest != '\0')
		*rest++ = '\0';

	*lasts = rest;
	return token;
}
319
320 u64 read_seed(const char *str)
321 {
322         char *endptr;
323         u64 num;
324
325         if (str[0] == '0' && str[1] == 'x')
326                 num = strtoull(str, &endptr, 16);
327         else
328                 num = strtoull(str, &endptr, 10);
329
330         if (*endptr)
331                 num = murmur_hash_64_ua(str, (int)strlen(str), 0x1337);
332
333         return num;
334 }
335
336 static bool parseHexColorString(const std::string &value, video::SColor &color,
337                 unsigned char default_alpha)
338 {
339         u8 components[] = {0x00, 0x00, 0x00, default_alpha}; // R,G,B,A
340
341         size_t len = value.size();
342         bool short_form;
343
344         if (len == 9 || len == 7) // #RRGGBBAA or #RRGGBB
345                 short_form = false;
346         else if (len == 5 || len == 4) // #RGBA or #RGB
347                 short_form = true;
348         else
349                 return false;
350
351         for (size_t pos = 1, cc = 0; pos < len; pos++, cc++) {
352                 if (short_form) {
353                         u8 d;
354                         if (!hex_digit_decode(value[pos], d))
355                                 return false;
356
357                         components[cc] = (d & 0xf) << 4 | (d & 0xf);
358                 } else {
359                         u8 d1, d2;
360                         if (!hex_digit_decode(value[pos], d1) ||
361                                         !hex_digit_decode(value[pos+1], d2))
362                                 return false;
363
364                         components[cc] = (d1 & 0xf) << 4 | (d2 & 0xf);
365                         pos++; // skip the second digit -- it's already used
366                 }
367         }
368
369         color.setRed(components[0]);
370         color.setGreen(components[1]);
371         color.setBlue(components[2]);
372         color.setAlpha(components[3]);
373
374         return true;
375 }
376
377 const static std::unordered_map<std::string, u32> s_named_colors = {
378         {"aliceblue",            0xf0f8ff},
379         {"antiquewhite",         0xfaebd7},
380         {"aqua",                 0x00ffff},
381         {"aquamarine",           0x7fffd4},
382         {"azure",                0xf0ffff},
383         {"beige",                0xf5f5dc},
384         {"bisque",               0xffe4c4},
385         {"black",                00000000},
386         {"blanchedalmond",       0xffebcd},
387         {"blue",                 0x0000ff},
388         {"blueviolet",           0x8a2be2},
389         {"brown",                0xa52a2a},
390         {"burlywood",            0xdeb887},
391         {"cadetblue",            0x5f9ea0},
392         {"chartreuse",           0x7fff00},
393         {"chocolate",            0xd2691e},
394         {"coral",                0xff7f50},
395         {"cornflowerblue",       0x6495ed},
396         {"cornsilk",             0xfff8dc},
397         {"crimson",              0xdc143c},
398         {"cyan",                 0x00ffff},
399         {"darkblue",             0x00008b},
400         {"darkcyan",             0x008b8b},
401         {"darkgoldenrod",        0xb8860b},
402         {"darkgray",             0xa9a9a9},
403         {"darkgreen",            0x006400},
404         {"darkgrey",             0xa9a9a9},
405         {"darkkhaki",            0xbdb76b},
406         {"darkmagenta",          0x8b008b},
407         {"darkolivegreen",       0x556b2f},
408         {"darkorange",           0xff8c00},
409         {"darkorchid",           0x9932cc},
410         {"darkred",              0x8b0000},
411         {"darksalmon",           0xe9967a},
412         {"darkseagreen",         0x8fbc8f},
413         {"darkslateblue",        0x483d8b},
414         {"darkslategray",        0x2f4f4f},
415         {"darkslategrey",        0x2f4f4f},
416         {"darkturquoise",        0x00ced1},
417         {"darkviolet",           0x9400d3},
418         {"deeppink",             0xff1493},
419         {"deepskyblue",          0x00bfff},
420         {"dimgray",              0x696969},
421         {"dimgrey",              0x696969},
422         {"dodgerblue",           0x1e90ff},
423         {"firebrick",            0xb22222},
424         {"floralwhite",          0xfffaf0},
425         {"forestgreen",          0x228b22},
426         {"fuchsia",              0xff00ff},
427         {"gainsboro",            0xdcdcdc},
428         {"ghostwhite",           0xf8f8ff},
429         {"gold",                 0xffd700},
430         {"goldenrod",            0xdaa520},
431         {"gray",                 0x808080},
432         {"green",                0x008000},
433         {"greenyellow",          0xadff2f},
434         {"grey",                 0x808080},
435         {"honeydew",             0xf0fff0},
436         {"hotpink",              0xff69b4},
437         {"indianred",            0xcd5c5c},
438         {"indigo",               0x4b0082},
439         {"ivory",                0xfffff0},
440         {"khaki",                0xf0e68c},
441         {"lavender",             0xe6e6fa},
442         {"lavenderblush",        0xfff0f5},
443         {"lawngreen",            0x7cfc00},
444         {"lemonchiffon",         0xfffacd},
445         {"lightblue",            0xadd8e6},
446         {"lightcoral",           0xf08080},
447         {"lightcyan",            0xe0ffff},
448         {"lightgoldenrodyellow", 0xfafad2},
449         {"lightgray",            0xd3d3d3},
450         {"lightgreen",           0x90ee90},
451         {"lightgrey",            0xd3d3d3},
452         {"lightpink",            0xffb6c1},
453         {"lightsalmon",          0xffa07a},
454         {"lightseagreen",        0x20b2aa},
455         {"lightskyblue",         0x87cefa},
456         {"lightslategray",       0x778899},
457         {"lightslategrey",       0x778899},
458         {"lightsteelblue",       0xb0c4de},
459         {"lightyellow",          0xffffe0},
460         {"lime",                 0x00ff00},
461         {"limegreen",            0x32cd32},
462         {"linen",                0xfaf0e6},
463         {"magenta",              0xff00ff},
464         {"maroon",               0x800000},
465         {"mediumaquamarine",     0x66cdaa},
466         {"mediumblue",           0x0000cd},
467         {"mediumorchid",         0xba55d3},
468         {"mediumpurple",         0x9370db},
469         {"mediumseagreen",       0x3cb371},
470         {"mediumslateblue",      0x7b68ee},
471         {"mediumspringgreen",    0x00fa9a},
472         {"mediumturquoise",      0x48d1cc},
473         {"mediumvioletred",      0xc71585},
474         {"midnightblue",         0x191970},
475         {"mintcream",            0xf5fffa},
476         {"mistyrose",            0xffe4e1},
477         {"moccasin",             0xffe4b5},
478         {"navajowhite",          0xffdead},
479         {"navy",                 0x000080},
480         {"oldlace",              0xfdf5e6},
481         {"olive",                0x808000},
482         {"olivedrab",            0x6b8e23},
483         {"orange",               0xffa500},
484         {"orangered",            0xff4500},
485         {"orchid",               0xda70d6},
486         {"palegoldenrod",        0xeee8aa},
487         {"palegreen",            0x98fb98},
488         {"paleturquoise",        0xafeeee},
489         {"palevioletred",        0xdb7093},
490         {"papayawhip",           0xffefd5},
491         {"peachpuff",            0xffdab9},
492         {"peru",                 0xcd853f},
493         {"pink",                 0xffc0cb},
494         {"plum",                 0xdda0dd},
495         {"powderblue",           0xb0e0e6},
496         {"purple",               0x800080},
497         {"rebeccapurple",        0x663399},
498         {"red",                  0xff0000},
499         {"rosybrown",            0xbc8f8f},
500         {"royalblue",            0x4169e1},
501         {"saddlebrown",          0x8b4513},
502         {"salmon",               0xfa8072},
503         {"sandybrown",           0xf4a460},
504         {"seagreen",             0x2e8b57},
505         {"seashell",             0xfff5ee},
506         {"sienna",               0xa0522d},
507         {"silver",               0xc0c0c0},
508         {"skyblue",              0x87ceeb},
509         {"slateblue",            0x6a5acd},
510         {"slategray",            0x708090},
511         {"slategrey",            0x708090},
512         {"snow",                 0xfffafa},
513         {"springgreen",          0x00ff7f},
514         {"steelblue",            0x4682b4},
515         {"tan",                  0xd2b48c},
516         {"teal",                 0x008080},
517         {"thistle",              0xd8bfd8},
518         {"tomato",               0xff6347},
519         {"turquoise",            0x40e0d0},
520         {"violet",               0xee82ee},
521         {"wheat",                0xf5deb3},
522         {"white",                0xffffff},
523         {"whitesmoke",           0xf5f5f5},
524         {"yellow",               0xffff00},
525         {"yellowgreen",          0x9acd32}
526 };
527
528 static bool parseNamedColorString(const std::string &value, video::SColor &color)
529 {
530         std::string color_name;
531         std::string alpha_string;
532
533         /* If the string has a # in it, assume this is the start of a specified
534          * alpha value (if it isn't the string is invalid and the error will be
535          * caught later on, either because the color name won't be found or the
536          * alpha value will fail conversion)
537          */
538         size_t alpha_pos = value.find('#');
539         if (alpha_pos != std::string::npos) {
540                 color_name = value.substr(0, alpha_pos);
541                 alpha_string = value.substr(alpha_pos + 1);
542         } else {
543                 color_name = value;
544         }
545
546         color_name = lowercase(color_name);
547
548         auto it = s_named_colors.find(color_name);
549         if (it == s_named_colors.end())
550                 return false;
551
552         u32 color_temp = it->second;
553
554         /* An empty string for alpha is ok (none of the color table entries
555          * have an alpha value either). Color strings without an alpha specified
556          * are interpreted as fully opaque
557          */
558         if (!alpha_string.empty()) {
559                 if (alpha_string.size() == 1) {
560                         u8 d;
561                         if (!hex_digit_decode(alpha_string[0], d))
562                                 return false;
563
564                         color_temp |= ((d & 0xf) << 4 | (d & 0xf)) << 24;
565                 } else if (alpha_string.size() == 2) {
566                         u8 d1, d2;
567                         if (!hex_digit_decode(alpha_string[0], d1)
568                                         || !hex_digit_decode(alpha_string[1], d2))
569                                 return false;
570
571                         color_temp |= ((d1 & 0xf) << 4 | (d2 & 0xf)) << 24;
572                 } else {
573                         return false;
574                 }
575         } else {
576                 color_temp |= 0xff << 24; // Fully opaque
577         }
578
579         color = video::SColor(color_temp);
580
581         return true;
582 }
583
584 bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
585                 unsigned char default_alpha)
586 {
587         bool success;
588
589         if (value[0] == '#')
590                 success = parseHexColorString(value, color, default_alpha);
591         else
592                 success = parseNamedColorString(value, color);
593
594         if (!success && !quiet)
595                 errorstream << "Invalid color: \"" << value << "\"" << std::endl;
596
597         return success;
598 }
599
/**
 * Replaces, in place, every occurrence of the character `from` in `str`
 * with the character `to`.
 */
void str_replace(std::string &str, char from, char to)
{
	for (char &ch : str) {
		if (ch == from)
			ch = to;
	}
}
604
605 /* Translated strings have the following format:
606  * \x1bT marks the beginning of a translated string
607  * \x1bE marks its end
608  *
609  * \x1bF marks the beginning of an argument, and \x1bE its end.
610  *
611  * Arguments are *not* translated, as they may contain escape codes.
612  * Thus, if you want a translated argument, it should be inside \x1bT/\x1bE tags as well.
613  *
614  * This representation is chosen so that clients ignoring escape codes will
615  * see untranslated strings.
616  *
617  * For instance, suppose we have a string such as "@1 Wool" with the argument "White"
618  * The string will be sent as "\x1bT\x1bF\x1bTWhite\x1bE\x1bE Wool\x1bE"
619  * To translate this string, we extract what is inside \x1bT/\x1bE tags.
620  * When we notice the \x1bF tag, we recursively extract what is there up to the \x1bE end tag,
621  * translating it as well.
622  * We get the argument "White", translated, and create a template string with "@1" instead of it.
623  * We finally get the template "@1 Wool" that was used in the beginning, which we translate
624  * before filling it again.
625  */
626
627 void translate_all(const std::wstring &s, size_t &i,
628                 Translations *translations, std::wstring &res);
629
630 void translate_string(const std::wstring &s, Translations *translations,
631                 const std::wstring &textdomain, size_t &i, std::wstring &res)
632 {
633         std::wostringstream output;
634         std::vector<std::wstring> args;
635         int arg_number = 1;
636         while (i < s.length()) {
637                 // Not an escape sequence: just add the character.
638                 if (s[i] != '\x1b') {
639                         output.put(s[i]);
640                         // The character is a literal '@'; add it twice
641                         // so that it is not mistaken for an argument.
642                         if (s[i] == L'@')
643                                 output.put(L'@');
644                         ++i;
645                         continue;
646                 }
647
648                 // We have an escape sequence: locate it and its data
649                 // It is either a single character, or it begins with '('
650                 // and extends up to the following ')', with '\' as an escape character.
651                 ++i;
652                 size_t start_index = i;
653                 size_t length;
654                 if (i == s.length()) {
655                         length = 0;
656                 } else if (s[i] == L'(') {
657                         ++i;
658                         ++start_index;
659                         while (i < s.length() && s[i] != L')') {
660                                 if (s[i] == L'\\')
661                                         ++i;
662                                 ++i;
663                         }
664                         length = i - start_index;
665                         ++i;
666                         if (i > s.length())
667                                 i = s.length();
668                 } else {
669                         ++i;
670                         length = 1;
671                 }
672                 std::wstring escape_sequence(s, start_index, length);
673
674                 // The escape sequence is now reconstructed.
675                 std::vector<std::wstring> parts = split(escape_sequence, L'@');
676                 if (parts[0] == L"E") {
677                         // "End of translation" escape sequence. We are done locating the string to translate.
678                         break;
679                 } else if (parts[0] == L"F") {
680                         // "Start of argument" escape sequence.
681                         // Recursively translate the argument, and add it to the argument list.
682                         // Add an "@n" instead of the argument to the template to translate.
683                         if (arg_number >= 10) {
684                                 errorstream << "Ignoring too many arguments to translation" << std::endl;
685                                 std::wstring arg;
686                                 translate_all(s, i, translations, arg);
687                                 args.push_back(arg);
688                                 continue;
689                         }
690                         output.put(L'@');
691                         output << arg_number;
692                         ++arg_number;
693                         std::wstring arg;
694                         translate_all(s, i, translations, arg);
695                         args.push_back(arg);
696                 } else {
697                         // This is an escape sequence *inside* the template string to translate itself.
698                         // This should not happen, show an error message.
699                         errorstream << "Ignoring escape sequence '"
700                                 << wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
701                 }
702         }
703
704         std::wstring toutput;
705         // Translate the template.
706         if (translations != nullptr)
707                 toutput = translations->getTranslation(
708                                 textdomain, output.str());
709         else
710                 toutput = output.str();
711
712         // Put back the arguments in the translated template.
713         std::wostringstream result;
714         size_t j = 0;
715         while (j < toutput.length()) {
716                 // Normal character, add it to output and continue.
717                 if (toutput[j] != L'@' || j == toutput.length() - 1) {
718                         result.put(toutput[j]);
719                         ++j;
720                         continue;
721                 }
722
723                 ++j;
724                 // Literal escape for '@'.
725                 if (toutput[j] == L'@') {
726                         result.put(L'@');
727                         ++j;
728                         continue;
729                 }
730
731                 // Here we have an argument; get its index and add the translated argument to the output.
732                 int arg_index = toutput[j] - L'1';
733                 ++j;
734                 if (0 <= arg_index && (size_t)arg_index < args.size()) {
735                         result << args[arg_index];
736                 } else {
737                         // This is not allowed: show an error message
738                         errorstream << "Ignoring out-of-bounds argument escape sequence in translation" << std::endl;
739                 }
740         }
741         res = result.str();
742 }
743
744 void translate_all(const std::wstring &s, size_t &i,
745                 Translations *translations, std::wstring &res)
746 {
747         std::wostringstream output;
748         while (i < s.length()) {
749                 // Not an escape sequence: just add the character.
750                 if (s[i] != '\x1b') {
751                         output.put(s[i]);
752                         ++i;
753                         continue;
754                 }
755
756                 // We have an escape sequence: locate it and its data
757                 // It is either a single character, or it begins with '('
758                 // and extends up to the following ')', with '\' as an escape character.
759                 size_t escape_start = i;
760                 ++i;
761                 size_t start_index = i;
762                 size_t length;
763                 if (i == s.length()) {
764                         length = 0;
765                 } else if (s[i] == L'(') {
766                         ++i;
767                         ++start_index;
768                         while (i < s.length() && s[i] != L')') {
769                                 if (s[i] == L'\\') {
770                                         ++i;
771                                 }
772                                 ++i;
773                         }
774                         length = i - start_index;
775                         ++i;
776                         if (i > s.length())
777                                 i = s.length();
778                 } else {
779                         ++i;
780                         length = 1;
781                 }
782                 std::wstring escape_sequence(s, start_index, length);
783
784                 // The escape sequence is now reconstructed.
785                 std::vector<std::wstring> parts = split(escape_sequence, L'@');
786                 if (parts[0] == L"E") {
787                         // "End of argument" escape sequence. Exit.
788                         break;
789                 } else if (parts[0] == L"T") {
790                         // Beginning of translated string.
791                         std::wstring textdomain;
792                         if (parts.size() > 1)
793                                 textdomain = parts[1];
794                         std::wstring translated;
795                         translate_string(s, translations, textdomain, i, translated);
796                         output << translated;
797                 } else {
798                         // Another escape sequence, such as colors. Preserve it.
799                         output << std::wstring(s, escape_start, i - escape_start);
800                 }
801         }
802
803         res = output.str();
804 }
805
806 // Translate string server side
807 std::wstring translate_string(const std::wstring &s, Translations *translations)
808 {
809         size_t i = 0;
810         std::wstring res;
811         translate_all(s, i, translations, res);
812         return res;
813 }
814
815 // Translate string client side
816 std::wstring translate_string(const std::wstring &s)
817 {
818 #ifdef SERVER
819         return translate_string(s, nullptr);
820 #else
821         return translate_string(s, g_client_translations);
822 #endif
823 }
824
// Directory names that must not be used as-is.
// Problematic filenames from here:
// https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
// Plus undocumented values from here:
// https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
static const std::array<std::wstring, 30> disallowed_dir_names = {
        // Basic device names
        L"CON", L"PRN", L"AUX", L"NUL",
        // COM ports, including the superscript-digit variants
        L"COM1", L"COM2", L"COM3", L"COM4", L"COM5",
        L"COM6", L"COM7", L"COM8", L"COM9",
        L"COM\u00B2", L"COM\u00B3", L"COM\u00B9",
        // LPT ports, including the superscript-digit variants
        L"LPT1", L"LPT2", L"LPT3", L"LPT4", L"LPT5",
        L"LPT6", L"LPT7", L"LPT8", L"LPT9",
        L"LPT\u00B2", L"LPT\u00B3", L"LPT\u00B9",
        // Console input/output handles
        L"CONIN$", L"CONOUT$",
};
861
/**
 * Characters that are never allowed in the name of a created directory.
 * Any occurrence is replaced when a directory name is sanitized.
 */
static const std::wstring disallowed_path_chars{L"<>:\"/\\|?*."};
866
867
868 std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
869 {
870         std::wstring safe_name = utf8_to_wide(str);
871
872         for (std::wstring disallowed_name : disallowed_dir_names) {
873                 if (str_equal(safe_name, disallowed_name, true)) {
874                         safe_name = utf8_to_wide(optional_prefix) + safe_name;
875                         break;
876                 }
877         }
878
879         // Replace leading and trailing spaces with underscores.
880         size_t start = safe_name.find_first_not_of(L' ');
881         size_t end = safe_name.find_last_not_of(L' ');
882         if (start == std::wstring::npos || end == std::wstring::npos)
883                 start = end = safe_name.size();
884         for (size_t i = 0; i < start; i++)
885                 safe_name[i] = L'_';
886         for (size_t i = end + 1; i < safe_name.size(); i++)
887                 safe_name[i] = L'_';
888
889         // Replace other disallowed characters with underscores
890         for (size_t i = 0; i < safe_name.length(); i++) {
891                 bool is_valid = true;
892
893                 // Unlikely, but control characters should always be blacklisted
894                 if (safe_name[i] < 32) {
895                         is_valid = false;
896                 } else if (safe_name[i] < 128) {
897                         is_valid = disallowed_path_chars.find_first_of(safe_name[i])
898                                         == std::wstring::npos;
899                 }
900
901                 if (!is_valid)
902                         safe_name[i] = L'_';
903         }
904
905         return wide_to_utf8(safe_name);
906 }
907
908
909 void safe_print_string(std::ostream &os, const std::string &str)
910 {
911         std::ostream::fmtflags flags = os.flags();
912         os << std::hex;
913         for (const char c : str) {
914                 if (IS_ASCII_PRINTABLE_CHAR(c) || IS_UTF8_MULTB_START(c) ||
915                                 IS_UTF8_MULTB_INNER(c) || c == '\n' || c == '\t') {
916                         os << c;
917                 } else {
918                         os << '<' << std::setw(2) << (int)c << '>';
919                 }
920         }
921         os.setf(flags);
922 }