]> git.lizzy.rs Git - minetest.git/blob - src/util/string.cpp
Fix typos and en_US/en_GB inconsistency in various files (#12902)
[minetest.git] / src / util / string.cpp
1 /*
2 Minetest
3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "string.h"
21 #include "pointer.h"
22 #include "numeric.h"
23 #include "log.h"
24
25 #include "hex.h"
26 #include "porting.h"
27 #include "translation.h"
28
29 #include <algorithm>
30 #include <array>
31 #include <sstream>
32 #include <iomanip>
33 #include <unordered_map>
34
35 #ifndef _WIN32
36         #include <iconv.h>
37 #else
38         #define _WIN32_WINNT 0x0501
39         #include <windows.h>
40 #endif
41
42 #ifndef _WIN32
43
/**
 * Converts a byte buffer from one character encoding to another using iconv.
 *
 * @param to          name of the target encoding, passed to iconv_open()
 * @param from        name of the source encoding, passed to iconv_open()
 * @param outbuf      destination buffer
 * @param outbuf_size in: capacity of `outbuf` in bytes;
 *                    out (only meaningful on success): number of bytes written
 * @param inbuf       source bytes (does not need to be null-terminated)
 * @param inbuf_size  number of bytes in `inbuf`
 * @return true if the whole input was converted, false if the conversion
 *         descriptor could not be opened or iconv stopped making progress
 */
static bool convert(const char *to, const char *from, char *outbuf,
                size_t *outbuf_size, char *inbuf, size_t inbuf_size)
{
        // Nothing to convert: report zero bytes written without touching iconv.
        if (inbuf_size == 0) {
                *outbuf_size = 0;
                return true;
        }

        iconv_t cd = iconv_open(to, from);
        // iconv_open() signals failure (e.g. unsupported encoding pair) with
        // (iconv_t) -1; such a descriptor must not be handed to iconv().
        if (cd == (iconv_t) -1)
                return false;

        const size_t outbuf_capacity = *outbuf_size;
        char *inbuf_ptr = inbuf;
        char *outbuf_ptr = outbuf;
        size_t inbuf_left = inbuf_size;

        while (inbuf_left > 0) {
                const size_t left_before = inbuf_left;
                iconv(cd, &inbuf_ptr, &inbuf_left, &outbuf_ptr, outbuf_size);
                // No input consumed means iconv is stuck (invalid sequence or
                // output buffer full): give up instead of looping forever.
                if (inbuf_left == left_before) {
                        iconv_close(cd);
                        return false;
                }
        }

        iconv_close(cd);
        // iconv decremented *outbuf_size to the free space that is left;
        // turn that into the number of bytes actually written.
        *outbuf_size = outbuf_capacity - *outbuf_size;
        return true;
}
69
// Name of the iconv encoding used for the wchar_t side of the
// UTF-8 <-> wide string conversions below. It is chosen per platform.
#ifdef __ANDROID__
// On Android iconv disagrees how big a wchar_t is for whatever reason
const char *DEFAULT_ENCODING = "UTF-32LE";
#elif defined(__NetBSD__) || defined(__OpenBSD__)
        // NetBSD does not allow "WCHAR_T" as a charset input to iconv.
        // Pick the UTF-32 variant that matches the host byte order instead.
        #include <sys/endian.h>
        #if BYTE_ORDER == BIG_ENDIAN
        const char *DEFAULT_ENCODING = "UTF-32BE";
        #else
        const char *DEFAULT_ENCODING = "UTF-32LE";
        #endif
#else
const char *DEFAULT_ENCODING = "WCHAR_T";
#endif
84
85 std::wstring utf8_to_wide(const std::string &input)
86 {
87         const size_t inbuf_size = input.length();
88         // maximum possible size, every character is sizeof(wchar_t) bytes
89         size_t outbuf_size = input.length() * sizeof(wchar_t);
90
91         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
92         memcpy(inbuf, input.c_str(), inbuf_size);
93         std::wstring out;
94         out.resize(outbuf_size / sizeof(wchar_t));
95
96 #if defined(__ANDROID__) || defined(__NetBSD__) || defined(__OpenBSD__)
97         static_assert(sizeof(wchar_t) == 4, "Unexpected wide char size");
98 #endif
99
100         char *outbuf = reinterpret_cast<char*>(&out[0]);
101         if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
102                 infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
103                         << " into wstring" << std::endl;
104                 delete[] inbuf;
105                 return L"<invalid UTF-8 string>";
106         }
107         delete[] inbuf;
108
109         out.resize(outbuf_size / sizeof(wchar_t));
110         return out;
111 }
112
113 std::string wide_to_utf8(const std::wstring &input)
114 {
115         const size_t inbuf_size = input.length() * sizeof(wchar_t);
116         // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
117         size_t outbuf_size = input.length() * 4;
118
119         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
120         memcpy(inbuf, input.c_str(), inbuf_size);
121         std::string out;
122         out.resize(outbuf_size);
123
124         if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
125                 infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
126                         << " into UTF-8 string" << std::endl;
127                 delete[] inbuf;
128                 return "<invalid wide string>";
129         }
130         delete[] inbuf;
131
132         out.resize(outbuf_size);
133         return out;
134 }
135
136 #else // _WIN32
137
138 std::wstring utf8_to_wide(const std::string &input)
139 {
140         size_t outbuf_size = input.size() + 1;
141         wchar_t *outbuf = new wchar_t[outbuf_size];
142         memset(outbuf, 0, outbuf_size * sizeof(wchar_t));
143         MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(),
144                 outbuf, outbuf_size);
145         std::wstring out(outbuf);
146         delete[] outbuf;
147         return out;
148 }
149
150 std::string wide_to_utf8(const std::wstring &input)
151 {
152         size_t outbuf_size = (input.size() + 1) * 6;
153         char *outbuf = new char[outbuf_size];
154         memset(outbuf, 0, outbuf_size);
155         WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(),
156                 outbuf, outbuf_size, NULL, NULL);
157         std::string out(outbuf);
158         delete[] outbuf;
159         return out;
160 }
161
162 #endif // _WIN32
163
164 wchar_t *utf8_to_wide_c(const char *str)
165 {
166         std::wstring ret = utf8_to_wide(std::string(str));
167         size_t len = ret.length();
168         wchar_t *ret_c = new wchar_t[len + 1];
169         memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t));
170         return ret_c;
171 }
172
173
/**
 * Percent-encodes a string for use in a URI.
 *
 * Every byte that is not an unreserved URI character (ASCII letters, ASCII
 * digits, '-', '.', '_' and '~'; see RFC 3986, section 2.3) is replaced by
 * a percent sign followed by two uppercase hex digits.
 *
 * @param str bytes to encode
 * @return the encoded string
 */
std::string urlencode(const std::string &str)
{
        static const char url_hex_chars[] = "0123456789ABCDEF";

        std::string encoded;
        encoded.reserve(str.size());
        for (unsigned char c : str) {
                // Explicit ASCII ranges instead of isalnum(): the latter depends
                // on the current locale and may accept bytes >= 0x80.
                const bool unreserved =
                        (c >= 'A' && c <= 'Z') ||
                        (c >= 'a' && c <= 'z') ||
                        (c >= '0' && c <= '9') ||
                        c == '-' || c == '.' || c == '_' || c == '~';

                if (unreserved) {
                        encoded += static_cast<char>(c);
                } else {
                        encoded += '%';
                        encoded += url_hex_chars[(c & 0xf0) >> 4];
                        encoded += url_hex_chars[c & 0x0f];
                }
        }
        return encoded;
}
191
192 std::string urldecode(const std::string &str)
193 {
194         // Inverse of urlencode
195         std::ostringstream oss(std::ios::binary);
196         for (u32 i = 0; i < str.size(); i++) {
197                 unsigned char highvalue, lowvalue;
198                 if (str[i] == '%' &&
199                                 hex_digit_decode(str[i+1], highvalue) &&
200                                 hex_digit_decode(str[i+2], lowvalue)) {
201                         oss << (char) ((highvalue << 4) | lowvalue);
202                         i += 2;
203                 } else {
204                         oss << str[i];
205                 }
206         }
207         return oss.str();
208 }
209
210 u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask)
211 {
212         u32 result = 0;
213         u32 mask = 0;
214         char *s = &str[0];
215         char *flagstr;
216         char *strpos = nullptr;
217
218         while ((flagstr = strtok_r(s, ",", &strpos))) {
219                 s = nullptr;
220
221                 while (*flagstr == ' ' || *flagstr == '\t')
222                         flagstr++;
223
224                 bool flagset = true;
225                 if (!strncasecmp(flagstr, "no", 2)) {
226                         flagset = false;
227                         flagstr += 2;
228                 }
229
230                 for (int i = 0; flagdesc[i].name; i++) {
231                         if (!strcasecmp(flagstr, flagdesc[i].name)) {
232                                 mask |= flagdesc[i].flag;
233                                 if (flagset)
234                                         result |= flagdesc[i].flag;
235                                 break;
236                         }
237                 }
238         }
239
240         if (flagmask)
241                 *flagmask = mask;
242
243         return result;
244 }
245
246 std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask)
247 {
248         std::string result;
249
250         for (int i = 0; flagdesc[i].name; i++) {
251                 if (flagmask & flagdesc[i].flag) {
252                         if (!(flags & flagdesc[i].flag))
253                                 result += "no";
254
255                         result += flagdesc[i].name;
256                         result += ", ";
257                 }
258         }
259
260         size_t len = result.length();
261         if (len >= 2)
262                 result.erase(len - 2, 2);
263
264         return result;
265 }
266
/**
 * Copies a C string into a buffer of limited size, always null-terminating
 * the result unless the buffer size is zero.
 *
 * @param dst  destination buffer
 * @param src  null-terminated source string
 * @param size capacity of `dst` in bytes
 * @return strlen(src) + 1, i.e. the buffer size needed to hold all of `src`
 */
size_t mystrlcpy(char *dst, const char *src, size_t size)
{
        const size_t needed = strlen(src) + 1;

        // A zero-sized buffer cannot even hold the terminator.
        if (size == 0)
                return needed;

        const size_t count = needed < size ? needed : size;
        memcpy(dst, src, count);
        // Terminates a truncated copy; a no-op overwrite for a complete one.
        dst[count - 1] = '\0';

        return needed;
}
279
/**
 * Reentrant string tokenizer with the calling convention of strtok_r().
 *
 * @param s     string to tokenize on the first call, null on later calls
 *              to continue where the previous call stopped
 * @param sep   set of separator characters
 * @param lasts stores the continuation position between calls
 * @return pointer to the next token (the buffer is modified so that the
 *         token is null-terminated), or null if no token is left
 */
char *mystrtok_r(char *s, const char *sep, char **lasts)
{
        char *token = s ? s : *lasts;

        // Skip separators in front of the token.
        token += strspn(token, sep);
        if (*token == '\0')
                return nullptr;

        // Terminate the token at the next separator, if there is one, and
        // remember where to continue.
        char *end = token + strcspn(token, sep);
        if (*end != '\0')
                *end++ = '\0';

        *lasts = end;
        return token;
}
305
306 u64 read_seed(const char *str)
307 {
308         char *endptr;
309         u64 num;
310
311         if (str[0] == '0' && str[1] == 'x')
312                 num = strtoull(str, &endptr, 16);
313         else
314                 num = strtoull(str, &endptr, 10);
315
316         if (*endptr)
317                 num = murmur_hash_64_ua(str, (int)strlen(str), 0x1337);
318
319         return num;
320 }
321
322 static bool parseHexColorString(const std::string &value, video::SColor &color,
323                 unsigned char default_alpha)
324 {
325         u8 components[] = {0x00, 0x00, 0x00, default_alpha}; // R,G,B,A
326
327         size_t len = value.size();
328         bool short_form;
329
330         if (len == 9 || len == 7) // #RRGGBBAA or #RRGGBB
331                 short_form = false;
332         else if (len == 5 || len == 4) // #RGBA or #RGB
333                 short_form = true;
334         else
335                 return false;
336
337         for (size_t pos = 1, cc = 0; pos < len; pos++, cc++) {
338                 if (short_form) {
339                         u8 d;
340                         if (!hex_digit_decode(value[pos], d))
341                                 return false;
342
343                         components[cc] = (d & 0xf) << 4 | (d & 0xf);
344                 } else {
345                         u8 d1, d2;
346                         if (!hex_digit_decode(value[pos], d1) ||
347                                         !hex_digit_decode(value[pos+1], d2))
348                                 return false;
349
350                         components[cc] = (d1 & 0xf) << 4 | (d2 & 0xf);
351                         pos++; // skip the second digit -- it's already used
352                 }
353         }
354
355         color.setRed(components[0]);
356         color.setGreen(components[1]);
357         color.setBlue(components[2]);
358         color.setAlpha(components[3]);
359
360         return true;
361 }
362
363 const static std::unordered_map<std::string, u32> s_named_colors = {
364         {"aliceblue",            0xf0f8ff},
365         {"antiquewhite",         0xfaebd7},
366         {"aqua",                 0x00ffff},
367         {"aquamarine",           0x7fffd4},
368         {"azure",                0xf0ffff},
369         {"beige",                0xf5f5dc},
370         {"bisque",               0xffe4c4},
371         {"black",                00000000},
372         {"blanchedalmond",       0xffebcd},
373         {"blue",                 0x0000ff},
374         {"blueviolet",           0x8a2be2},
375         {"brown",                0xa52a2a},
376         {"burlywood",            0xdeb887},
377         {"cadetblue",            0x5f9ea0},
378         {"chartreuse",           0x7fff00},
379         {"chocolate",            0xd2691e},
380         {"coral",                0xff7f50},
381         {"cornflowerblue",       0x6495ed},
382         {"cornsilk",             0xfff8dc},
383         {"crimson",              0xdc143c},
384         {"cyan",                 0x00ffff},
385         {"darkblue",             0x00008b},
386         {"darkcyan",             0x008b8b},
387         {"darkgoldenrod",        0xb8860b},
388         {"darkgray",             0xa9a9a9},
389         {"darkgreen",            0x006400},
390         {"darkgrey",             0xa9a9a9},
391         {"darkkhaki",            0xbdb76b},
392         {"darkmagenta",          0x8b008b},
393         {"darkolivegreen",       0x556b2f},
394         {"darkorange",           0xff8c00},
395         {"darkorchid",           0x9932cc},
396         {"darkred",              0x8b0000},
397         {"darksalmon",           0xe9967a},
398         {"darkseagreen",         0x8fbc8f},
399         {"darkslateblue",        0x483d8b},
400         {"darkslategray",        0x2f4f4f},
401         {"darkslategrey",        0x2f4f4f},
402         {"darkturquoise",        0x00ced1},
403         {"darkviolet",           0x9400d3},
404         {"deeppink",             0xff1493},
405         {"deepskyblue",          0x00bfff},
406         {"dimgray",              0x696969},
407         {"dimgrey",              0x696969},
408         {"dodgerblue",           0x1e90ff},
409         {"firebrick",            0xb22222},
410         {"floralwhite",          0xfffaf0},
411         {"forestgreen",          0x228b22},
412         {"fuchsia",              0xff00ff},
413         {"gainsboro",            0xdcdcdc},
414         {"ghostwhite",           0xf8f8ff},
415         {"gold",                 0xffd700},
416         {"goldenrod",            0xdaa520},
417         {"gray",                 0x808080},
418         {"green",                0x008000},
419         {"greenyellow",          0xadff2f},
420         {"grey",                 0x808080},
421         {"honeydew",             0xf0fff0},
422         {"hotpink",              0xff69b4},
423         {"indianred",            0xcd5c5c},
424         {"indigo",               0x4b0082},
425         {"ivory",                0xfffff0},
426         {"khaki",                0xf0e68c},
427         {"lavender",             0xe6e6fa},
428         {"lavenderblush",        0xfff0f5},
429         {"lawngreen",            0x7cfc00},
430         {"lemonchiffon",         0xfffacd},
431         {"lightblue",            0xadd8e6},
432         {"lightcoral",           0xf08080},
433         {"lightcyan",            0xe0ffff},
434         {"lightgoldenrodyellow", 0xfafad2},
435         {"lightgray",            0xd3d3d3},
436         {"lightgreen",           0x90ee90},
437         {"lightgrey",            0xd3d3d3},
438         {"lightpink",            0xffb6c1},
439         {"lightsalmon",          0xffa07a},
440         {"lightseagreen",        0x20b2aa},
441         {"lightskyblue",         0x87cefa},
442         {"lightslategray",       0x778899},
443         {"lightslategrey",       0x778899},
444         {"lightsteelblue",       0xb0c4de},
445         {"lightyellow",          0xffffe0},
446         {"lime",                 0x00ff00},
447         {"limegreen",            0x32cd32},
448         {"linen",                0xfaf0e6},
449         {"magenta",              0xff00ff},
450         {"maroon",               0x800000},
451         {"mediumaquamarine",     0x66cdaa},
452         {"mediumblue",           0x0000cd},
453         {"mediumorchid",         0xba55d3},
454         {"mediumpurple",         0x9370db},
455         {"mediumseagreen",       0x3cb371},
456         {"mediumslateblue",      0x7b68ee},
457         {"mediumspringgreen",    0x00fa9a},
458         {"mediumturquoise",      0x48d1cc},
459         {"mediumvioletred",      0xc71585},
460         {"midnightblue",         0x191970},
461         {"mintcream",            0xf5fffa},
462         {"mistyrose",            0xffe4e1},
463         {"moccasin",             0xffe4b5},
464         {"navajowhite",          0xffdead},
465         {"navy",                 0x000080},
466         {"oldlace",              0xfdf5e6},
467         {"olive",                0x808000},
468         {"olivedrab",            0x6b8e23},
469         {"orange",               0xffa500},
470         {"orangered",            0xff4500},
471         {"orchid",               0xda70d6},
472         {"palegoldenrod",        0xeee8aa},
473         {"palegreen",            0x98fb98},
474         {"paleturquoise",        0xafeeee},
475         {"palevioletred",        0xdb7093},
476         {"papayawhip",           0xffefd5},
477         {"peachpuff",            0xffdab9},
478         {"peru",                 0xcd853f},
479         {"pink",                 0xffc0cb},
480         {"plum",                 0xdda0dd},
481         {"powderblue",           0xb0e0e6},
482         {"purple",               0x800080},
483         {"rebeccapurple",        0x663399},
484         {"red",                  0xff0000},
485         {"rosybrown",            0xbc8f8f},
486         {"royalblue",            0x4169e1},
487         {"saddlebrown",          0x8b4513},
488         {"salmon",               0xfa8072},
489         {"sandybrown",           0xf4a460},
490         {"seagreen",             0x2e8b57},
491         {"seashell",             0xfff5ee},
492         {"sienna",               0xa0522d},
493         {"silver",               0xc0c0c0},
494         {"skyblue",              0x87ceeb},
495         {"slateblue",            0x6a5acd},
496         {"slategray",            0x708090},
497         {"slategrey",            0x708090},
498         {"snow",                 0xfffafa},
499         {"springgreen",          0x00ff7f},
500         {"steelblue",            0x4682b4},
501         {"tan",                  0xd2b48c},
502         {"teal",                 0x008080},
503         {"thistle",              0xd8bfd8},
504         {"tomato",               0xff6347},
505         {"turquoise",            0x40e0d0},
506         {"violet",               0xee82ee},
507         {"wheat",                0xf5deb3},
508         {"white",                0xffffff},
509         {"whitesmoke",           0xf5f5f5},
510         {"yellow",               0xffff00},
511         {"yellowgreen",          0x9acd32}
512 };
513
514 static bool parseNamedColorString(const std::string &value, video::SColor &color)
515 {
516         std::string color_name;
517         std::string alpha_string;
518
519         /* If the string has a # in it, assume this is the start of a specified
520          * alpha value (if it isn't the string is invalid and the error will be
521          * caught later on, either because the color name won't be found or the
522          * alpha value will fail conversion)
523          */
524         size_t alpha_pos = value.find('#');
525         if (alpha_pos != std::string::npos) {
526                 color_name = value.substr(0, alpha_pos);
527                 alpha_string = value.substr(alpha_pos + 1);
528         } else {
529                 color_name = value;
530         }
531
532         color_name = lowercase(color_name);
533
534         auto it = s_named_colors.find(color_name);
535         if (it == s_named_colors.end())
536                 return false;
537
538         u32 color_temp = it->second;
539
540         /* An empty string for alpha is ok (none of the color table entries
541          * have an alpha value either). Color strings without an alpha specified
542          * are interpreted as fully opaque
543          */
544         if (!alpha_string.empty()) {
545                 if (alpha_string.size() == 1) {
546                         u8 d;
547                         if (!hex_digit_decode(alpha_string[0], d))
548                                 return false;
549
550                         color_temp |= ((d & 0xf) << 4 | (d & 0xf)) << 24;
551                 } else if (alpha_string.size() == 2) {
552                         u8 d1, d2;
553                         if (!hex_digit_decode(alpha_string[0], d1)
554                                         || !hex_digit_decode(alpha_string[1], d2))
555                                 return false;
556
557                         color_temp |= ((d1 & 0xf) << 4 | (d2 & 0xf)) << 24;
558                 } else {
559                         return false;
560                 }
561         } else {
562                 color_temp |= 0xff << 24; // Fully opaque
563         }
564
565         color = video::SColor(color_temp);
566
567         return true;
568 }
569
570 bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
571                 unsigned char default_alpha)
572 {
573         bool success;
574
575         if (value[0] == '#')
576                 success = parseHexColorString(value, color, default_alpha);
577         else
578                 success = parseNamedColorString(value, color);
579
580         if (!success && !quiet)
581                 errorstream << "Invalid color: \"" << value << "\"" << std::endl;
582
583         return success;
584 }
585
/**
 * Replaces every occurrence of the character `from` in `str` by `to`,
 * modifying the string in place.
 */
void str_replace(std::string &str, char from, char to)
{
        for (char &c : str) {
                if (c == from)
                        c = to;
        }
}
590
591 /* Translated strings have the following format:
592  * \x1bT marks the beginning of a translated string
593  * \x1bE marks its end
594  *
595  * \x1bF marks the beginning of an argument, and \x1bE its end.
596  *
597  * Arguments are *not* translated, as they may contain escape codes.
598  * Thus, if you want a translated argument, it should be inside \x1bT/\x1bE tags as well.
599  *
600  * This representation is chosen so that clients ignoring escape codes will
601  * see untranslated strings.
602  *
603  * For instance, suppose we have a string such as "@1 Wool" with the argument "White"
604  * The string will be sent as "\x1bT\x1bF\x1bTWhite\x1bE\x1bE Wool\x1bE"
605  * To translate this string, we extract what is inside \x1bT/\x1bE tags.
606  * When we notice the \x1bF tag, we recursively extract what is there up to the \x1bE end tag,
607  * translating it as well.
608  * We get the argument "White", translated, and create a template string with "@1" instead of it.
609  * We finally get the template "@1 Wool" that was used in the beginning, which we translate
610  * before filling it again.
611  */
612
613 void translate_all(const std::wstring &s, size_t &i,
614                 Translations *translations, std::wstring &res);
615
/*
 * Translates one translated-string section of `s`.
 *
 * Reading starts at index `i`, which is expected to point just behind the
 * escape sequence that opened the section. Characters are collected into a
 * template until the "E" escape sequence or the end of `s` is reached; `i`
 * is advanced past everything that was consumed. Each argument section
 * ("F" escape sequence) is translated recursively with translate_all() and
 * represented in the template by a placeholder "@1" ... "@9".
 *
 * The template is then looked up via translations->getTranslation() in
 * `textdomain` (or used as is if `translations` is null), and the
 * placeholders in the result are replaced by the translated arguments.
 * The final text is stored in `res`.
 */
void translate_string(const std::wstring &s, Translations *translations,
                const std::wstring &textdomain, size_t &i, std::wstring &res)
{
        std::wostringstream output;
        std::vector<std::wstring> args;
        int arg_number = 1;
        while (i < s.length()) {
                // Not an escape sequence: just add the character.
                if (s[i] != '\x1b') {
                        output.put(s[i]);
                        // The character is a literal '@'; add it twice
                        // so that it is not mistaken for an argument.
                        if (s[i] == L'@')
                                output.put(L'@');
                        ++i;
                        continue;
                }

                // We have an escape sequence: locate it and its data
                // It is either a single character, or it begins with '('
                // and extends up to the following ')', with '\' as an escape character.
                ++i;
                size_t start_index = i;
                size_t length;
                if (i == s.length()) {
                        // The string ends right after the escape character.
                        length = 0;
                } else if (s[i] == L'(') {
                        ++i;
                        ++start_index;
                        while (i < s.length() && s[i] != L')') {
                                // Skip the character following a backslash.
                                if (s[i] == L'\\')
                                        ++i;
                                ++i;
                        }
                        length = i - start_index;
                        // Step over the closing ')', but never past the end of s.
                        ++i;
                        if (i > s.length())
                                i = s.length();
                } else {
                        ++i;
                        length = 1;
                }
                std::wstring escape_sequence(s, start_index, length);

                // The escape sequence is now reconstructed.
                std::vector<std::wstring> parts = split(escape_sequence, L'@');
                if (parts[0] == L"E") {
                        // "End of translation" escape sequence. We are done locating the string to translate.
                        break;
                } else if (parts[0] == L"F") {
                        // "Start of argument" escape sequence.
                        // Recursively translate the argument, and add it to the argument list.
                        // Add an "@n" instead of the argument to the template to translate.
                        if (arg_number >= 10) {
                                // The argument is still consumed and stored, but no
                                // placeholder for it is added to the template.
                                errorstream << "Ignoring too many arguments to translation" << std::endl;
                                std::wstring arg;
                                translate_all(s, i, translations, arg);
                                args.push_back(arg);
                                continue;
                        }
                        output.put(L'@');
                        output << arg_number;
                        ++arg_number;
                        std::wstring arg;
                        translate_all(s, i, translations, arg);
                        args.push_back(arg);
                } else {
                        // This is an escape sequence *inside* the template string to translate itself.
                        // This should not happen, show an error message.
                        errorstream << "Ignoring escape sequence '"
                                << wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
                }
        }

        std::wstring toutput;
        // Translate the template.
        if (translations != nullptr)
                toutput = translations->getTranslation(
                                textdomain, output.str());
        else
                toutput = output.str();

        // Put back the arguments in the translated template.
        std::wostringstream result;
        size_t j = 0;
        while (j < toutput.length()) {
                // Normal character, add it to output and continue.
                // (An '@' that is the very last character is copied as well.)
                if (toutput[j] != L'@' || j == toutput.length() - 1) {
                        result.put(toutput[j]);
                        ++j;
                        continue;
                }

                ++j;
                // Literal escape for '@'.
                if (toutput[j] == L'@') {
                        result.put(L'@');
                        ++j;
                        continue;
                }

                // Here we have an argument; get its index and add the translated argument to the output.
                // The character after '@' is mapped so that '1' selects args[0].
                int arg_index = toutput[j] - L'1';
                ++j;
                if (0 <= arg_index && (size_t)arg_index < args.size()) {
                        result << args[arg_index];
                } else {
                        // This is not allowed: show an error message
                        errorstream << "Ignoring out-of-bounds argument escape sequence in translation" << std::endl;
                }
        }
        res = result.str();
}
729
/*
 * Copies `s`, starting at index `i`, into `res` while translating every
 * translated-string section ("T" escape sequence) it contains.
 *
 * Processing stops at an "E" escape sequence or at the end of `s`; `i` is
 * advanced past everything that was consumed. Escape sequences other than
 * "E" and "T" are copied to the output unchanged.
 */
void translate_all(const std::wstring &s, size_t &i,
                Translations *translations, std::wstring &res)
{
        std::wostringstream output;
        while (i < s.length()) {
                // Not an escape sequence: just add the character.
                if (s[i] != '\x1b') {
                        output.put(s[i]);
                        ++i;
                        continue;
                }

                // We have an escape sequence: locate it and its data
                // It is either a single character, or it begins with '('
                // and extends up to the following ')', with '\' as an escape character.
                // escape_start remembers where the whole sequence begins so that
                // it can be copied verbatim further below.
                size_t escape_start = i;
                ++i;
                size_t start_index = i;
                size_t length;
                if (i == s.length()) {
                        // The string ends right after the escape character.
                        length = 0;
                } else if (s[i] == L'(') {
                        ++i;
                        ++start_index;
                        while (i < s.length() && s[i] != L')') {
                                // Skip the character following a backslash.
                                if (s[i] == L'\\') {
                                        ++i;
                                }
                                ++i;
                        }
                        length = i - start_index;
                        // Step over the closing ')', but never past the end of s.
                        ++i;
                        if (i > s.length())
                                i = s.length();
                } else {
                        ++i;
                        length = 1;
                }
                std::wstring escape_sequence(s, start_index, length);

                // The escape sequence is now reconstructed.
                std::vector<std::wstring> parts = split(escape_sequence, L'@');
                if (parts[0] == L"E") {
                        // "End of argument" escape sequence. Exit.
                        break;
                } else if (parts[0] == L"T") {
                        // Beginning of translated string.
                        // The part after the first '@', if any, is the text domain.
                        std::wstring textdomain;
                        if (parts.size() > 1)
                                textdomain = parts[1];
                        std::wstring translated;
                        translate_string(s, translations, textdomain, i, translated);
                        output << translated;
                } else {
                        // Another escape sequence, such as colors. Preserve it.
                        output << std::wstring(s, escape_start, i - escape_start);
                }
        }

        res = output.str();
}
791
792 // Translate string server side
793 std::wstring translate_string(const std::wstring &s, Translations *translations)
794 {
795         size_t i = 0;
796         std::wstring res;
797         translate_all(s, i, translations, res);
798         return res;
799 }
800
801 // Translate string client side
802 std::wstring translate_string(const std::wstring &s)
803 {
804 #ifdef SERVER
805         return translate_string(s, nullptr);
806 #else
807         return translate_string(s, g_client_translations);
808 #endif
809 }
810
/**
 * Directory names that must not be used verbatim.
 *
 * Sources:
 *  - documented reserved names:
 *    https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
 *  - additional undocumented names:
 *    https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
 */
static const std::array<std::wstring, 30> disallowed_dir_names = {
	// Basic device names
	L"CON", L"PRN", L"AUX", L"NUL",

	// Serial ports, including the superscript-digit variants
	L"COM1", L"COM2", L"COM3",
	L"COM4", L"COM5", L"COM6",
	L"COM7", L"COM8", L"COM9",
	L"COM\u00B2", L"COM\u00B3", L"COM\u00B9",

	// Parallel ports, including the superscript-digit variants
	L"LPT1", L"LPT2", L"LPT3",
	L"LPT4", L"LPT5", L"LPT6",
	L"LPT7", L"LPT8", L"LPT9",
	L"LPT\u00B2", L"LPT\u00B3", L"LPT\u00B9",

	// Console input/output
	L"CONIN$", L"CONOUT$",
};
847
/**
 * Characters that may not appear in a created directory name.
 * sanitizeDirName() replaces each occurrence with an underscore.
 */
static const std::wstring disallowed_path_chars{L"<>:\"/\\|?*."};
852
853
854 std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
855 {
856         std::wstring safe_name = utf8_to_wide(str);
857
858         for (std::wstring disallowed_name : disallowed_dir_names) {
859                 if (str_equal(safe_name, disallowed_name, true)) {
860                         safe_name = utf8_to_wide(optional_prefix) + safe_name;
861                         break;
862                 }
863         }
864
865         // Replace leading and trailing spaces with underscores.
866         size_t start = safe_name.find_first_not_of(L' ');
867         size_t end = safe_name.find_last_not_of(L' ');
868         if (start == std::wstring::npos || end == std::wstring::npos)
869                 start = end = safe_name.size();
870         for (size_t i = 0; i < start; i++)
871                 safe_name[i] = L'_';
872         for (size_t i = end + 1; i < safe_name.size(); i++)
873                 safe_name[i] = L'_';
874
875         // Replace other disallowed characters with underscores
876         for (size_t i = 0; i < safe_name.length(); i++) {
877                 bool is_valid = true;
878
879                 // Unlikely, but control characters should always be blacklisted
880                 if (safe_name[i] < 32) {
881                         is_valid = false;
882                 } else if (safe_name[i] < 128) {
883                         is_valid = disallowed_path_chars.find_first_of(safe_name[i])
884                                         == std::wstring::npos;
885                 }
886
887                 if (!is_valid)
888                         safe_name[i] = L'_';
889         }
890
891         return wide_to_utf8(safe_name);
892 }
893
894
895 void safe_print_string(std::ostream &os, const std::string &str)
896 {
897         std::ostream::fmtflags flags = os.flags();
898         os << std::hex;
899         for (const char c : str) {
900                 if (IS_ASCII_PRINTABLE_CHAR(c) || IS_UTF8_MULTB_START(c) ||
901                                 IS_UTF8_MULTB_INNER(c) || c == '\n' || c == '\t') {
902                         os << c;
903                 } else {
904                         os << '<' << std::setw(2) << (int)c << '>';
905                 }
906         }
907         os.setf(flags);
908 }