]> git.lizzy.rs Git - minetest.git/blob - src/util/string.cpp
eec5ab4cddcb450eeb24b1a4cf489e8160caca9f
[minetest.git] / src / util / string.cpp
1 /*
2 Minetest
3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "string.h"
21 #include "pointer.h"
22 #include "numeric.h"
23 #include "log.h"
24
25 #include "hex.h"
26 #include "porting.h"
27 #include "translation.h"
28
29 #include <algorithm>
30 #include <array>
31 #include <sstream>
32 #include <iomanip>
33 #include <unordered_map>
34
35 #ifndef _WIN32
36         #include <iconv.h>
37 #else
38         #define _WIN32_WINNT 0x0501
39         #include <windows.h>
40 #endif
41
42 #if defined(_ICONV_H_) && (defined(__FreeBSD__) || defined(__NetBSD__) || \
43         defined(__OpenBSD__) || defined(__DragonFly__))
44         #define BSD_ICONV_USED
45 #endif
46
47 #ifndef _WIN32
48
/**
 * Converts a byte buffer from one character encoding to another using iconv.
 *
 * @param to          iconv name of the target encoding
 * @param from        iconv name of the source encoding
 * @param outbuf      destination buffer
 * @param outbuf_size in: capacity of outbuf in bytes;
 *                    out (on success): number of bytes written to outbuf
 * @param inbuf       source bytes (need not be NUL-terminated)
 * @param inbuf_size  number of bytes in inbuf
 * @return true if the whole input was converted, false if the conversion
 *         descriptor could not be opened or the conversion got stuck
 */
static bool convert(const char *to, const char *from, char *outbuf,
		size_t *outbuf_size, char *inbuf, size_t inbuf_size)
{
	// Nothing to convert: report zero bytes written. Doing this up front keeps
	// "empty in, empty out" working even when the encoding pair is unsupported.
	if (inbuf_size == 0) {
		*outbuf_size = 0;
		return true;
	}

	iconv_t cd = iconv_open(to, from);
	// iconv_open() reports failure with (iconv_t)-1; such a descriptor must
	// not be handed to iconv() or iconv_close().
	if (cd == (iconv_t) -1)
		return false;

#ifdef BSD_ICONV_USED
	const char *inbuf_ptr = inbuf;
#else
	char *inbuf_ptr = inbuf;
#endif
	char *outbuf_ptr = outbuf;

	const size_t outbuf_capacity = *outbuf_size;
	size_t previous_left = inbuf_size;
	while (inbuf_size > 0) {
		// iconv() advances the pointers and decrements both remaining sizes
		iconv(cd, &inbuf_ptr, &inbuf_size, &outbuf_ptr, outbuf_size);
		// No input was consumed in this round: give up instead of looping forever
		if (inbuf_size == previous_left) {
			iconv_close(cd);
			return false;
		}
		previous_left = inbuf_size;
	}

	iconv_close(cd);
	// *outbuf_size now holds the unused space; turn it into "bytes written"
	*outbuf_size = outbuf_capacity - *outbuf_size;
	return true;
}
78
// Name of the iconv encoding used for wchar_t data in this file.
const char *DEFAULT_ENCODING =
#ifdef __ANDROID__
	// On Android, iconv disagrees about the size of wchar_t,
	// so a fixed-width encoding is named explicitly.
	"UTF-32LE";
#else
	"WCHAR_T";
#endif
85
86 std::wstring utf8_to_wide(const std::string &input)
87 {
88         const size_t inbuf_size = input.length();
89         // maximum possible size, every character is sizeof(wchar_t) bytes
90         size_t outbuf_size = input.length() * sizeof(wchar_t);
91
92         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
93         memcpy(inbuf, input.c_str(), inbuf_size);
94         std::wstring out;
95         out.resize(outbuf_size / sizeof(wchar_t));
96
97 #ifdef __ANDROID__
98         SANITY_CHECK(sizeof(wchar_t) == 4);
99 #endif
100
101         char *outbuf = reinterpret_cast<char*>(&out[0]);
102         if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
103                 infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
104                         << " into wstring" << std::endl;
105                 delete[] inbuf;
106                 return L"<invalid UTF-8 string>";
107         }
108         delete[] inbuf;
109
110         out.resize(outbuf_size / sizeof(wchar_t));
111         return out;
112 }
113
114 std::string wide_to_utf8(const std::wstring &input)
115 {
116         const size_t inbuf_size = input.length() * sizeof(wchar_t);
117         // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
118         size_t outbuf_size = input.length() * 4;
119
120         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
121         memcpy(inbuf, input.c_str(), inbuf_size);
122         std::string out;
123         out.resize(outbuf_size);
124
125         if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
126                 infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
127                         << " into UTF-8 string" << std::endl;
128                 delete[] inbuf;
129                 return "<invalid wide string>";
130         }
131         delete[] inbuf;
132
133         out.resize(outbuf_size);
134         return out;
135 }
136
137 #else // _WIN32
138
139 std::wstring utf8_to_wide(const std::string &input)
140 {
141         size_t outbuf_size = input.size() + 1;
142         wchar_t *outbuf = new wchar_t[outbuf_size];
143         memset(outbuf, 0, outbuf_size * sizeof(wchar_t));
144         MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(),
145                 outbuf, outbuf_size);
146         std::wstring out(outbuf);
147         delete[] outbuf;
148         return out;
149 }
150
151 std::string wide_to_utf8(const std::wstring &input)
152 {
153         size_t outbuf_size = (input.size() + 1) * 6;
154         char *outbuf = new char[outbuf_size];
155         memset(outbuf, 0, outbuf_size);
156         WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(),
157                 outbuf, outbuf_size, NULL, NULL);
158         std::string out(outbuf);
159         delete[] outbuf;
160         return out;
161 }
162
163 #endif // _WIN32
164
165 wchar_t *utf8_to_wide_c(const char *str)
166 {
167         std::wstring ret = utf8_to_wide(std::string(str));
168         size_t len = ret.length();
169         wchar_t *ret_c = new wchar_t[len + 1];
170         memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t));
171         return ret_c;
172 }
173
174
/**
 * Percent-encodes a string for use in a URI.
 *
 * Every byte that is not an unreserved character (ASCII letters, ASCII
 * digits, '-', '.', '_' and '~'; see RFC 3986, section 2.3) is replaced by a
 * percent sign followed by two uppercase hex digits.
 *
 * The character test is done with explicit ASCII ranges rather than
 * isalnum(), whose result depends on the current C locale.
 */
std::string urlencode(const std::string &str)
{
	static const char url_hex_chars[] = "0123456789ABCDEF";

	std::string encoded;
	encoded.reserve(str.size());
	for (unsigned char c : str) {
		const bool unreserved =
			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
			(c >= '0' && c <= '9') ||
			c == '-' || c == '.' || c == '_' || c == '~';

		if (unreserved) {
			encoded += static_cast<char>(c);
		} else {
			encoded += '%';
			encoded += url_hex_chars[c >> 4];
			encoded += url_hex_chars[c & 0x0f];
		}
	}
	return encoded;
}
192
193 std::string urldecode(const std::string &str)
194 {
195         // Inverse of urlencode
196         std::ostringstream oss(std::ios::binary);
197         for (u32 i = 0; i < str.size(); i++) {
198                 unsigned char highvalue, lowvalue;
199                 if (str[i] == '%' &&
200                                 hex_digit_decode(str[i+1], highvalue) &&
201                                 hex_digit_decode(str[i+2], lowvalue)) {
202                         oss << (char) ((highvalue << 4) | lowvalue);
203                         i += 2;
204                 } else {
205                         oss << str[i];
206                 }
207         }
208         return oss.str();
209 }
210
211 u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask)
212 {
213         u32 result = 0;
214         u32 mask = 0;
215         char *s = &str[0];
216         char *flagstr;
217         char *strpos = nullptr;
218
219         while ((flagstr = strtok_r(s, ",", &strpos))) {
220                 s = nullptr;
221
222                 while (*flagstr == ' ' || *flagstr == '\t')
223                         flagstr++;
224
225                 bool flagset = true;
226                 if (!strncasecmp(flagstr, "no", 2)) {
227                         flagset = false;
228                         flagstr += 2;
229                 }
230
231                 for (int i = 0; flagdesc[i].name; i++) {
232                         if (!strcasecmp(flagstr, flagdesc[i].name)) {
233                                 mask |= flagdesc[i].flag;
234                                 if (flagset)
235                                         result |= flagdesc[i].flag;
236                                 break;
237                         }
238                 }
239         }
240
241         if (flagmask)
242                 *flagmask = mask;
243
244         return result;
245 }
246
247 std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask)
248 {
249         std::string result;
250
251         for (int i = 0; flagdesc[i].name; i++) {
252                 if (flagmask & flagdesc[i].flag) {
253                         if (!(flags & flagdesc[i].flag))
254                                 result += "no";
255
256                         result += flagdesc[i].name;
257                         result += ", ";
258                 }
259         }
260
261         size_t len = result.length();
262         if (len >= 2)
263                 result.erase(len - 2, 2);
264
265         return result;
266 }
267
268 size_t mystrlcpy(char *dst, const char *src, size_t size)
269 {
270         size_t srclen  = strlen(src) + 1;
271         size_t copylen = MYMIN(srclen, size);
272
273         if (copylen > 0) {
274                 memcpy(dst, src, copylen);
275                 dst[copylen - 1] = '\0';
276         }
277
278         return srclen;
279 }
280
/**
 * Re-entrant tokenizer with the calling convention of strtok_r.
 *
 * @param s     string to tokenize on the first call, nullptr to continue
 *              from the position stored in *lasts
 * @param sep   set of separator characters
 * @param lasts scan position kept between calls
 * @return the next token (NUL-terminated in place), or nullptr if only
 *         separators or nothing at all remain
 */
char *mystrtok_r(char *s, const char *sep, char **lasts)
{
	char *token = s ? s : *lasts;

	// Skip separators in front of the token
	token += strspn(token, sep);
	if (*token == '\0')
		return nullptr;

	// Find the end of the token; terminate it and step past the separator
	char *end = token + strcspn(token, sep);
	if (*end != '\0') {
		*end = '\0';
		end++;
	}

	*lasts = end;
	return token;
}
306
307 u64 read_seed(const char *str)
308 {
309         char *endptr;
310         u64 num;
311
312         if (str[0] == '0' && str[1] == 'x')
313                 num = strtoull(str, &endptr, 16);
314         else
315                 num = strtoull(str, &endptr, 10);
316
317         if (*endptr)
318                 num = murmur_hash_64_ua(str, (int)strlen(str), 0x1337);
319
320         return num;
321 }
322
323 static bool parseHexColorString(const std::string &value, video::SColor &color,
324                 unsigned char default_alpha)
325 {
326         u8 components[] = {0x00, 0x00, 0x00, default_alpha}; // R,G,B,A
327
328         size_t len = value.size();
329         bool short_form;
330
331         if (len == 9 || len == 7) // #RRGGBBAA or #RRGGBB
332                 short_form = false;
333         else if (len == 5 || len == 4) // #RGBA or #RGB
334                 short_form = true;
335         else
336                 return false;
337
338         for (size_t pos = 1, cc = 0; pos < len; pos++, cc++) {
339                 if (short_form) {
340                         u8 d;
341                         if (!hex_digit_decode(value[pos], d))
342                                 return false;
343
344                         components[cc] = (d & 0xf) << 4 | (d & 0xf);
345                 } else {
346                         u8 d1, d2;
347                         if (!hex_digit_decode(value[pos], d1) ||
348                                         !hex_digit_decode(value[pos+1], d2))
349                                 return false;
350
351                         components[cc] = (d1 & 0xf) << 4 | (d2 & 0xf);
352                         pos++; // skip the second digit -- it's already used
353                 }
354         }
355
356         color.setRed(components[0]);
357         color.setGreen(components[1]);
358         color.setBlue(components[2]);
359         color.setAlpha(components[3]);
360
361         return true;
362 }
363
364 const static std::unordered_map<std::string, u32> s_named_colors = {
365         {"aliceblue",            0xf0f8ff},
366         {"antiquewhite",         0xfaebd7},
367         {"aqua",                 0x00ffff},
368         {"aquamarine",           0x7fffd4},
369         {"azure",                0xf0ffff},
370         {"beige",                0xf5f5dc},
371         {"bisque",               0xffe4c4},
372         {"black",                00000000},
373         {"blanchedalmond",       0xffebcd},
374         {"blue",                 0x0000ff},
375         {"blueviolet",           0x8a2be2},
376         {"brown",                0xa52a2a},
377         {"burlywood",            0xdeb887},
378         {"cadetblue",            0x5f9ea0},
379         {"chartreuse",           0x7fff00},
380         {"chocolate",            0xd2691e},
381         {"coral",                0xff7f50},
382         {"cornflowerblue",       0x6495ed},
383         {"cornsilk",             0xfff8dc},
384         {"crimson",              0xdc143c},
385         {"cyan",                 0x00ffff},
386         {"darkblue",             0x00008b},
387         {"darkcyan",             0x008b8b},
388         {"darkgoldenrod",        0xb8860b},
389         {"darkgray",             0xa9a9a9},
390         {"darkgreen",            0x006400},
391         {"darkgrey",             0xa9a9a9},
392         {"darkkhaki",            0xbdb76b},
393         {"darkmagenta",          0x8b008b},
394         {"darkolivegreen",       0x556b2f},
395         {"darkorange",           0xff8c00},
396         {"darkorchid",           0x9932cc},
397         {"darkred",              0x8b0000},
398         {"darksalmon",           0xe9967a},
399         {"darkseagreen",         0x8fbc8f},
400         {"darkslateblue",        0x483d8b},
401         {"darkslategray",        0x2f4f4f},
402         {"darkslategrey",        0x2f4f4f},
403         {"darkturquoise",        0x00ced1},
404         {"darkviolet",           0x9400d3},
405         {"deeppink",             0xff1493},
406         {"deepskyblue",          0x00bfff},
407         {"dimgray",              0x696969},
408         {"dimgrey",              0x696969},
409         {"dodgerblue",           0x1e90ff},
410         {"firebrick",            0xb22222},
411         {"floralwhite",          0xfffaf0},
412         {"forestgreen",          0x228b22},
413         {"fuchsia",              0xff00ff},
414         {"gainsboro",            0xdcdcdc},
415         {"ghostwhite",           0xf8f8ff},
416         {"gold",                 0xffd700},
417         {"goldenrod",            0xdaa520},
418         {"gray",                 0x808080},
419         {"green",                0x008000},
420         {"greenyellow",          0xadff2f},
421         {"grey",                 0x808080},
422         {"honeydew",             0xf0fff0},
423         {"hotpink",              0xff69b4},
424         {"indianred",            0xcd5c5c},
425         {"indigo",               0x4b0082},
426         {"ivory",                0xfffff0},
427         {"khaki",                0xf0e68c},
428         {"lavender",             0xe6e6fa},
429         {"lavenderblush",        0xfff0f5},
430         {"lawngreen",            0x7cfc00},
431         {"lemonchiffon",         0xfffacd},
432         {"lightblue",            0xadd8e6},
433         {"lightcoral",           0xf08080},
434         {"lightcyan",            0xe0ffff},
435         {"lightgoldenrodyellow", 0xfafad2},
436         {"lightgray",            0xd3d3d3},
437         {"lightgreen",           0x90ee90},
438         {"lightgrey",            0xd3d3d3},
439         {"lightpink",            0xffb6c1},
440         {"lightsalmon",          0xffa07a},
441         {"lightseagreen",        0x20b2aa},
442         {"lightskyblue",         0x87cefa},
443         {"lightslategray",       0x778899},
444         {"lightslategrey",       0x778899},
445         {"lightsteelblue",       0xb0c4de},
446         {"lightyellow",          0xffffe0},
447         {"lime",                 0x00ff00},
448         {"limegreen",            0x32cd32},
449         {"linen",                0xfaf0e6},
450         {"magenta",              0xff00ff},
451         {"maroon",               0x800000},
452         {"mediumaquamarine",     0x66cdaa},
453         {"mediumblue",           0x0000cd},
454         {"mediumorchid",         0xba55d3},
455         {"mediumpurple",         0x9370db},
456         {"mediumseagreen",       0x3cb371},
457         {"mediumslateblue",      0x7b68ee},
458         {"mediumspringgreen",    0x00fa9a},
459         {"mediumturquoise",      0x48d1cc},
460         {"mediumvioletred",      0xc71585},
461         {"midnightblue",         0x191970},
462         {"mintcream",            0xf5fffa},
463         {"mistyrose",            0xffe4e1},
464         {"moccasin",             0xffe4b5},
465         {"navajowhite",          0xffdead},
466         {"navy",                 0x000080},
467         {"oldlace",              0xfdf5e6},
468         {"olive",                0x808000},
469         {"olivedrab",            0x6b8e23},
470         {"orange",               0xffa500},
471         {"orangered",            0xff4500},
472         {"orchid",               0xda70d6},
473         {"palegoldenrod",        0xeee8aa},
474         {"palegreen",            0x98fb98},
475         {"paleturquoise",        0xafeeee},
476         {"palevioletred",        0xdb7093},
477         {"papayawhip",           0xffefd5},
478         {"peachpuff",            0xffdab9},
479         {"peru",                 0xcd853f},
480         {"pink",                 0xffc0cb},
481         {"plum",                 0xdda0dd},
482         {"powderblue",           0xb0e0e6},
483         {"purple",               0x800080},
484         {"red",                  0xff0000},
485         {"rosybrown",            0xbc8f8f},
486         {"royalblue",            0x4169e1},
487         {"saddlebrown",          0x8b4513},
488         {"salmon",               0xfa8072},
489         {"sandybrown",           0xf4a460},
490         {"seagreen",             0x2e8b57},
491         {"seashell",             0xfff5ee},
492         {"sienna",               0xa0522d},
493         {"silver",               0xc0c0c0},
494         {"skyblue",              0x87ceeb},
495         {"slateblue",            0x6a5acd},
496         {"slategray",            0x708090},
497         {"slategrey",            0x708090},
498         {"snow",                 0xfffafa},
499         {"springgreen",          0x00ff7f},
500         {"steelblue",            0x4682b4},
501         {"tan",                  0xd2b48c},
502         {"teal",                 0x008080},
503         {"thistle",              0xd8bfd8},
504         {"tomato",               0xff6347},
505         {"turquoise",            0x40e0d0},
506         {"violet",               0xee82ee},
507         {"wheat",                0xf5deb3},
508         {"white",                0xffffff},
509         {"whitesmoke",           0xf5f5f5},
510         {"yellow",               0xffff00},
511         {"yellowgreen",          0x9acd32}
512 };
513
514 static bool parseNamedColorString(const std::string &value, video::SColor &color)
515 {
516         std::string color_name;
517         std::string alpha_string;
518
519         /* If the string has a # in it, assume this is the start of a specified
520          * alpha value (if it isn't the string is invalid and the error will be
521          * caught later on, either because the color name won't be found or the
522          * alpha value will fail conversion)
523          */
524         size_t alpha_pos = value.find('#');
525         if (alpha_pos != std::string::npos) {
526                 color_name = value.substr(0, alpha_pos);
527                 alpha_string = value.substr(alpha_pos + 1);
528         } else {
529                 color_name = value;
530         }
531
532         color_name = lowercase(color_name);
533
534         auto it = s_named_colors.find(color_name);
535         if (it == s_named_colors.end())
536                 return false;
537
538         u32 color_temp = it->second;
539
540         /* An empty string for alpha is ok (none of the color table entries
541          * have an alpha value either). Color strings without an alpha specified
542          * are interpreted as fully opaque
543          */
544         if (!alpha_string.empty()) {
545                 if (alpha_string.size() == 1) {
546                         u8 d;
547                         if (!hex_digit_decode(alpha_string[0], d))
548                                 return false;
549
550                         color_temp |= ((d & 0xf) << 4 | (d & 0xf)) << 24;
551                 } else if (alpha_string.size() == 2) {
552                         u8 d1, d2;
553                         if (!hex_digit_decode(alpha_string[0], d1)
554                                         || !hex_digit_decode(alpha_string[1], d2))
555                                 return false;
556
557                         color_temp |= ((d1 & 0xf) << 4 | (d2 & 0xf)) << 24;
558                 } else {
559                         return false;
560                 }
561         } else {
562                 color_temp |= 0xff << 24; // Fully opaque
563         }
564
565         color = video::SColor(color_temp);
566
567         return true;
568 }
569
570 bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
571                 unsigned char default_alpha)
572 {
573         bool success;
574
575         if (value[0] == '#')
576                 success = parseHexColorString(value, color, default_alpha);
577         else
578                 success = parseNamedColorString(value, color);
579
580         if (!success && !quiet)
581                 errorstream << "Invalid color: \"" << value << "\"" << std::endl;
582
583         return success;
584 }
585
// Replaces every occurrence of the character `from` in `str` with `to`, in place.
void str_replace(std::string &str, char from, char to)
{
	for (char &ch : str) {
		if (ch == from)
			ch = to;
	}
}
590
591 /* Translated strings have the following format:
592  * \x1bT marks the beginning of a translated string
593  * \x1bE marks its end
594  *
595  * \x1bF marks the beginning of an argument, and \x1bE its end.
596  *
597  * Arguments are *not* translated, as they may contain escape codes.
598  * Thus, if you want a translated argument, it should be inside \x1bT/\x1bE tags as well.
599  *
600  * This representation is chosen so that clients ignoring escape codes will
601  * see untranslated strings.
602  *
603  * For instance, suppose we have a string such as "@1 Wool" with the argument "White"
604  * The string will be sent as "\x1bT\x1bF\x1bTWhite\x1bE\x1bE Wool\x1bE"
605  * To translate this string, we extract what is inside \x1bT/\x1bE tags.
606  * When we notice the \x1bF tag, we recursively extract what is there up to the \x1bE end tag,
607  * translating it as well.
608  * We get the argument "White", translated, and create a template string with "@1" instead of it.
609  * We finally get the template "@1 Wool" that was used in the beginning, which we translate
610  * before filling it again.
611  */
612
613 void translate_all(const std::wstring &s, size_t &i,
614                 Translations *translations, std::wstring &res);
615
616 void translate_string(const std::wstring &s, Translations *translations,
617                 const std::wstring &textdomain, size_t &i, std::wstring &res)
618 {
619         std::wostringstream output;
620         std::vector<std::wstring> args;
621         int arg_number = 1;
622         while (i < s.length()) {
623                 // Not an escape sequence: just add the character.
624                 if (s[i] != '\x1b') {
625                         output.put(s[i]);
626                         // The character is a literal '@'; add it twice
627                         // so that it is not mistaken for an argument.
628                         if (s[i] == L'@')
629                                 output.put(L'@');
630                         ++i;
631                         continue;
632                 }
633
634                 // We have an escape sequence: locate it and its data
635                 // It is either a single character, or it begins with '('
636                 // and extends up to the following ')', with '\' as an escape character.
637                 ++i;
638                 size_t start_index = i;
639                 size_t length;
640                 if (i == s.length()) {
641                         length = 0;
642                 } else if (s[i] == L'(') {
643                         ++i;
644                         ++start_index;
645                         while (i < s.length() && s[i] != L')') {
646                                 if (s[i] == L'\\')
647                                         ++i;
648                                 ++i;
649                         }
650                         length = i - start_index;
651                         ++i;
652                         if (i > s.length())
653                                 i = s.length();
654                 } else {
655                         ++i;
656                         length = 1;
657                 }
658                 std::wstring escape_sequence(s, start_index, length);
659
660                 // The escape sequence is now reconstructed.
661                 std::vector<std::wstring> parts = split(escape_sequence, L'@');
662                 if (parts[0] == L"E") {
663                         // "End of translation" escape sequence. We are done locating the string to translate.
664                         break;
665                 } else if (parts[0] == L"F") {
666                         // "Start of argument" escape sequence.
667                         // Recursively translate the argument, and add it to the argument list.
668                         // Add an "@n" instead of the argument to the template to translate.
669                         if (arg_number >= 10) {
670                                 errorstream << "Ignoring too many arguments to translation" << std::endl;
671                                 std::wstring arg;
672                                 translate_all(s, i, translations, arg);
673                                 args.push_back(arg);
674                                 continue;
675                         }
676                         output.put(L'@');
677                         output << arg_number;
678                         ++arg_number;
679                         std::wstring arg;
680                         translate_all(s, i, translations, arg);
681                         args.push_back(arg);
682                 } else {
683                         // This is an escape sequence *inside* the template string to translate itself.
684                         // This should not happen, show an error message.
685                         errorstream << "Ignoring escape sequence '"
686                                 << wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
687                 }
688         }
689
690         std::wstring toutput;
691         // Translate the template.
692         if (translations != nullptr)
693                 toutput = translations->getTranslation(
694                                 textdomain, output.str());
695         else
696                 toutput = output.str();
697
698         // Put back the arguments in the translated template.
699         std::wostringstream result;
700         size_t j = 0;
701         while (j < toutput.length()) {
702                 // Normal character, add it to output and continue.
703                 if (toutput[j] != L'@' || j == toutput.length() - 1) {
704                         result.put(toutput[j]);
705                         ++j;
706                         continue;
707                 }
708
709                 ++j;
710                 // Literal escape for '@'.
711                 if (toutput[j] == L'@') {
712                         result.put(L'@');
713                         ++j;
714                         continue;
715                 }
716
717                 // Here we have an argument; get its index and add the translated argument to the output.
718                 int arg_index = toutput[j] - L'1';
719                 ++j;
720                 if (0 <= arg_index && (size_t)arg_index < args.size()) {
721                         result << args[arg_index];
722                 } else {
723                         // This is not allowed: show an error message
724                         errorstream << "Ignoring out-of-bounds argument escape sequence in translation" << std::endl;
725                 }
726         }
727         res = result.str();
728 }
729
730 void translate_all(const std::wstring &s, size_t &i,
731                 Translations *translations, std::wstring &res)
732 {
733         std::wostringstream output;
734         while (i < s.length()) {
735                 // Not an escape sequence: just add the character.
736                 if (s[i] != '\x1b') {
737                         output.put(s[i]);
738                         ++i;
739                         continue;
740                 }
741
742                 // We have an escape sequence: locate it and its data
743                 // It is either a single character, or it begins with '('
744                 // and extends up to the following ')', with '\' as an escape character.
745                 size_t escape_start = i;
746                 ++i;
747                 size_t start_index = i;
748                 size_t length;
749                 if (i == s.length()) {
750                         length = 0;
751                 } else if (s[i] == L'(') {
752                         ++i;
753                         ++start_index;
754                         while (i < s.length() && s[i] != L')') {
755                                 if (s[i] == L'\\') {
756                                         ++i;
757                                 }
758                                 ++i;
759                         }
760                         length = i - start_index;
761                         ++i;
762                         if (i > s.length())
763                                 i = s.length();
764                 } else {
765                         ++i;
766                         length = 1;
767                 }
768                 std::wstring escape_sequence(s, start_index, length);
769
770                 // The escape sequence is now reconstructed.
771                 std::vector<std::wstring> parts = split(escape_sequence, L'@');
772                 if (parts[0] == L"E") {
773                         // "End of argument" escape sequence. Exit.
774                         break;
775                 } else if (parts[0] == L"T") {
776                         // Beginning of translated string.
777                         std::wstring textdomain;
778                         if (parts.size() > 1)
779                                 textdomain = parts[1];
780                         std::wstring translated;
781                         translate_string(s, translations, textdomain, i, translated);
782                         output << translated;
783                 } else {
784                         // Another escape sequence, such as colors. Preserve it.
785                         output << std::wstring(s, escape_start, i - escape_start);
786                 }
787         }
788
789         res = output.str();
790 }
791
792 // Translate string server side
793 std::wstring translate_string(const std::wstring &s, Translations *translations)
794 {
795         size_t i = 0;
796         std::wstring res;
797         translate_all(s, i, translations, res);
798         return res;
799 }
800
801 // Translate string client side
802 std::wstring translate_string(const std::wstring &s)
803 {
804 #ifdef SERVER
805         return translate_string(s, nullptr);
806 #else
807         return translate_string(s, g_client_translations);
808 #endif
809 }
810
// Reserved device names that are problematic as file or directory names, see
// https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
static const std::array<std::wstring, 22> disallowed_dir_names = {
	L"CON", L"PRN", L"AUX", L"NUL",
	L"COM1", L"COM2", L"COM3", L"COM4", L"COM5",
	L"COM6", L"COM7", L"COM8", L"COM9",
	L"LPT1", L"LPT2", L"LPT3", L"LPT4", L"LPT5",
	L"LPT6", L"LPT7", L"LPT8", L"LPT9",
};
837
/**
 * Characters that must not appear in the name of a created directory
 */
static const std::wstring disallowed_path_chars(L"<>:\"/\\|?*.");
842
843 /**
844  * Sanitize the name of a new directory. This consists of two stages:
845  * 1. Check for 'reserved filenames' that can't be used on some filesystems
846  *      and add a prefix to them
847  * 2. Remove 'unsafe' characters from the name by replacing them with '_'
848  */
849 std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
850 {
851         std::wstring safe_name = utf8_to_wide(str);
852
853         for (std::wstring disallowed_name : disallowed_dir_names) {
854                 if (str_equal(safe_name, disallowed_name, true)) {
855                         safe_name = utf8_to_wide(optional_prefix) + safe_name;
856                         break;
857                 }
858         }
859
860         for (unsigned long i = 0; i < safe_name.length(); i++) {
861                 bool is_valid = true;
862
863                 // Unlikely, but control characters should always be blacklisted
864                 if (safe_name[i] < 32) {
865                         is_valid = false;
866                 } else if (safe_name[i] < 128) {
867                         is_valid = disallowed_path_chars.find_first_of(safe_name[i])
868                                         == std::wstring::npos;
869                 }
870
871                 if (!is_valid)
872                         safe_name[i] = '_';
873         }
874
875         return wide_to_utf8(safe_name);
876 }