/*
Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
27 #include "translation.h"
33 #include <unordered_map>
// Platform headers for the character-set conversion backend:
// iconv everywhere except Windows, which uses the Win32 conversion API.
#ifndef _WIN32
	#include <iconv.h>
#else
	#define _WIN32_WINNT 0x0501
	#include <windows.h>
#endif

// BSD_ICONV_USED selects a `const char *` input pointer for iconv()
// (see convert()) on the BSD variants matched here.
#ifdef __NetBSD__
	#include <sys/param.h>
	#if __NetBSD_Version__ <= 999001500
		#define BSD_ICONV_USED
	#endif
#elif defined(_ICONV_H_) && (defined(__FreeBSD__) || defined(__OpenBSD__) || \
	defined(__DragonFly__))
	#define BSD_ICONV_USED
#endif
54 static bool convert(const char *to, const char *from, char *outbuf,
55 size_t *outbuf_size, char *inbuf, size_t inbuf_size)
57 iconv_t cd = iconv_open(to, from);
60 const char *inbuf_ptr = inbuf;
62 char *inbuf_ptr = inbuf;
64 char *outbuf_ptr = outbuf;
66 size_t *inbuf_left_ptr = &inbuf_size;
68 const size_t old_outbuf_size = *outbuf_size;
69 size_t old_size = inbuf_size;
70 while (inbuf_size > 0) {
71 iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size);
72 if (inbuf_size == old_size) {
76 old_size = inbuf_size;
80 *outbuf_size = old_outbuf_size - *outbuf_size;
85 // On Android iconv disagrees how big a wchar_t is for whatever reason
86 const char *DEFAULT_ENCODING = "UTF-32LE";
87 #elif defined(__NetBSD__)
88 // NetBSD does not allow "WCHAR_T" as a charset input to iconv.
89 #include <sys/endian.h>
90 #if BYTE_ORDER == BIG_ENDIAN
91 const char *DEFAULT_ENCODING = "UTF-32BE";
93 const char *DEFAULT_ENCODING = "UTF-32LE";
96 const char *DEFAULT_ENCODING = "WCHAR_T";
99 std::wstring utf8_to_wide(const std::string &input)
101 const size_t inbuf_size = input.length();
102 // maximum possible size, every character is sizeof(wchar_t) bytes
103 size_t outbuf_size = input.length() * sizeof(wchar_t);
105 char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
106 memcpy(inbuf, input.c_str(), inbuf_size);
108 out.resize(outbuf_size / sizeof(wchar_t));
110 #if defined(__ANDROID__) || defined(__NetBSD__)
111 SANITY_CHECK(sizeof(wchar_t) == 4);
114 char *outbuf = reinterpret_cast<char*>(&out[0]);
115 if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
116 infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
117 << " into wstring" << std::endl;
119 return L"<invalid UTF-8 string>";
123 out.resize(outbuf_size / sizeof(wchar_t));
127 std::string wide_to_utf8(const std::wstring &input)
129 const size_t inbuf_size = input.length() * sizeof(wchar_t);
130 // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
131 size_t outbuf_size = input.length() * 4;
133 char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
134 memcpy(inbuf, input.c_str(), inbuf_size);
136 out.resize(outbuf_size);
138 if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
139 infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
140 << " into UTF-8 string" << std::endl;
142 return "<invalid wide string>";
146 out.resize(outbuf_size);
152 std::wstring utf8_to_wide(const std::string &input)
154 size_t outbuf_size = input.size() + 1;
155 wchar_t *outbuf = new wchar_t[outbuf_size];
156 memset(outbuf, 0, outbuf_size * sizeof(wchar_t));
157 MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(),
158 outbuf, outbuf_size);
159 std::wstring out(outbuf);
164 std::string wide_to_utf8(const std::wstring &input)
166 size_t outbuf_size = (input.size() + 1) * 6;
167 char *outbuf = new char[outbuf_size];
168 memset(outbuf, 0, outbuf_size);
169 WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(),
170 outbuf, outbuf_size, NULL, NULL);
171 std::string out(outbuf);
178 wchar_t *utf8_to_wide_c(const char *str)
180 std::wstring ret = utf8_to_wide(std::string(str));
181 size_t len = ret.length();
182 wchar_t *ret_c = new wchar_t[len + 1];
183 memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t));
/*
 * Percent-encodes a string for use in a URI.
 *
 * Encodes non-unreserved URI characters by a percent sign
 * followed by two hex digits. See RFC 3986, section 2.3.
 * Unreserved characters (ASCII letters, digits, '-', '.', '_', '~') are
 * copied unchanged.
 */
std::string urlencode(const std::string &str)
{
	static const char url_hex_chars[] = "0123456789ABCDEF";

	std::string encoded;
	encoded.reserve(str.size());
	for (unsigned char c : str) {
		// Explicit ASCII ranges: isalnum() depends on the current locale and
		// may accept bytes >= 0x80, which must always be escaped.
		const bool unreserved =
			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
			(c >= '0' && c <= '9') ||
			c == '-' || c == '.' || c == '_' || c == '~';
		if (unreserved) {
			encoded += static_cast<char>(c);
		} else {
			encoded += '%';
			encoded += url_hex_chars[(c & 0xf0) >> 4];
			encoded += url_hex_chars[c & 0x0f];
		}
	}
	return encoded;
}
206 std::string urldecode(const std::string &str)
208 // Inverse of urlencode
209 std::ostringstream oss(std::ios::binary);
210 for (u32 i = 0; i < str.size(); i++) {
211 unsigned char highvalue, lowvalue;
213 hex_digit_decode(str[i+1], highvalue) &&
214 hex_digit_decode(str[i+2], lowvalue)) {
215 oss << (char) ((highvalue << 4) | lowvalue);
224 u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask)
230 char *strpos = nullptr;
232 while ((flagstr = strtok_r(s, ",", &strpos))) {
235 while (*flagstr == ' ' || *flagstr == '\t')
239 if (!strncasecmp(flagstr, "no", 2)) {
244 for (int i = 0; flagdesc[i].name; i++) {
245 if (!strcasecmp(flagstr, flagdesc[i].name)) {
246 mask |= flagdesc[i].flag;
248 result |= flagdesc[i].flag;
260 std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask)
264 for (int i = 0; flagdesc[i].name; i++) {
265 if (flagmask & flagdesc[i].flag) {
266 if (!(flags & flagdesc[i].flag))
269 result += flagdesc[i].name;
274 size_t len = result.length();
276 result.erase(len - 2, 2);
/*
 * Copies src into dst, writing at most `size` bytes and always
 * NUL-terminating the result when size > 0. Nothing is written when size is 0.
 *
 * Returns strlen(src) + 1, i.e. the buffer size needed to hold all of src;
 * a return value greater than `size` means the copy was truncated.
 */
size_t mystrlcpy(char *dst, const char *src, size_t size)
{
	size_t srclen  = strlen(src) + 1;
	size_t copylen = srclen < size ? srclen : size;

	if (copylen > 0) {
		memcpy(dst, src, copylen);
		// Overwrites the last copied byte: a no-op when src fit entirely,
		// the terminator when it was truncated.
		dst[copylen - 1] = '\0';
	}

	return srclen;
}
/*
 * Re-entrant tokenizer with strtok_r-like behaviour.
 *
 * s      string to tokenise on the first call, null on subsequent calls
 * sep    set of separator characters
 * lasts  caller-provided storage for the scan position between calls
 *
 * Separators at the start are skipped; the separator ending the token is
 * overwritten with NUL, so the input string is modified. Returns the next
 * token, or null when only separators (or nothing) remain.
 */
char *mystrtok_r(char *s, const char *sep, char **lasts)
{
	char *t;

	if (!s)
		s = *lasts;

	// Skip leading separators
	while (*s && strchr(sep, *s))
		s++;

	if (!*s)
		return nullptr;

	// Find the end of the token and terminate it
	t = s;
	while (*t) {
		if (strchr(sep, *t)) {
			*t++ = '\0';
			break;
		}
		t++;
	}

	*lasts = t;
	return s;
}
320 u64 read_seed(const char *str)
325 if (str[0] == '0' && str[1] == 'x')
326 num = strtoull(str, &endptr, 16);
328 num = strtoull(str, &endptr, 10);
331 num = murmur_hash_64_ua(str, (int)strlen(str), 0x1337);
336 static bool parseHexColorString(const std::string &value, video::SColor &color,
337 unsigned char default_alpha)
339 u8 components[] = {0x00, 0x00, 0x00, default_alpha}; // R,G,B,A
341 size_t len = value.size();
344 if (len == 9 || len == 7) // #RRGGBBAA or #RRGGBB
346 else if (len == 5 || len == 4) // #RGBA or #RGB
351 for (size_t pos = 1, cc = 0; pos < len; pos++, cc++) {
354 if (!hex_digit_decode(value[pos], d))
357 components[cc] = (d & 0xf) << 4 | (d & 0xf);
360 if (!hex_digit_decode(value[pos], d1) ||
361 !hex_digit_decode(value[pos+1], d2))
364 components[cc] = (d1 & 0xf) << 4 | (d2 & 0xf);
365 pos++; // skip the second digit -- it's already used
369 color.setRed(components[0]);
370 color.setGreen(components[1]);
371 color.setBlue(components[2]);
372 color.setAlpha(components[3]);
377 const static std::unordered_map<std::string, u32> s_named_colors = {
378 {"aliceblue", 0xf0f8ff},
379 {"antiquewhite", 0xfaebd7},
381 {"aquamarine", 0x7fffd4},
384 {"bisque", 0xffe4c4},
386 {"blanchedalmond", 0xffebcd},
388 {"blueviolet", 0x8a2be2},
390 {"burlywood", 0xdeb887},
391 {"cadetblue", 0x5f9ea0},
392 {"chartreuse", 0x7fff00},
393 {"chocolate", 0xd2691e},
395 {"cornflowerblue", 0x6495ed},
396 {"cornsilk", 0xfff8dc},
397 {"crimson", 0xdc143c},
399 {"darkblue", 0x00008b},
400 {"darkcyan", 0x008b8b},
401 {"darkgoldenrod", 0xb8860b},
402 {"darkgray", 0xa9a9a9},
403 {"darkgreen", 0x006400},
404 {"darkgrey", 0xa9a9a9},
405 {"darkkhaki", 0xbdb76b},
406 {"darkmagenta", 0x8b008b},
407 {"darkolivegreen", 0x556b2f},
408 {"darkorange", 0xff8c00},
409 {"darkorchid", 0x9932cc},
410 {"darkred", 0x8b0000},
411 {"darksalmon", 0xe9967a},
412 {"darkseagreen", 0x8fbc8f},
413 {"darkslateblue", 0x483d8b},
414 {"darkslategray", 0x2f4f4f},
415 {"darkslategrey", 0x2f4f4f},
416 {"darkturquoise", 0x00ced1},
417 {"darkviolet", 0x9400d3},
418 {"deeppink", 0xff1493},
419 {"deepskyblue", 0x00bfff},
420 {"dimgray", 0x696969},
421 {"dimgrey", 0x696969},
422 {"dodgerblue", 0x1e90ff},
423 {"firebrick", 0xb22222},
424 {"floralwhite", 0xfffaf0},
425 {"forestgreen", 0x228b22},
426 {"fuchsia", 0xff00ff},
427 {"gainsboro", 0xdcdcdc},
428 {"ghostwhite", 0xf8f8ff},
430 {"goldenrod", 0xdaa520},
433 {"greenyellow", 0xadff2f},
435 {"honeydew", 0xf0fff0},
436 {"hotpink", 0xff69b4},
437 {"indianred", 0xcd5c5c},
438 {"indigo", 0x4b0082},
441 {"lavender", 0xe6e6fa},
442 {"lavenderblush", 0xfff0f5},
443 {"lawngreen", 0x7cfc00},
444 {"lemonchiffon", 0xfffacd},
445 {"lightblue", 0xadd8e6},
446 {"lightcoral", 0xf08080},
447 {"lightcyan", 0xe0ffff},
448 {"lightgoldenrodyellow", 0xfafad2},
449 {"lightgray", 0xd3d3d3},
450 {"lightgreen", 0x90ee90},
451 {"lightgrey", 0xd3d3d3},
452 {"lightpink", 0xffb6c1},
453 {"lightsalmon", 0xffa07a},
454 {"lightseagreen", 0x20b2aa},
455 {"lightskyblue", 0x87cefa},
456 {"lightslategray", 0x778899},
457 {"lightslategrey", 0x778899},
458 {"lightsteelblue", 0xb0c4de},
459 {"lightyellow", 0xffffe0},
461 {"limegreen", 0x32cd32},
463 {"magenta", 0xff00ff},
464 {"maroon", 0x800000},
465 {"mediumaquamarine", 0x66cdaa},
466 {"mediumblue", 0x0000cd},
467 {"mediumorchid", 0xba55d3},
468 {"mediumpurple", 0x9370db},
469 {"mediumseagreen", 0x3cb371},
470 {"mediumslateblue", 0x7b68ee},
471 {"mediumspringgreen", 0x00fa9a},
472 {"mediumturquoise", 0x48d1cc},
473 {"mediumvioletred", 0xc71585},
474 {"midnightblue", 0x191970},
475 {"mintcream", 0xf5fffa},
476 {"mistyrose", 0xffe4e1},
477 {"moccasin", 0xffe4b5},
478 {"navajowhite", 0xffdead},
480 {"oldlace", 0xfdf5e6},
482 {"olivedrab", 0x6b8e23},
483 {"orange", 0xffa500},
484 {"orangered", 0xff4500},
485 {"orchid", 0xda70d6},
486 {"palegoldenrod", 0xeee8aa},
487 {"palegreen", 0x98fb98},
488 {"paleturquoise", 0xafeeee},
489 {"palevioletred", 0xdb7093},
490 {"papayawhip", 0xffefd5},
491 {"peachpuff", 0xffdab9},
495 {"powderblue", 0xb0e0e6},
496 {"purple", 0x800080},
497 {"rebeccapurple", 0x663399},
499 {"rosybrown", 0xbc8f8f},
500 {"royalblue", 0x4169e1},
501 {"saddlebrown", 0x8b4513},
502 {"salmon", 0xfa8072},
503 {"sandybrown", 0xf4a460},
504 {"seagreen", 0x2e8b57},
505 {"seashell", 0xfff5ee},
506 {"sienna", 0xa0522d},
507 {"silver", 0xc0c0c0},
508 {"skyblue", 0x87ceeb},
509 {"slateblue", 0x6a5acd},
510 {"slategray", 0x708090},
511 {"slategrey", 0x708090},
513 {"springgreen", 0x00ff7f},
514 {"steelblue", 0x4682b4},
517 {"thistle", 0xd8bfd8},
518 {"tomato", 0xff6347},
519 {"turquoise", 0x40e0d0},
520 {"violet", 0xee82ee},
523 {"whitesmoke", 0xf5f5f5},
524 {"yellow", 0xffff00},
525 {"yellowgreen", 0x9acd32}
528 static bool parseNamedColorString(const std::string &value, video::SColor &color)
530 std::string color_name;
531 std::string alpha_string;
533 /* If the string has a # in it, assume this is the start of a specified
534 * alpha value (if it isn't the string is invalid and the error will be
535 * caught later on, either because the color name won't be found or the
536 * alpha value will fail conversion)
538 size_t alpha_pos = value.find('#');
539 if (alpha_pos != std::string::npos) {
540 color_name = value.substr(0, alpha_pos);
541 alpha_string = value.substr(alpha_pos + 1);
546 color_name = lowercase(color_name);
548 auto it = s_named_colors.find(color_name);
549 if (it == s_named_colors.end())
552 u32 color_temp = it->second;
554 /* An empty string for alpha is ok (none of the color table entries
555 * have an alpha value either). Color strings without an alpha specified
556 * are interpreted as fully opaque
558 if (!alpha_string.empty()) {
559 if (alpha_string.size() == 1) {
561 if (!hex_digit_decode(alpha_string[0], d))
564 color_temp |= ((d & 0xf) << 4 | (d & 0xf)) << 24;
565 } else if (alpha_string.size() == 2) {
567 if (!hex_digit_decode(alpha_string[0], d1)
568 || !hex_digit_decode(alpha_string[1], d2))
571 color_temp |= ((d1 & 0xf) << 4 | (d2 & 0xf)) << 24;
576 color_temp |= 0xff << 24; // Fully opaque
579 color = video::SColor(color_temp);
584 bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
585 unsigned char default_alpha)
590 success = parseHexColorString(value, color, default_alpha);
592 success = parseNamedColorString(value, color);
594 if (!success && !quiet)
595 errorstream << "Invalid color: \"" << value << "\"" << std::endl;
// Replaces every occurrence of the character `from` in `str` with `to`, in place.
void str_replace(std::string &str, char from, char to)
{
	std::replace(str.begin(), str.end(), from, to);
}
/* Translated strings have the following format:
 * \x1bT marks the beginning of a translated string
 * \x1bE marks its end
 *
 * \x1bF marks the beginning of an argument, and \x1bE its end.
 *
 * Arguments are *not* translated, as they may contain escape codes.
 * Thus, if you want a translated argument, it should be inside \x1bT/\x1bE tags as well.
 *
 * This representation is chosen so that clients ignoring escape codes will
 * see untranslated strings.
 *
 * For instance, suppose we have a string such as "@1 Wool" with the argument "White"
 * The string will be sent as "\x1bT\x1bF\x1bTWhite\x1bE\x1bE Wool\x1bE"
 * To translate this string, we extract what is inside \x1bT/\x1bE tags.
 * When we notice the \x1bF tag, we recursively extract what is there up to the \x1bE end tag,
 * translating it as well.
 * We get the argument "White", translated, and create a template string with "@1" instead of it.
 * We finally get the template "@1 Wool" that was used in the beginning, which we translate
 * before filling it again.
 */
627 void translate_all(const std::wstring &s, size_t &i,
628 Translations *translations, std::wstring &res);
630 void translate_string(const std::wstring &s, Translations *translations,
631 const std::wstring &textdomain, size_t &i, std::wstring &res)
633 std::wostringstream output;
634 std::vector<std::wstring> args;
636 while (i < s.length()) {
637 // Not an escape sequence: just add the character.
638 if (s[i] != '\x1b') {
640 // The character is a literal '@'; add it twice
641 // so that it is not mistaken for an argument.
648 // We have an escape sequence: locate it and its data
649 // It is either a single character, or it begins with '('
650 // and extends up to the following ')', with '\' as an escape character.
652 size_t start_index = i;
654 if (i == s.length()) {
656 } else if (s[i] == L'(') {
659 while (i < s.length() && s[i] != L')') {
664 length = i - start_index;
672 std::wstring escape_sequence(s, start_index, length);
674 // The escape sequence is now reconstructed.
675 std::vector<std::wstring> parts = split(escape_sequence, L'@');
676 if (parts[0] == L"E") {
677 // "End of translation" escape sequence. We are done locating the string to translate.
679 } else if (parts[0] == L"F") {
680 // "Start of argument" escape sequence.
681 // Recursively translate the argument, and add it to the argument list.
682 // Add an "@n" instead of the argument to the template to translate.
683 if (arg_number >= 10) {
684 errorstream << "Ignoring too many arguments to translation" << std::endl;
686 translate_all(s, i, translations, arg);
691 output << arg_number;
694 translate_all(s, i, translations, arg);
697 // This is an escape sequence *inside* the template string to translate itself.
698 // This should not happen, show an error message.
699 errorstream << "Ignoring escape sequence '"
700 << wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
704 std::wstring toutput;
705 // Translate the template.
706 if (translations != nullptr)
707 toutput = translations->getTranslation(
708 textdomain, output.str());
710 toutput = output.str();
712 // Put back the arguments in the translated template.
713 std::wostringstream result;
715 while (j < toutput.length()) {
716 // Normal character, add it to output and continue.
717 if (toutput[j] != L'@' || j == toutput.length() - 1) {
718 result.put(toutput[j]);
724 // Literal escape for '@'.
725 if (toutput[j] == L'@') {
731 // Here we have an argument; get its index and add the translated argument to the output.
732 int arg_index = toutput[j] - L'1';
734 if (0 <= arg_index && (size_t)arg_index < args.size()) {
735 result << args[arg_index];
737 // This is not allowed: show an error message
738 errorstream << "Ignoring out-of-bounds argument escape sequence in translation" << std::endl;
744 void translate_all(const std::wstring &s, size_t &i,
745 Translations *translations, std::wstring &res)
747 std::wostringstream output;
748 while (i < s.length()) {
749 // Not an escape sequence: just add the character.
750 if (s[i] != '\x1b') {
756 // We have an escape sequence: locate it and its data
757 // It is either a single character, or it begins with '('
758 // and extends up to the following ')', with '\' as an escape character.
759 size_t escape_start = i;
761 size_t start_index = i;
763 if (i == s.length()) {
765 } else if (s[i] == L'(') {
768 while (i < s.length() && s[i] != L')') {
774 length = i - start_index;
782 std::wstring escape_sequence(s, start_index, length);
784 // The escape sequence is now reconstructed.
785 std::vector<std::wstring> parts = split(escape_sequence, L'@');
786 if (parts[0] == L"E") {
787 // "End of argument" escape sequence. Exit.
789 } else if (parts[0] == L"T") {
790 // Beginning of translated string.
791 std::wstring textdomain;
792 if (parts.size() > 1)
793 textdomain = parts[1];
794 std::wstring translated;
795 translate_string(s, translations, textdomain, i, translated);
796 output << translated;
798 // Another escape sequence, such as colors. Preserve it.
799 output << std::wstring(s, escape_start, i - escape_start);
806 // Translate string server side
807 std::wstring translate_string(const std::wstring &s, Translations *translations)
811 translate_all(s, i, translations, res);
815 // Translate string client side
816 std::wstring translate_string(const std::wstring &s)
819 return translate_string(s, nullptr);
821 return translate_string(s, g_client_translations);
825 static const std::array<std::wstring, 30> disallowed_dir_names = {
826 // Problematic filenames from here:
827 // https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
828 // Plus undocumented values from here:
829 // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
/**
 * List of characters that are blacklisted from created directories
 */
static const std::wstring disallowed_path_chars = L"<>:\"/\\|?*.";
868 std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
870 std::wstring safe_name = utf8_to_wide(str);
872 for (std::wstring disallowed_name : disallowed_dir_names) {
873 if (str_equal(safe_name, disallowed_name, true)) {
874 safe_name = utf8_to_wide(optional_prefix) + safe_name;
879 // Replace leading and trailing spaces with underscores.
880 size_t start = safe_name.find_first_not_of(L' ');
881 size_t end = safe_name.find_last_not_of(L' ');
882 if (start == std::wstring::npos || end == std::wstring::npos)
883 start = end = safe_name.size();
884 for (size_t i = 0; i < start; i++)
886 for (size_t i = end + 1; i < safe_name.size(); i++)
889 // Replace other disallowed characters with underscores
890 for (size_t i = 0; i < safe_name.length(); i++) {
891 bool is_valid = true;
893 // Unlikely, but control characters should always be blacklisted
894 if (safe_name[i] < 32) {
896 } else if (safe_name[i] < 128) {
897 is_valid = disallowed_path_chars.find_first_of(safe_name[i])
898 == std::wstring::npos;
905 return wide_to_utf8(safe_name);
909 void safe_print_string(std::ostream &os, const std::string &str)
911 std::ostream::fmtflags flags = os.flags();
913 for (const char c : str) {
914 if (IS_ASCII_PRINTABLE_CHAR(c) || IS_UTF8_MULTB_START(c) ||
915 IS_UTF8_MULTB_INNER(c) || c == '\n' || c == '\t') {
918 os << '<' << std::setw(2) << (int)c << '>';