src/util/string.cpp

   1 /*
   2 Minetest
   3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
   4
   5 This program is free software; you can redistribute it and/or modify
   6 it under the terms of the GNU Lesser General Public License as published by
   7 the Free Software Foundation; either version 2.1 of the License, or
   8 (at your option) any later version.
   9
  10 This program is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 GNU Lesser General Public License for more details.
  14
  15 You should have received a copy of the GNU Lesser General Public License along
  16 with this program; if not, write to the Free Software Foundation, Inc.,
  17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18 */
  19
  20 #include "string.h"
  21 #include "pointer.h"
  22 #include "numeric.h"
  23 #include "log.h"
  24
  25 #include "hex.h"
  26 #include "porting.h"
  27 #include "translation.h"
  28
  29 #include <algorithm>
  30 #include <array>
  31 #include <sstream>
  32 #include <iomanip>
  33 #include <unordered_map>
  34
  35 #ifndef _WIN32
  36         #include <iconv.h>
  37 #else
  38         #define _WIN32_WINNT 0x0501
  39         #include <windows.h>
  40 #endif
  41
  42 #ifndef _WIN32
  43
  44 static bool convert(const char *to, const char *from, char *outbuf,
  45                 size_t *outbuf_size, char *inbuf, size_t inbuf_size)
  46 {
  47         iconv_t cd = iconv_open(to, from);
  48
  49         char *inbuf_ptr = inbuf;
  50         char *outbuf_ptr = outbuf;
  51
  52         size_t *inbuf_left_ptr = &inbuf_size;
  53
  54         const size_t old_outbuf_size = *outbuf_size;
  55         size_t old_size = inbuf_size;
  56         while (inbuf_size > 0) {
  57                 iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size);
  58                 if (inbuf_size == old_size) {
  59                         iconv_close(cd);
  60                         return false;
  61                 }
  62                 old_size = inbuf_size;
  63         }
  64
  65         iconv_close(cd);
  66         *outbuf_size = old_outbuf_size - *outbuf_size;
  67         return true;
  68 }
  69
  70 #ifdef __ANDROID__
  71 // On Android iconv disagrees how big a wchar_t is for whatever reason
  72 const char *DEFAULT_ENCODING = "UTF-32LE";
  73 #elif defined(__NetBSD__) || defined(__OpenBSD__)
  74         // NetBSD does not allow "WCHAR_T" as a charset input to iconv.
  75         #include <sys/endian.h>
  76         #if BYTE_ORDER == BIG_ENDIAN
  77         const char *DEFAULT_ENCODING = "UTF-32BE";
  78         #else
  79         const char *DEFAULT_ENCODING = "UTF-32LE";
  80         #endif
  81 #else
  82 const char *DEFAULT_ENCODING = "WCHAR_T";
  83 #endif
  84
  85 std::wstring utf8_to_wide(const std::string &input)
  86 {
  87         const size_t inbuf_size = input.length();
  88         // maximum possible size, every character is sizeof(wchar_t) bytes
  89         size_t outbuf_size = input.length() * sizeof(wchar_t);
  90
  91         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
  92         memcpy(inbuf, input.c_str(), inbuf_size);
  93         std::wstring out;
  94         out.resize(outbuf_size / sizeof(wchar_t));
  95
  96 #if defined(__ANDROID__) || defined(__NetBSD__) || defined(__OpenBSD__)
  97         static_assert(sizeof(wchar_t) == 4, "Unexpected wide char size");
  98 #endif
  99
 100         char *outbuf = reinterpret_cast<char*>(&out[0]);
 101         if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
 102                 infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
 103                         << " into wstring" << std::endl;
 104                 delete[] inbuf;
 105                 return L"<invalid UTF-8 string>";
 106         }
 107         delete[] inbuf;
 108
 109         out.resize(outbuf_size / sizeof(wchar_t));
 110         return out;
 111 }
 112
 113 std::string wide_to_utf8(const std::wstring &input)
 114 {
 115         const size_t inbuf_size = input.length() * sizeof(wchar_t);
 116         // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
 117         size_t outbuf_size = input.length() * 4;
 118
 119         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
 120         memcpy(inbuf, input.c_str(), inbuf_size);
 121         std::string out;
 122         out.resize(outbuf_size);
 123
 124         if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
 125                 infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
 126                         << " into UTF-8 string" << std::endl;
 127                 delete[] inbuf;
 128                 return "<invalid wide string>";
 129         }
 130         delete[] inbuf;
 131
 132         out.resize(outbuf_size);
 133         return out;
 134 }
 135
 136 #else // _WIN32
 137
 138 std::wstring utf8_to_wide(const std::string &input)
 139 {
 140         size_t outbuf_size = input.size() + 1;
 141         wchar_t *outbuf = new wchar_t[outbuf_size];
 142         memset(outbuf, 0, outbuf_size * sizeof(wchar_t));
 143         MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(),
 144                 outbuf, outbuf_size);
 145         std::wstring out(outbuf);
 146         delete[] outbuf;
 147         return out;
 148 }
 149
 150 std::string wide_to_utf8(const std::wstring &input)
 151 {
 152         size_t outbuf_size = (input.size() + 1) * 6;
 153         char *outbuf = new char[outbuf_size];
 154         memset(outbuf, 0, outbuf_size);
 155         WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(),
 156                 outbuf, outbuf_size, NULL, NULL);
 157         std::string out(outbuf);
 158         delete[] outbuf;
 159         return out;
 160 }
 161
 162 #endif // _WIN32
 163
 164 wchar_t *utf8_to_wide_c(const char *str)
 165 {
 166         std::wstring ret = utf8_to_wide(std::string(str));
 167         size_t len = ret.length();
 168         wchar_t *ret_c = new wchar_t[len + 1];
 169         memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t));
 170         return ret_c;
 171 }
 172
 173
 174 std::string urlencode(const std::string &str)
 175 {
 176         // Encodes non-unreserved URI characters by a percent sign
 177         // followed by two hex digits. See RFC 3986, section 2.3.
 178         static const char url_hex_chars[] = "0123456789ABCDEF";
 179         std::ostringstream oss(std::ios::binary);
 180         for (unsigned char c : str) {
 181                 if (isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~') {
 182                         oss << c;
 183                 } else {
 184                         oss << "%"
 185                                 << url_hex_chars[(c & 0xf0) >> 4]
 186                                 << url_hex_chars[c & 0x0f];
 187                 }
 188         }
 189         return oss.str();
 190 }
 191
 192 std::string urldecode(const std::string &str)
 193 {
 194         // Inverse of urlencode
 195         std::ostringstream oss(std::ios::binary);
 196         for (u32 i = 0; i < str.size(); i++) {
 197                 unsigned char highvalue, lowvalue;
 198                 if (str[i] == '%' &&
 199                                 hex_digit_decode(str[i+1], highvalue) &&
 200                                 hex_digit_decode(str[i+2], lowvalue)) {
 201                         oss << (char) ((highvalue << 4) | lowvalue);
 202                         i += 2;
 203                 } else {
 204                         oss << str[i];
 205                 }
 206         }
 207         return oss.str();
 208 }
 209
 210 u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask)
 211 {
 212         u32 result = 0;
 213         u32 mask = 0;
 214         char *s = &str[0];
 215         char *flagstr;
 216         char *strpos = nullptr;
 217
 218         while ((flagstr = strtok_r(s, ",", &strpos))) {
 219                 s = nullptr;
 220
 221                 while (*flagstr == ' ' || *flagstr == '\t')
 222                         flagstr++;
 223
 224                 bool flagset = true;
 225                 if (!strncasecmp(flagstr, "no", 2)) {
 226                         flagset = false;
 227                         flagstr += 2;
 228                 }
 229
 230                 for (int i = 0; flagdesc[i].name; i++) {
 231                         if (!strcasecmp(flagstr, flagdesc[i].name)) {
 232                                 mask |= flagdesc[i].flag;
 233                                 if (flagset)
 234                                         result |= flagdesc[i].flag;
 235                                 break;
 236                         }
 237                 }
 238         }
 239
 240         if (flagmask)
 241                 *flagmask = mask;
 242
 243         return result;
 244 }
 245
 246 std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask)
 247 {
 248         std::string result;
 249
 250         for (int i = 0; flagdesc[i].name; i++) {
 251                 if (flagmask & flagdesc[i].flag) {
 252                         if (!(flags & flagdesc[i].flag))
 253                                 result += "no";
 254
 255                         result += flagdesc[i].name;
 256                         result += ", ";
 257                 }
 258         }
 259
 260         size_t len = result.length();
 261         if (len >= 2)
 262                 result.erase(len - 2, 2);
 263
 264         return result;
 265 }
 266
 267 size_t mystrlcpy(char *dst, const char *src, size_t size)
 268 {
 269         size_t srclen  = strlen(src) + 1;
 270         size_t copylen = MYMIN(srclen, size);
 271
 272         if (copylen > 0) {
 273                 memcpy(dst, src, copylen);
 274                 dst[copylen - 1] = '\0';
 275         }
 276
 277         return srclen;
 278 }
 279
 280 char *mystrtok_r(char *s, const char *sep, char **lasts)
 281 {
 282         char *t;
 283
 284         if (!s)
 285                 s = *lasts;
 286
 287         while (*s && strchr(sep, *s))
 288                 s++;
 289
 290         if (!*s)
 291                 return nullptr;
 292
 293         t = s;
 294         while (*t) {
 295                 if (strchr(sep, *t)) {
 296                         *t++ = '\0';
 297                         break;
 298                 }
 299                 t++;
 300         }
 301
 302         *lasts = t;
 303         return s;
 304 }
 305
 306 u64 read_seed(const char *str)
 307 {
 308         char *endptr;
 309         u64 num;
 310
 311         if (str[0] == '0' && str[1] == 'x')
 312                 num = strtoull(str, &endptr, 16);
 313         else
 314                 num = strtoull(str, &endptr, 10);
 315
 316         if (*endptr)
 317                 num = murmur_hash_64_ua(str, (int)strlen(str), 0x1337);
 318
 319         return num;
 320 }
 321
 322 static bool parseHexColorString(const std::string &value, video::SColor &color,
 323                 unsigned char default_alpha)
 324 {
 325         u8 components[] = {0x00, 0x00, 0x00, default_alpha}; // R,G,B,A
 326
 327         size_t len = value.size();
 328         bool short_form;
 329
 330         if (len == 9 || len == 7) // #RRGGBBAA or #RRGGBB
 331                 short_form = false;
 332         else if (len == 5 || len == 4) // #RGBA or #RGB
 333                 short_form = true;
 334         else
 335                 return false;
 336
 337         for (size_t pos = 1, cc = 0; pos < len; pos++, cc++) {
 338                 if (short_form) {
 339                         u8 d;
 340                         if (!hex_digit_decode(value[pos], d))
 341                                 return false;
 342
 343                         components[cc] = (d & 0xf) << 4 | (d & 0xf);
 344                 } else {
 345                         u8 d1, d2;
 346                         if (!hex_digit_decode(value[pos], d1) ||
 347                                         !hex_digit_decode(value[pos+1], d2))
 348                                 return false;
 349
 350                         components[cc] = (d1 & 0xf) << 4 | (d2 & 0xf);
 351                         pos++; // skip the second digit -- it's already used
 352                 }
 353         }
 354
 355         color.setRed(components[0]);
 356         color.setGreen(components[1]);
 357         color.setBlue(components[2]);
 358         color.setAlpha(components[3]);
 359
 360         return true;
 361 }
 362
 363 const static std::unordered_map<std::string, u32> s_named_colors = {
 364         {"aliceblue",            0xf0f8ff},
 365         {"antiquewhite",         0xfaebd7},
 366         {"aqua",                 0x00ffff},
 367         {"aquamarine",           0x7fffd4},
 368         {"azure",                0xf0ffff},
 369         {"beige",                0xf5f5dc},
 370         {"bisque",               0xffe4c4},
 371         {"black",                00000000},
 372         {"blanchedalmond",       0xffebcd},
 373         {"blue",                 0x0000ff},
 374         {"blueviolet",           0x8a2be2},
 375         {"brown",                0xa52a2a},
 376         {"burlywood",            0xdeb887},
 377         {"cadetblue",            0x5f9ea0},
 378         {"chartreuse",           0x7fff00},
 379         {"chocolate",            0xd2691e},
 380         {"coral",                0xff7f50},
 381         {"cornflowerblue",       0x6495ed},
 382         {"cornsilk",             0xfff8dc},
 383         {"crimson",              0xdc143c},
 384         {"cyan",                 0x00ffff},
 385         {"darkblue",             0x00008b},
 386         {"darkcyan",             0x008b8b},
 387         {"darkgoldenrod",        0xb8860b},
 388         {"darkgray",             0xa9a9a9},
 389         {"darkgreen",            0x006400},
 390         {"darkgrey",             0xa9a9a9},
 391         {"darkkhaki",            0xbdb76b},
 392         {"darkmagenta",          0x8b008b},
 393         {"darkolivegreen",       0x556b2f},
 394         {"darkorange",           0xff8c00},
 395         {"darkorchid",           0x9932cc},
 396         {"darkred",              0x8b0000},
 397         {"darksalmon",           0xe9967a},
 398         {"darkseagreen",         0x8fbc8f},
 399         {"darkslateblue",        0x483d8b},
 400         {"darkslategray",        0x2f4f4f},
 401         {"darkslategrey",        0x2f4f4f},
 402         {"darkturquoise",        0x00ced1},
 403         {"darkviolet",           0x9400d3},
 404         {"deeppink",             0xff1493},
 405         {"deepskyblue",          0x00bfff},
 406         {"dimgray",              0x696969},
 407         {"dimgrey",              0x696969},
 408         {"dodgerblue",           0x1e90ff},
 409         {"firebrick",            0xb22222},
 410         {"floralwhite",          0xfffaf0},
 411         {"forestgreen",          0x228b22},
 412         {"fuchsia",              0xff00ff},
 413         {"gainsboro",            0xdcdcdc},
 414         {"ghostwhite",           0xf8f8ff},
 415         {"gold",                 0xffd700},
 416         {"goldenrod",            0xdaa520},
 417         {"gray",                 0x808080},
 418         {"green",                0x008000},
 419         {"greenyellow",          0xadff2f},
 420         {"grey",                 0x808080},
 421         {"honeydew",             0xf0fff0},
 422         {"hotpink",              0xff69b4},
 423         {"indianred",            0xcd5c5c},
 424         {"indigo",               0x4b0082},
 425         {"ivory",                0xfffff0},
 426         {"khaki",                0xf0e68c},
 427         {"lavender",             0xe6e6fa},
 428         {"lavenderblush",        0xfff0f5},
 429         {"lawngreen",            0x7cfc00},
 430         {"lemonchiffon",         0xfffacd},
 431         {"lightblue",            0xadd8e6},
 432         {"lightcoral",           0xf08080},
 433         {"lightcyan",            0xe0ffff},
 434         {"lightgoldenrodyellow", 0xfafad2},
 435         {"lightgray",            0xd3d3d3},
 436         {"lightgreen",           0x90ee90},
 437         {"lightgrey",            0xd3d3d3},
 438         {"lightpink",            0xffb6c1},
 439         {"lightsalmon",          0xffa07a},
 440         {"lightseagreen",        0x20b2aa},
 441         {"lightskyblue",         0x87cefa},
 442         {"lightslategray",       0x778899},
 443         {"lightslategrey",       0x778899},
 444         {"lightsteelblue",       0xb0c4de},
 445         {"lightyellow",          0xffffe0},
 446         {"lime",                 0x00ff00},
 447         {"limegreen",            0x32cd32},
 448         {"linen",                0xfaf0e6},
 449         {"magenta",              0xff00ff},
 450         {"maroon",               0x800000},
 451         {"mediumaquamarine",     0x66cdaa},
 452         {"mediumblue",           0x0000cd},
 453         {"mediumorchid",         0xba55d3},
 454         {"mediumpurple",         0x9370db},
 455         {"mediumseagreen",       0x3cb371},
 456         {"mediumslateblue",      0x7b68ee},
 457         {"mediumspringgreen",    0x00fa9a},
 458         {"mediumturquoise",      0x48d1cc},
 459         {"mediumvioletred",      0xc71585},
 460         {"midnightblue",         0x191970},
 461         {"mintcream",            0xf5fffa},
 462         {"mistyrose",            0xffe4e1},
 463         {"moccasin",             0xffe4b5},
 464         {"navajowhite",          0xffdead},
 465         {"navy",                 0x000080},
 466         {"oldlace",              0xfdf5e6},
 467         {"olive",                0x808000},
 468         {"olivedrab",            0x6b8e23},
 469         {"orange",               0xffa500},
 470         {"orangered",            0xff4500},
 471         {"orchid",               0xda70d6},
 472         {"palegoldenrod",        0xeee8aa},
 473         {"palegreen",            0x98fb98},
 474         {"paleturquoise",        0xafeeee},
 475         {"palevioletred",        0xdb7093},
 476         {"papayawhip",           0xffefd5},
 477         {"peachpuff",            0xffdab9},
 478         {"peru",                 0xcd853f},
 479         {"pink",                 0xffc0cb},
 480         {"plum",                 0xdda0dd},
 481         {"powderblue",           0xb0e0e6},
 482         {"purple",               0x800080},
 483         {"rebeccapurple",        0x663399},
 484         {"red",                  0xff0000},
 485         {"rosybrown",            0xbc8f8f},
 486         {"royalblue",            0x4169e1},
 487         {"saddlebrown",          0x8b4513},
 488         {"salmon",               0xfa8072},
 489         {"sandybrown",           0xf4a460},
 490         {"seagreen",             0x2e8b57},
 491         {"seashell",             0xfff5ee},
 492         {"sienna",               0xa0522d},
 493         {"silver",               0xc0c0c0},
 494         {"skyblue",              0x87ceeb},
 495         {"slateblue",            0x6a5acd},
 496         {"slategray",            0x708090},
 497         {"slategrey",            0x708090},
 498         {"snow",                 0xfffafa},
 499         {"springgreen",          0x00ff7f},
 500         {"steelblue",            0x4682b4},
 501         {"tan",                  0xd2b48c},
 502         {"teal",                 0x008080},
 503         {"thistle",              0xd8bfd8},
 504         {"tomato",               0xff6347},
 505         {"turquoise",            0x40e0d0},
 506         {"violet",               0xee82ee},
 507         {"wheat",                0xf5deb3},
 508         {"white",                0xffffff},
 509         {"whitesmoke",           0xf5f5f5},
 510         {"yellow",               0xffff00},
 511         {"yellowgreen",          0x9acd32}
 512 };
 513
 514 static bool parseNamedColorString(const std::string &value, video::SColor &color)
 515 {
 516         std::string color_name;
 517         std::string alpha_string;
 518
 519         /* If the string has a # in it, assume this is the start of a specified
 520          * alpha value (if it isn't the string is invalid and the error will be
 521          * caught later on, either because the color name won't be found or the
 522          * alpha value will fail conversion)
 523          */
 524         size_t alpha_pos = value.find('#');
 525         if (alpha_pos != std::string::npos) {
 526                 color_name = value.substr(0, alpha_pos);
 527                 alpha_string = value.substr(alpha_pos + 1);
 528         } else {
 529                 color_name = value;
 530         }
 531
 532         color_name = lowercase(color_name);
 533
 534         auto it = s_named_colors.find(color_name);
 535         if (it == s_named_colors.end())
 536                 return false;
 537
 538         u32 color_temp = it->second;
 539
 540         /* An empty string for alpha is ok (none of the color table entries
 541          * have an alpha value either). Color strings without an alpha specified
 542          * are interpreted as fully opaque
 543          */
 544         if (!alpha_string.empty()) {
 545                 if (alpha_string.size() == 1) {
 546                         u8 d;
 547                         if (!hex_digit_decode(alpha_string[0], d))
 548                                 return false;
 549
 550                         color_temp |= ((d & 0xf) << 4 | (d & 0xf)) << 24;
 551                 } else if (alpha_string.size() == 2) {
 552                         u8 d1, d2;
 553                         if (!hex_digit_decode(alpha_string[0], d1)
 554                                         || !hex_digit_decode(alpha_string[1], d2))
 555                                 return false;
 556
 557                         color_temp |= ((d1 & 0xf) << 4 | (d2 & 0xf)) << 24;
 558                 } else {
 559                         return false;
 560                 }
 561         } else {
 562                 color_temp |= 0xff << 24; // Fully opaque
 563         }
 564
 565         color = video::SColor(color_temp);
 566
 567         return true;
 568 }
 569
 570 bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
 571                 unsigned char default_alpha)
 572 {
 573         bool success;
 574
 575         if (value[0] == '#')
 576                 success = parseHexColorString(value, color, default_alpha);
 577         else
 578                 success = parseNamedColorString(value, color);
 579
 580         if (!success && !quiet)
 581                 errorstream << "Invalid color: \"" << value << "\"" << std::endl;
 582
 583         return success;
 584 }
 585
 586 void str_replace(std::string &str, char from, char to)
 587 {
 588         std::replace(str.begin(), str.end(), from, to);
 589 }
 590
 591 /* Translated strings have the following format:
 592  * \x1bT marks the beginning of a translated string
 593  * \x1bE marks its end
 594  *
 595  * \x1bF marks the beginning of an argument, and \x1bE its end.
 596  *
 597  * Arguments are *not* translated, as they may contain escape codes.
 598  * Thus, if you want a translated argument, it should be inside \x1bT/\x1bE tags as well.
 599  *
 600  * This representation is chosen so that clients ignoring escape codes will
 601  * see untranslated strings.
 602  *
 603  * For instance, suppose we have a string such as "@1 Wool" with the argument "White"
 604  * The string will be sent as "\x1bT\x1bF\x1bTWhite\x1bE\x1bE Wool\x1bE"
 605  * To translate this string, we extract what is inside \x1bT/\x1bE tags.
 606  * When we notice the \x1bF tag, we recursively extract what is there up to the \x1bE end tag,
 607  * translating it as well.
 608  * We get the argument "White", translated, and create a template string with "@1" instead of it.
 609  * We finally get the template "@1 Wool" that was used in the beginning, which we translate
 610  * before filling it again.
 611  */
 612
 613 void translate_all(const std::wstring &s, size_t &i,
 614                 Translations *translations, std::wstring &res);
 615
 616 void translate_string(const std::wstring &s, Translations *translations,
 617                 const std::wstring &textdomain, size_t &i, std::wstring &res)
 618 {
 619         std::wostringstream output;
 620         std::vector<std::wstring> args;
 621         int arg_number = 1;
 622         while (i < s.length()) {
 623                 // Not an escape sequence: just add the character.
 624                 if (s[i] != '\x1b') {
 625                         output.put(s[i]);
 626                         // The character is a literal '@'; add it twice
 627                         // so that it is not mistaken for an argument.
 628                         if (s[i] == L'@')
 629                                 output.put(L'@');
 630                         ++i;
 631                         continue;
 632                 }
 633
 634                 // We have an escape sequence: locate it and its data
 635                 // It is either a single character, or it begins with '('
 636                 // and extends up to the following ')', with '\' as an escape character.
 637                 ++i;
 638                 size_t start_index = i;
 639                 size_t length;
 640                 if (i == s.length()) {
 641                         length = 0;
 642                 } else if (s[i] == L'(') {
 643                         ++i;
 644                         ++start_index;
 645                         while (i < s.length() && s[i] != L')') {
 646                                 if (s[i] == L'\\')
 647                                         ++i;
 648                                 ++i;
 649                         }
 650                         length = i - start_index;
 651                         ++i;
 652                         if (i > s.length())
 653                                 i = s.length();
 654                 } else {
 655                         ++i;
 656                         length = 1;
 657                 }
 658                 std::wstring escape_sequence(s, start_index, length);
 659
 660                 // The escape sequence is now reconstructed.
 661                 std::vector<std::wstring> parts = split(escape_sequence, L'@');
 662                 if (parts[0] == L"E") {
 663                         // "End of translation" escape sequence. We are done locating the string to translate.
 664                         break;
 665                 } else if (parts[0] == L"F") {
 666                         // "Start of argument" escape sequence.
 667                         // Recursively translate the argument, and add it to the argument list.
 668                         // Add an "@n" instead of the argument to the template to translate.
 669                         if (arg_number >= 10) {
 670                                 errorstream << "Ignoring too many arguments to translation" << std::endl;
 671                                 std::wstring arg;
 672                                 translate_all(s, i, translations, arg);
 673                                 args.push_back(arg);
 674                                 continue;
 675                         }
 676                         output.put(L'@');
 677                         output << arg_number;
 678                         ++arg_number;
 679                         std::wstring arg;
 680                         translate_all(s, i, translations, arg);
 681                         args.push_back(arg);
 682                 } else {
 683                         // This is an escape sequence *inside* the template string to translate itself.
 684                         // This should not happen, show an error message.
 685                         errorstream << "Ignoring escape sequence '"
 686                                 << wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
 687                 }
 688         }
 689
 690         std::wstring toutput;
 691         // Translate the template.
 692         if (translations != nullptr)
 693                 toutput = translations->getTranslation(
 694                                 textdomain, output.str());
 695         else
 696                 toutput = output.str();
 697
 698         // Put back the arguments in the translated template.
 699         std::wostringstream result;
 700         size_t j = 0;
 701         while (j < toutput.length()) {
 702                 // Normal character, add it to output and continue.
 703                 if (toutput[j] != L'@' || j == toutput.length() - 1) {
 704                         result.put(toutput[j]);
 705                         ++j;
 706                         continue;
 707                 }
 708
 709                 ++j;
 710                 // Literal escape for '@'.
 711                 if (toutput[j] == L'@') {
 712                         result.put(L'@');
 713                         ++j;
 714                         continue;
 715                 }
 716
 717                 // Here we have an argument; get its index and add the translated argument to the output.
 718                 int arg_index = toutput[j] - L'1';
 719                 ++j;
 720                 if (0 <= arg_index && (size_t)arg_index < args.size()) {
 721                         result << args[arg_index];
 722                 } else {
 723                         // This is not allowed: show an error message
 724                         errorstream << "Ignoring out-of-bounds argument escape sequence in translation" << std::endl;
 725                 }
 726         }
 727         res = result.str();
 728 }
 729
 730 void translate_all(const std::wstring &s, size_t &i,
 731                 Translations *translations, std::wstring &res)
 732 {
 733         std::wostringstream output;
 734         while (i < s.length()) {
 735                 // Not an escape sequence: just add the character.
 736                 if (s[i] != '\x1b') {
 737                         output.put(s[i]);
 738                         ++i;
 739                         continue;
 740                 }
 741
 742                 // We have an escape sequence: locate it and its data
 743                 // It is either a single character, or it begins with '('
 744                 // and extends up to the following ')', with '\' as an escape character.
 745                 size_t escape_start = i;
 746                 ++i;
 747                 size_t start_index = i;
 748                 size_t length;
 749                 if (i == s.length()) {
 750                         length = 0;
 751                 } else if (s[i] == L'(') {
 752                         ++i;
 753                         ++start_index;
 754                         while (i < s.length() && s[i] != L')') {
 755                                 if (s[i] == L'\\') {
 756                                         ++i;
 757                                 }
 758                                 ++i;
 759                         }
 760                         length = i - start_index;
 761                         ++i;
 762                         if (i > s.length())
 763                                 i = s.length();
 764                 } else {
 765                         ++i;
 766                         length = 1;
 767                 }
 768                 std::wstring escape_sequence(s, start_index, length);
 769
 770                 // The escape sequence is now reconstructed.
 771                 std::vector<std::wstring> parts = split(escape_sequence, L'@');
 772                 if (parts[0] == L"E") {
 773                         // "End of argument" escape sequence. Exit.
 774                         break;
 775                 } else if (parts[0] == L"T") {
 776                         // Beginning of translated string.
 777                         std::wstring textdomain;
 778                         if (parts.size() > 1)
 779                                 textdomain = parts[1];
 780                         std::wstring translated;
 781                         translate_string(s, translations, textdomain, i, translated);
 782                         output << translated;
 783                 } else {
 784                         // Another escape sequence, such as colors. Preserve it.
 785                         output << std::wstring(s, escape_start, i - escape_start);
 786                 }
 787         }
 788
 789         res = output.str();
 790 }
 791
 792 // Translate string server side
 793 std::wstring translate_string(const std::wstring &s, Translations *translations)
 794 {
 795         size_t i = 0;
 796         std::wstring res;
 797         translate_all(s, i, translations, res);
 798         return res;
 799 }
 800
 801 // Translate string client side
 802 std::wstring translate_string(const std::wstring &s)
 803 {
 804 #ifdef SERVER
 805         return translate_string(s, nullptr);
 806 #else
 807         return translate_string(s, g_client_translations);
 808 #endif
 809 }
 810
 811 static const std::array<std::wstring, 30> disallowed_dir_names = {
 812         // Problematic filenames from here:
 813         // https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
 814         // Plus undocumented values from here:
 815         // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
 816         L"CON",
 817         L"PRN",
 818         L"AUX",
 819         L"NUL",
 820         L"COM1",
 821         L"COM2",
 822         L"COM3",
 823         L"COM4",
 824         L"COM5",
 825         L"COM6",
 826         L"COM7",
 827         L"COM8",
 828         L"COM9",
 829         L"COM\u00B2",
 830         L"COM\u00B3",
 831         L"COM\u00B9",
 832         L"LPT1",
 833         L"LPT2",
 834         L"LPT3",
 835         L"LPT4",
 836         L"LPT5",
 837         L"LPT6",
 838         L"LPT7",
 839         L"LPT8",
 840         L"LPT9",
 841         L"LPT\u00B2",
 842         L"LPT\u00B3",
 843         L"LPT\u00B9",
 844         L"CONIN$",
 845         L"CONOUT$",
 846 };
 847
 848 /**
 849  * List of characters that are blacklisted from created directories
 850  */
 851 static const std::wstring disallowed_path_chars = L"<>:\"/\\|?*.";
 852
 853
 854 std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
 855 {
 856         std::wstring safe_name = utf8_to_wide(str);
 857
 858         for (std::wstring disallowed_name : disallowed_dir_names) {
 859                 if (str_equal(safe_name, disallowed_name, true)) {
 860                         safe_name = utf8_to_wide(optional_prefix) + safe_name;
 861                         break;
 862                 }
 863         }
 864
 865         // Replace leading and trailing spaces with underscores.
 866         size_t start = safe_name.find_first_not_of(L' ');
 867         size_t end = safe_name.find_last_not_of(L' ');
 868         if (start == std::wstring::npos || end == std::wstring::npos)
 869                 start = end = safe_name.size();
 870         for (size_t i = 0; i < start; i++)
 871                 safe_name[i] = L'_';
 872         for (size_t i = end + 1; i < safe_name.size(); i++)
 873                 safe_name[i] = L'_';
 874
 875         // Replace other disallowed characters with underscores
 876         for (size_t i = 0; i < safe_name.length(); i++) {
 877                 bool is_valid = true;
 878
 879                 // Unlikely, but control characters should always be blacklisted
 880                 if (safe_name[i] < 32) {
 881                         is_valid = false;
 882                 } else if (safe_name[i] < 128) {
 883                         is_valid = disallowed_path_chars.find_first_of(safe_name[i])
 884                                         == std::wstring::npos;
 885                 }
 886
 887                 if (!is_valid)
 888                         safe_name[i] = L'_';
 889         }
 890
 891         return wide_to_utf8(safe_name);
 892 }
 893
 894
 895 void safe_print_string(std::ostream &os, const std::string &str)
 896 {
 897         std::ostream::fmtflags flags = os.flags();
 898         os << std::hex;
 899         for (const char c : str) {
 900                 if (IS_ASCII_PRINTABLE_CHAR(c) || IS_UTF8_MULTB_START(c) ||
 901                                 IS_UTF8_MULTB_INNER(c) || c == '\n' || c == '\t') {
 902                         os << c;
 903                 } else {
 904                         os << '<' << std::setw(2) << (int)c << '>';
 905                 }
 906         }
 907         os.setf(flags);
 908 }