src/util/string.cpp

   1 /*
   2 Minetest
   3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
   4
   5 This program is free software; you can redistribute it and/or modify
   6 it under the terms of the GNU Lesser General Public License as published by
   7 the Free Software Foundation; either version 2.1 of the License, or
   8 (at your option) any later version.
   9
  10 This program is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 GNU Lesser General Public License for more details.
  14
  15 You should have received a copy of the GNU Lesser General Public License along
  16 with this program; if not, write to the Free Software Foundation, Inc.,
  17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18 */
  19
  20 #include "string.h"
  21 #include "pointer.h"
  22 #include "numeric.h"
  23 #include "log.h"
  24
  25 #include "hex.h"
  26 #include "porting.h"
  27 #include "translation.h"
  28
  29 #include <algorithm>
  30 #include <array>
  31 #include <sstream>
  32 #include <iomanip>
  33 #include <unordered_map>
  34
  35 #ifndef _WIN32
  36         #include <iconv.h>
  37 #else
  38         #define _WIN32_WINNT 0x0501
  39         #include <windows.h>
  40 #endif
  41
  42 #ifdef __NetBSD__
  43         #include <sys/param.h>
  44         #if __NetBSD_Version__ <= 999001500
  45                 #define BSD_ICONV_USED
  46         #endif
  47 #elif defined(_ICONV_H_) && (defined(__FreeBSD__) || defined(__OpenBSD__) || \
  48         defined(__DragonFly__))
  49         #define BSD_ICONV_USED
  50 #endif
  51
  52 #ifndef _WIN32
  53
  54 static bool convert(const char *to, const char *from, char *outbuf,
  55                 size_t *outbuf_size, char *inbuf, size_t inbuf_size)
  56 {
  57         iconv_t cd = iconv_open(to, from);
  58
  59 #ifdef BSD_ICONV_USED
  60         const char *inbuf_ptr = inbuf;
  61 #else
  62         char *inbuf_ptr = inbuf;
  63 #endif
  64         char *outbuf_ptr = outbuf;
  65
  66         size_t *inbuf_left_ptr = &inbuf_size;
  67
  68         const size_t old_outbuf_size = *outbuf_size;
  69         size_t old_size = inbuf_size;
  70         while (inbuf_size > 0) {
  71                 iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size);
  72                 if (inbuf_size == old_size) {
  73                         iconv_close(cd);
  74                         return false;
  75                 }
  76                 old_size = inbuf_size;
  77         }
  78
  79         iconv_close(cd);
  80         *outbuf_size = old_outbuf_size - *outbuf_size;
  81         return true;
  82 }
  83
// iconv encoding name used for the wchar_t side of the conversions in this
// file (utf8_to_wide() and wide_to_utf8() pass it to convert()).
// "WCHAR_T" is used by default; the platforms below need an explicit
// UTF-32 variant instead.
#ifdef __ANDROID__
// On Android iconv disagrees how big a wchar_t is for whatever reason
const char *DEFAULT_ENCODING = "UTF-32LE";
#elif defined(__NetBSD__)
        // NetBSD does not allow "WCHAR_T" as a charset input to iconv.
        // Pick the UTF-32 flavour matching the byte order of the target.
        #include <sys/endian.h>
        #if BYTE_ORDER == BIG_ENDIAN
        const char *DEFAULT_ENCODING = "UTF-32BE";
        #else
        const char *DEFAULT_ENCODING = "UTF-32LE";
        #endif
#else
const char *DEFAULT_ENCODING = "WCHAR_T";
#endif
  98
  99 std::wstring utf8_to_wide(const std::string &input)
 100 {
 101         const size_t inbuf_size = input.length();
 102         // maximum possible size, every character is sizeof(wchar_t) bytes
 103         size_t outbuf_size = input.length() * sizeof(wchar_t);
 104
 105         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
 106         memcpy(inbuf, input.c_str(), inbuf_size);
 107         std::wstring out;
 108         out.resize(outbuf_size / sizeof(wchar_t));
 109
 110 #if defined(__ANDROID__) || defined(__NetBSD__)
 111         SANITY_CHECK(sizeof(wchar_t) == 4);
 112 #endif
 113
 114         char *outbuf = reinterpret_cast<char*>(&out[0]);
 115         if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
 116                 infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
 117                         << " into wstring" << std::endl;
 118                 delete[] inbuf;
 119                 return L"<invalid UTF-8 string>";
 120         }
 121         delete[] inbuf;
 122
 123         out.resize(outbuf_size / sizeof(wchar_t));
 124         return out;
 125 }
 126
 127 std::string wide_to_utf8(const std::wstring &input)
 128 {
 129         const size_t inbuf_size = input.length() * sizeof(wchar_t);
 130         // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
 131         size_t outbuf_size = input.length() * 4;
 132
 133         char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
 134         memcpy(inbuf, input.c_str(), inbuf_size);
 135         std::string out;
 136         out.resize(outbuf_size);
 137
 138         if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
 139                 infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
 140                         << " into UTF-8 string" << std::endl;
 141                 delete[] inbuf;
 142                 return "<invalid wide string>";
 143         }
 144         delete[] inbuf;
 145
 146         out.resize(outbuf_size);
 147         return out;
 148 }
 149
 150 #else // _WIN32
 151
 152 std::wstring utf8_to_wide(const std::string &input)
 153 {
 154         size_t outbuf_size = input.size() + 1;
 155         wchar_t *outbuf = new wchar_t[outbuf_size];
 156         memset(outbuf, 0, outbuf_size * sizeof(wchar_t));
 157         MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(),
 158                 outbuf, outbuf_size);
 159         std::wstring out(outbuf);
 160         delete[] outbuf;
 161         return out;
 162 }
 163
 164 std::string wide_to_utf8(const std::wstring &input)
 165 {
 166         size_t outbuf_size = (input.size() + 1) * 6;
 167         char *outbuf = new char[outbuf_size];
 168         memset(outbuf, 0, outbuf_size);
 169         WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(),
 170                 outbuf, outbuf_size, NULL, NULL);
 171         std::string out(outbuf);
 172         delete[] outbuf;
 173         return out;
 174 }
 175
 176 #endif // _WIN32
 177
 178 wchar_t *utf8_to_wide_c(const char *str)
 179 {
 180         std::wstring ret = utf8_to_wide(std::string(str));
 181         size_t len = ret.length();
 182         wchar_t *ret_c = new wchar_t[len + 1];
 183         memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t));
 184         return ret_c;
 185 }
 186
 187
 188 std::string urlencode(const std::string &str)
 189 {
 190         // Encodes non-unreserved URI characters by a percent sign
 191         // followed by two hex digits. See RFC 3986, section 2.3.
 192         static const char url_hex_chars[] = "0123456789ABCDEF";
 193         std::ostringstream oss(std::ios::binary);
 194         for (unsigned char c : str) {
 195                 if (isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~') {
 196                         oss << c;
 197                 } else {
 198                         oss << "%"
 199                                 << url_hex_chars[(c & 0xf0) >> 4]
 200                                 << url_hex_chars[c & 0x0f];
 201                 }
 202         }
 203         return oss.str();
 204 }
 205
 206 std::string urldecode(const std::string &str)
 207 {
 208         // Inverse of urlencode
 209         std::ostringstream oss(std::ios::binary);
 210         for (u32 i = 0; i < str.size(); i++) {
 211                 unsigned char highvalue, lowvalue;
 212                 if (str[i] == '%' &&
 213                                 hex_digit_decode(str[i+1], highvalue) &&
 214                                 hex_digit_decode(str[i+2], lowvalue)) {
 215                         oss << (char) ((highvalue << 4) | lowvalue);
 216                         i += 2;
 217                 } else {
 218                         oss << str[i];
 219                 }
 220         }
 221         return oss.str();
 222 }
 223
 224 u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask)
 225 {
 226         u32 result = 0;
 227         u32 mask = 0;
 228         char *s = &str[0];
 229         char *flagstr;
 230         char *strpos = nullptr;
 231
 232         while ((flagstr = strtok_r(s, ",", &strpos))) {
 233                 s = nullptr;
 234
 235                 while (*flagstr == ' ' || *flagstr == '\t')
 236                         flagstr++;
 237
 238                 bool flagset = true;
 239                 if (!strncasecmp(flagstr, "no", 2)) {
 240                         flagset = false;
 241                         flagstr += 2;
 242                 }
 243
 244                 for (int i = 0; flagdesc[i].name; i++) {
 245                         if (!strcasecmp(flagstr, flagdesc[i].name)) {
 246                                 mask |= flagdesc[i].flag;
 247                                 if (flagset)
 248                                         result |= flagdesc[i].flag;
 249                                 break;
 250                         }
 251                 }
 252         }
 253
 254         if (flagmask)
 255                 *flagmask = mask;
 256
 257         return result;
 258 }
 259
 260 std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask)
 261 {
 262         std::string result;
 263
 264         for (int i = 0; flagdesc[i].name; i++) {
 265                 if (flagmask & flagdesc[i].flag) {
 266                         if (!(flags & flagdesc[i].flag))
 267                                 result += "no";
 268
 269                         result += flagdesc[i].name;
 270                         result += ", ";
 271                 }
 272         }
 273
 274         size_t len = result.length();
 275         if (len >= 2)
 276                 result.erase(len - 2, 2);
 277
 278         return result;
 279 }
 280
 281 size_t mystrlcpy(char *dst, const char *src, size_t size)
 282 {
 283         size_t srclen  = strlen(src) + 1;
 284         size_t copylen = MYMIN(srclen, size);
 285
 286         if (copylen > 0) {
 287                 memcpy(dst, src, copylen);
 288                 dst[copylen - 1] = '\0';
 289         }
 290
 291         return srclen;
 292 }
 293
 294 char *mystrtok_r(char *s, const char *sep, char **lasts)
 295 {
 296         char *t;
 297
 298         if (!s)
 299                 s = *lasts;
 300
 301         while (*s && strchr(sep, *s))
 302                 s++;
 303
 304         if (!*s)
 305                 return nullptr;
 306
 307         t = s;
 308         while (*t) {
 309                 if (strchr(sep, *t)) {
 310                         *t++ = '\0';
 311                         break;
 312                 }
 313                 t++;
 314         }
 315
 316         *lasts = t;
 317         return s;
 318 }
 319
 320 u64 read_seed(const char *str)
 321 {
 322         char *endptr;
 323         u64 num;
 324
 325         if (str[0] == '0' && str[1] == 'x')
 326                 num = strtoull(str, &endptr, 16);
 327         else
 328                 num = strtoull(str, &endptr, 10);
 329
 330         if (*endptr)
 331                 num = murmur_hash_64_ua(str, (int)strlen(str), 0x1337);
 332
 333         return num;
 334 }
 335
 336 static bool parseHexColorString(const std::string &value, video::SColor &color,
 337                 unsigned char default_alpha)
 338 {
 339         u8 components[] = {0x00, 0x00, 0x00, default_alpha}; // R,G,B,A
 340
 341         size_t len = value.size();
 342         bool short_form;
 343
 344         if (len == 9 || len == 7) // #RRGGBBAA or #RRGGBB
 345                 short_form = false;
 346         else if (len == 5 || len == 4) // #RGBA or #RGB
 347                 short_form = true;
 348         else
 349                 return false;
 350
 351         for (size_t pos = 1, cc = 0; pos < len; pos++, cc++) {
 352                 if (short_form) {
 353                         u8 d;
 354                         if (!hex_digit_decode(value[pos], d))
 355                                 return false;
 356
 357                         components[cc] = (d & 0xf) << 4 | (d & 0xf);
 358                 } else {
 359                         u8 d1, d2;
 360                         if (!hex_digit_decode(value[pos], d1) ||
 361                                         !hex_digit_decode(value[pos+1], d2))
 362                                 return false;
 363
 364                         components[cc] = (d1 & 0xf) << 4 | (d2 & 0xf);
 365                         pos++; // skip the second digit -- it's already used
 366                 }
 367         }
 368
 369         color.setRed(components[0]);
 370         color.setGreen(components[1]);
 371         color.setBlue(components[2]);
 372         color.setAlpha(components[3]);
 373
 374         return true;
 375 }
 376
 377 const static std::unordered_map<std::string, u32> s_named_colors = {
 378         {"aliceblue",            0xf0f8ff},
 379         {"antiquewhite",         0xfaebd7},
 380         {"aqua",                 0x00ffff},
 381         {"aquamarine",           0x7fffd4},
 382         {"azure",                0xf0ffff},
 383         {"beige",                0xf5f5dc},
 384         {"bisque",               0xffe4c4},
 385         {"black",                00000000},
 386         {"blanchedalmond",       0xffebcd},
 387         {"blue",                 0x0000ff},
 388         {"blueviolet",           0x8a2be2},
 389         {"brown",                0xa52a2a},
 390         {"burlywood",            0xdeb887},
 391         {"cadetblue",            0x5f9ea0},
 392         {"chartreuse",           0x7fff00},
 393         {"chocolate",            0xd2691e},
 394         {"coral",                0xff7f50},
 395         {"cornflowerblue",       0x6495ed},
 396         {"cornsilk",             0xfff8dc},
 397         {"crimson",              0xdc143c},
 398         {"cyan",                 0x00ffff},
 399         {"darkblue",             0x00008b},
 400         {"darkcyan",             0x008b8b},
 401         {"darkgoldenrod",        0xb8860b},
 402         {"darkgray",             0xa9a9a9},
 403         {"darkgreen",            0x006400},
 404         {"darkgrey",             0xa9a9a9},
 405         {"darkkhaki",            0xbdb76b},
 406         {"darkmagenta",          0x8b008b},
 407         {"darkolivegreen",       0x556b2f},
 408         {"darkorange",           0xff8c00},
 409         {"darkorchid",           0x9932cc},
 410         {"darkred",              0x8b0000},
 411         {"darksalmon",           0xe9967a},
 412         {"darkseagreen",         0x8fbc8f},
 413         {"darkslateblue",        0x483d8b},
 414         {"darkslategray",        0x2f4f4f},
 415         {"darkslategrey",        0x2f4f4f},
 416         {"darkturquoise",        0x00ced1},
 417         {"darkviolet",           0x9400d3},
 418         {"deeppink",             0xff1493},
 419         {"deepskyblue",          0x00bfff},
 420         {"dimgray",              0x696969},
 421         {"dimgrey",              0x696969},
 422         {"dodgerblue",           0x1e90ff},
 423         {"firebrick",            0xb22222},
 424         {"floralwhite",          0xfffaf0},
 425         {"forestgreen",          0x228b22},
 426         {"fuchsia",              0xff00ff},
 427         {"gainsboro",            0xdcdcdc},
 428         {"ghostwhite",           0xf8f8ff},
 429         {"gold",                 0xffd700},
 430         {"goldenrod",            0xdaa520},
 431         {"gray",                 0x808080},
 432         {"green",                0x008000},
 433         {"greenyellow",          0xadff2f},
 434         {"grey",                 0x808080},
 435         {"honeydew",             0xf0fff0},
 436         {"hotpink",              0xff69b4},
 437         {"indianred",            0xcd5c5c},
 438         {"indigo",               0x4b0082},
 439         {"ivory",                0xfffff0},
 440         {"khaki",                0xf0e68c},
 441         {"lavender",             0xe6e6fa},
 442         {"lavenderblush",        0xfff0f5},
 443         {"lawngreen",            0x7cfc00},
 444         {"lemonchiffon",         0xfffacd},
 445         {"lightblue",            0xadd8e6},
 446         {"lightcoral",           0xf08080},
 447         {"lightcyan",            0xe0ffff},
 448         {"lightgoldenrodyellow", 0xfafad2},
 449         {"lightgray",            0xd3d3d3},
 450         {"lightgreen",           0x90ee90},
 451         {"lightgrey",            0xd3d3d3},
 452         {"lightpink",            0xffb6c1},
 453         {"lightsalmon",          0xffa07a},
 454         {"lightseagreen",        0x20b2aa},
 455         {"lightskyblue",         0x87cefa},
 456         {"lightslategray",       0x778899},
 457         {"lightslategrey",       0x778899},
 458         {"lightsteelblue",       0xb0c4de},
 459         {"lightyellow",          0xffffe0},
 460         {"lime",                 0x00ff00},
 461         {"limegreen",            0x32cd32},
 462         {"linen",                0xfaf0e6},
 463         {"magenta",              0xff00ff},
 464         {"maroon",               0x800000},
 465         {"mediumaquamarine",     0x66cdaa},
 466         {"mediumblue",           0x0000cd},
 467         {"mediumorchid",         0xba55d3},
 468         {"mediumpurple",         0x9370db},
 469         {"mediumseagreen",       0x3cb371},
 470         {"mediumslateblue",      0x7b68ee},
 471         {"mediumspringgreen",    0x00fa9a},
 472         {"mediumturquoise",      0x48d1cc},
 473         {"mediumvioletred",      0xc71585},
 474         {"midnightblue",         0x191970},
 475         {"mintcream",            0xf5fffa},
 476         {"mistyrose",            0xffe4e1},
 477         {"moccasin",             0xffe4b5},
 478         {"navajowhite",          0xffdead},
 479         {"navy",                 0x000080},
 480         {"oldlace",              0xfdf5e6},
 481         {"olive",                0x808000},
 482         {"olivedrab",            0x6b8e23},
 483         {"orange",               0xffa500},
 484         {"orangered",            0xff4500},
 485         {"orchid",               0xda70d6},
 486         {"palegoldenrod",        0xeee8aa},
 487         {"palegreen",            0x98fb98},
 488         {"paleturquoise",        0xafeeee},
 489         {"palevioletred",        0xdb7093},
 490         {"papayawhip",           0xffefd5},
 491         {"peachpuff",            0xffdab9},
 492         {"peru",                 0xcd853f},
 493         {"pink",                 0xffc0cb},
 494         {"plum",                 0xdda0dd},
 495         {"powderblue",           0xb0e0e6},
 496         {"purple",               0x800080},
 497         {"rebeccapurple",        0x663399},
 498         {"red",                  0xff0000},
 499         {"rosybrown",            0xbc8f8f},
 500         {"royalblue",            0x4169e1},
 501         {"saddlebrown",          0x8b4513},
 502         {"salmon",               0xfa8072},
 503         {"sandybrown",           0xf4a460},
 504         {"seagreen",             0x2e8b57},
 505         {"seashell",             0xfff5ee},
 506         {"sienna",               0xa0522d},
 507         {"silver",               0xc0c0c0},
 508         {"skyblue",              0x87ceeb},
 509         {"slateblue",            0x6a5acd},
 510         {"slategray",            0x708090},
 511         {"slategrey",            0x708090},
 512         {"snow",                 0xfffafa},
 513         {"springgreen",          0x00ff7f},
 514         {"steelblue",            0x4682b4},
 515         {"tan",                  0xd2b48c},
 516         {"teal",                 0x008080},
 517         {"thistle",              0xd8bfd8},
 518         {"tomato",               0xff6347},
 519         {"turquoise",            0x40e0d0},
 520         {"violet",               0xee82ee},
 521         {"wheat",                0xf5deb3},
 522         {"white",                0xffffff},
 523         {"whitesmoke",           0xf5f5f5},
 524         {"yellow",               0xffff00},
 525         {"yellowgreen",          0x9acd32}
 526 };
 527
 528 static bool parseNamedColorString(const std::string &value, video::SColor &color)
 529 {
 530         std::string color_name;
 531         std::string alpha_string;
 532
 533         /* If the string has a # in it, assume this is the start of a specified
 534          * alpha value (if it isn't the string is invalid and the error will be
 535          * caught later on, either because the color name won't be found or the
 536          * alpha value will fail conversion)
 537          */
 538         size_t alpha_pos = value.find('#');
 539         if (alpha_pos != std::string::npos) {
 540                 color_name = value.substr(0, alpha_pos);
 541                 alpha_string = value.substr(alpha_pos + 1);
 542         } else {
 543                 color_name = value;
 544         }
 545
 546         color_name = lowercase(color_name);
 547
 548         auto it = s_named_colors.find(color_name);
 549         if (it == s_named_colors.end())
 550                 return false;
 551
 552         u32 color_temp = it->second;
 553
 554         /* An empty string for alpha is ok (none of the color table entries
 555          * have an alpha value either). Color strings without an alpha specified
 556          * are interpreted as fully opaque
 557          */
 558         if (!alpha_string.empty()) {
 559                 if (alpha_string.size() == 1) {
 560                         u8 d;
 561                         if (!hex_digit_decode(alpha_string[0], d))
 562                                 return false;
 563
 564                         color_temp |= ((d & 0xf) << 4 | (d & 0xf)) << 24;
 565                 } else if (alpha_string.size() == 2) {
 566                         u8 d1, d2;
 567                         if (!hex_digit_decode(alpha_string[0], d1)
 568                                         || !hex_digit_decode(alpha_string[1], d2))
 569                                 return false;
 570
 571                         color_temp |= ((d1 & 0xf) << 4 | (d2 & 0xf)) << 24;
 572                 } else {
 573                         return false;
 574                 }
 575         } else {
 576                 color_temp |= 0xff << 24; // Fully opaque
 577         }
 578
 579         color = video::SColor(color_temp);
 580
 581         return true;
 582 }
 583
 584 bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
 585                 unsigned char default_alpha)
 586 {
 587         bool success;
 588
 589         if (value[0] == '#')
 590                 success = parseHexColorString(value, color, default_alpha);
 591         else
 592                 success = parseNamedColorString(value, color);
 593
 594         if (!success && !quiet)
 595                 errorstream << "Invalid color: \"" << value << "\"" << std::endl;
 596
 597         return success;
 598 }
 599
 600 void str_replace(std::string &str, char from, char to)
 601 {
 602         std::replace(str.begin(), str.end(), from, to);
 603 }
 604
 605 /* Translated strings have the following format:
 606  * \x1bT marks the beginning of a translated string
 607  * \x1bE marks its end
 608  *
 609  * \x1bF marks the beginning of an argument, and \x1bE its end.
 610  *
 611  * Arguments are *not* translated, as they may contain escape codes.
 612  * Thus, if you want a translated argument, it should be inside \x1bT/\x1bE tags as well.
 613  *
 614  * This representation is chosen so that clients ignoring escape codes will
 615  * see untranslated strings.
 616  *
 617  * For instance, suppose we have a string such as "@1 Wool" with the argument "White"
 618  * The string will be sent as "\x1bT\x1bF\x1bTWhite\x1bE\x1bE Wool\x1bE"
 619  * To translate this string, we extract what is inside \x1bT/\x1bE tags.
 620  * When we notice the \x1bF tag, we recursively extract what is there up to the \x1bE end tag,
 621  * translating it as well.
 622  * We get the argument "White", translated, and create a template string with "@1" instead of it.
 623  * We finally get the template "@1 Wool" that was used in the beginning, which we translate
 624  * before filling it again.
 625  */
 626
// Forward declaration: translate_string() below and translate_all() call
// each other recursively. `i` is the shared read position inside `s` and is
// advanced by both functions; `res` receives the produced text.
void translate_all(const std::wstring &s, size_t &i,
                Translations *translations, std::wstring &res);
 629
/*
 * Translates one translated-string section of `s`.
 *
 * s:            the complete input string
 * translations: translation table, or nullptr to leave the template as is
 * textdomain:   text domain handed to Translations::getTranslation()
 * i:            in: position just behind the opening escape sequence
 *               out: position behind the closing "E" escape sequence, or
 *               s.length() if the input ends first
 * res:          receives the translated text with its arguments filled in
 *
 * Works in two passes: first the template is rebuilt from the input, with
 * every argument replaced by "@n" and collected in `args`; then the
 * (possibly translated) template gets its "@n" placeholders substituted.
 */
void translate_string(const std::wstring &s, Translations *translations,
                const std::wstring &textdomain, size_t &i, std::wstring &res)
{
        std::wostringstream output;
        std::vector<std::wstring> args;
        int arg_number = 1;
        while (i < s.length()) {
                // Not an escape sequence: just add the character.
                if (s[i] != '\x1b') {
                        output.put(s[i]);
                        // The character is a literal '@'; add it twice
                        // so that it is not mistaken for an argument.
                        if (s[i] == L'@')
                                output.put(L'@');
                        ++i;
                        continue;
                }

                // We have an escape sequence: locate it and its data
                // It is either a single character, or it begins with '('
                // and extends up to the following ')', with '\' as an escape character.
                ++i;
                size_t start_index = i;
                size_t length;
                if (i == s.length()) {
                        // The escape character was the last character of the input.
                        length = 0;
                } else if (s[i] == L'(') {
                        ++i;
                        ++start_index;
                        while (i < s.length() && s[i] != L')') {
                                // A backslash makes the scan skip the next character too.
                                if (s[i] == L'\\')
                                        ++i;
                                ++i;
                        }
                        length = i - start_index;
                        // Step over the ')'. i can end up behind the end of the
                        // input (missing ')' or trailing backslash), so clamp it.
                        ++i;
                        if (i > s.length())
                                i = s.length();
                } else {
                        ++i;
                        length = 1;
                }
                std::wstring escape_sequence(s, start_index, length);

                // The escape sequence is now reconstructed.
                // Only the part in front of the first '@' is inspected here.
                std::vector<std::wstring> parts = split(escape_sequence, L'@');
                if (parts[0] == L"E") {
                        // "End of translation" escape sequence. We are done locating the string to translate.
                        break;
                } else if (parts[0] == L"F") {
                        // "Start of argument" escape sequence.
                        // Recursively translate the argument, and add it to the argument list.
                        // Add an "@n" instead of the argument to the template to translate.
                        if (arg_number >= 10) {
                                // Placeholders are a single character after '@' (see the
                                // substitution loop below), so only @1..@9 exist. The
                                // argument is still consumed from the input and stored,
                                // but no placeholder is written for it.
                                errorstream << "Ignoring too many arguments to translation" << std::endl;
                                std::wstring arg;
                                translate_all(s, i, translations, arg);
                                args.push_back(arg);
                                continue;
                        }
                        output.put(L'@');
                        output << arg_number;
                        ++arg_number;
                        std::wstring arg;
                        translate_all(s, i, translations, arg);
                        args.push_back(arg);
                } else {
                        // This is an escape sequence *inside* the template string to translate itself.
                        // This should not happen, show an error message.
                        // The sequence is dropped from the template.
                        errorstream << "Ignoring escape sequence '"
                                << wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
                }
        }

        std::wstring toutput;
        // Translate the template.
        if (translations != nullptr)
                toutput = translations->getTranslation(
                                textdomain, output.str());
        else
                toutput = output.str();

        // Put back the arguments in the translated template.
        std::wostringstream result;
        size_t j = 0;
        while (j < toutput.length()) {
                // Normal character, add it to output and continue.
                // A '@' that is the very last character is copied as is, too.
                if (toutput[j] != L'@' || j == toutput.length() - 1) {
                        result.put(toutput[j]);
                        ++j;
                        continue;
                }

                ++j;
                // Literal escape for '@'.
                if (toutput[j] == L'@') {
                        result.put(L'@');
                        ++j;
                        continue;
                }

                // Here we have an argument; get its index and add the translated argument to the output.
                // '1' maps to args[0], '2' to args[1], and so on.
                int arg_index = toutput[j] - L'1';
                ++j;
                if (0 <= arg_index && (size_t)arg_index < args.size()) {
                        result << args[arg_index];
                } else {
                        // This is not allowed: show an error message
                        errorstream << "Ignoring out-of-bounds argument escape sequence in translation" << std::endl;
                }
        }
        res = result.str();
}
 743
/*
 * Copies `s` from position `i` onwards into `res`, translating every
 * translated-string section ("T" escape sequence) on the way.
 *
 * s:            the complete input string
 * i:            in: position to start reading at
 *               out: position behind the "E" escape sequence that ended
 *               the scan, or s.length() if the input ends first
 * translations: passed on unchanged to translate_string()
 * res:          receives the resulting text
 *
 * Escape sequences other than "E" and "T" are copied to the output
 * unchanged.
 */
void translate_all(const std::wstring &s, size_t &i,
                Translations *translations, std::wstring &res)
{
        std::wostringstream output;
        while (i < s.length()) {
                // Not an escape sequence: just add the character.
                if (s[i] != '\x1b') {
                        output.put(s[i]);
                        ++i;
                        continue;
                }

                // We have an escape sequence: locate it and its data
                // It is either a single character, or it begins with '('
                // and extends up to the following ')', with '\' as an escape character.
                // escape_start remembers where the whole sequence begins so that
                // it can be copied verbatim further down.
                size_t escape_start = i;
                ++i;
                size_t start_index = i;
                size_t length;
                if (i == s.length()) {
                        // The escape character was the last character of the input.
                        length = 0;
                } else if (s[i] == L'(') {
                        ++i;
                        ++start_index;
                        while (i < s.length() && s[i] != L')') {
                                // A backslash makes the scan skip the next character too.
                                if (s[i] == L'\\') {
                                        ++i;
                                }
                                ++i;
                        }
                        length = i - start_index;
                        // Step over the ')'. i can end up behind the end of the
                        // input (missing ')' or trailing backslash), so clamp it.
                        ++i;
                        if (i > s.length())
                                i = s.length();
                } else {
                        ++i;
                        length = 1;
                }
                std::wstring escape_sequence(s, start_index, length);

                // The escape sequence is now reconstructed.
                // parts[0] is the sequence type; for "T", parts[1] (if present)
                // is the text domain.
                std::vector<std::wstring> parts = split(escape_sequence, L'@');
                if (parts[0] == L"E") {
                        // "End of argument" escape sequence. Exit.
                        break;
                } else if (parts[0] == L"T") {
                        // Beginning of translated string.
                        std::wstring textdomain;
                        if (parts.size() > 1)
                                textdomain = parts[1];
                        std::wstring translated;
                        translate_string(s, translations, textdomain, i, translated);
                        output << translated;
                } else {
                        // Another escape sequence, such as colors. Preserve it.
                        output << std::wstring(s, escape_start, i - escape_start);
                }
        }

        res = output.str();
}
 805
 806 // Translate string server side
 807 std::wstring translate_string(const std::wstring &s, Translations *translations)
 808 {
 809         size_t i = 0;
 810         std::wstring res;
 811         translate_all(s, i, translations, res);
 812         return res;
 813 }
 814
 815 // Translate string client side
 816 std::wstring translate_string(const std::wstring &s)
 817 {
 818 #ifdef SERVER
 819         return translate_string(s, nullptr);
 820 #else
 821         return translate_string(s, g_client_translations);
 822 #endif
 823 }
 824
 825 static const std::array<std::wstring, 30> disallowed_dir_names = {
 826         // Problematic filenames from here:
 827         // https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
 828         // Plus undocumented values from here:
 829         // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
 830         L"CON",
 831         L"PRN",
 832         L"AUX",
 833         L"NUL",
 834         L"COM1",
 835         L"COM2",
 836         L"COM3",
 837         L"COM4",
 838         L"COM5",
 839         L"COM6",
 840         L"COM7",
 841         L"COM8",
 842         L"COM9",
 843         L"COM\u00B2",
 844         L"COM\u00B3",
 845         L"COM\u00B9",
 846         L"LPT1",
 847         L"LPT2",
 848         L"LPT3",
 849         L"LPT4",
 850         L"LPT5",
 851         L"LPT6",
 852         L"LPT7",
 853         L"LPT8",
 854         L"LPT9",
 855         L"LPT\u00B2",
 856         L"LPT\u00B3",
 857         L"LPT\u00B9",
 858         L"CONIN$",
 859         L"CONOUT$",
 860 };
 861
 862 /**
 863  * List of characters that are blacklisted from created directories
 864  */
 865 static const std::wstring disallowed_path_chars = L"<>:\"/\\|?*.";
 866
 867
 868 std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
 869 {
 870         std::wstring safe_name = utf8_to_wide(str);
 871
 872         for (std::wstring disallowed_name : disallowed_dir_names) {
 873                 if (str_equal(safe_name, disallowed_name, true)) {
 874                         safe_name = utf8_to_wide(optional_prefix) + safe_name;
 875                         break;
 876                 }
 877         }
 878
 879         // Replace leading and trailing spaces with underscores.
 880         size_t start = safe_name.find_first_not_of(L' ');
 881         size_t end = safe_name.find_last_not_of(L' ');
 882         if (start == std::wstring::npos || end == std::wstring::npos)
 883                 start = end = safe_name.size();
 884         for (size_t i = 0; i < start; i++)
 885                 safe_name[i] = L'_';
 886         for (size_t i = end + 1; i < safe_name.size(); i++)
 887                 safe_name[i] = L'_';
 888
 889         // Replace other disallowed characters with underscores
 890         for (size_t i = 0; i < safe_name.length(); i++) {
 891                 bool is_valid = true;
 892
 893                 // Unlikely, but control characters should always be blacklisted
 894                 if (safe_name[i] < 32) {
 895                         is_valid = false;
 896                 } else if (safe_name[i] < 128) {
 897                         is_valid = disallowed_path_chars.find_first_of(safe_name[i])
 898                                         == std::wstring::npos;
 899                 }
 900
 901                 if (!is_valid)
 902                         safe_name[i] = L'_';
 903         }
 904
 905         return wide_to_utf8(safe_name);
 906 }
 907
 908
 909 void safe_print_string(std::ostream &os, const std::string &str)
 910 {
 911         std::ostream::fmtflags flags = os.flags();
 912         os << std::hex;
 913         for (const char c : str) {
 914                 if (IS_ASCII_PRINTABLE_CHAR(c) || IS_UTF8_MULTB_START(c) ||
 915                                 IS_UTF8_MULTB_INNER(c) || c == '\n' || c == '\t') {
 916                         os << c;
 917                 } else {
 918                         os << '<' << std::setw(2) << (int)c << '>';
 919                 }
 920         }
 921         os.setf(flags);
 922 }