src/util/string.h

   1 /*
   2 Minetest
   3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
   4
   5 This program is free software; you can redistribute it and/or modify
   6 it under the terms of the GNU Lesser General Public License as published by
   7 the Free Software Foundation; either version 2.1 of the License, or
   8 (at your option) any later version.
   9
  10 This program is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 GNU Lesser General Public License for more details.
  14
  15 You should have received a copy of the GNU Lesser General Public License along
  16 with this program; if not, write to the Free Software Foundation, Inc.,
  17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18 */
  19
  20 #pragma once
  21
  22 #include "irrlichttypes_bloated.h"
  23 #include "irrString.h"
  24 #include <cstdlib>
  25 #include <string>
  26 #include <cstring>
  27 #include <vector>
  28 #include <map>
  29 #include <sstream>
  30 #include <iomanip>
  31 #include <cctype>
  32 #include <unordered_map>
  33
// Turns the tokens of x into a string literal exactly as written
// (x is not macro-expanded first).
#define STRINGIFY(x) #x
// Like STRINGIFY, but x is macro-expanded before being turned into a string.
#define TOSTRING(x) STRINGIFY(x)

// Checks whether a value is an ASCII printable character,
// i.e. lies in the inclusive range 0x20..0x7e
#define IS_ASCII_PRINTABLE_CHAR(x)   \
        (((unsigned int)(x) >= 0x20) &&  \
        ( (unsigned int)(x) <= 0x7e))

// Checks whether a byte is an inner byte for an utf-8 multibyte sequence,
// i.e. lies in the inclusive range 0x80..0xbf
#define IS_UTF8_MULTB_INNER(x)       \
        (((unsigned char)(x) >= 0x80) && \
        ( (unsigned char)(x) <= 0xbf))

// Checks whether a byte is a start byte for an utf-8 multibyte sequence,
// i.e. lies in the inclusive range 0xc2..0xf4
#define IS_UTF8_MULTB_START(x)       \
        (((unsigned char)(x) >= 0xc2) && \
        ( (unsigned char)(x) <= 0xf4))

// Given a start byte x for an utf-8 multibyte sequence
// it gives the length of the whole sequence in bytes:
// 2 for x < 0xe0, 3 for x < 0xf0, otherwise 4.
// The macro does not itself check that x is a valid start byte.
#define UTF8_MULTB_START_LEN(x)            \
        (((unsigned char)(x) < 0xe0) ? 2 :     \
        (((unsigned char)(x) < 0xf0) ? 3 : 4))
  57
  58 typedef std::unordered_map<std::string, std::string> StringMap;
  59
// Pairs a textual flag name with a 32-bit flag value.
// Pointers to FlagDesc are taken by readFlagString() and writeFlagString(),
// which are declared further down in this header.
struct FlagDesc {
        const char *name; // flag name as a C string
        u32 flag;         // value associated with that name
};
  64
  65 // try not to convert between wide/utf8 encodings; this can result in data loss
  66 // try to only convert between them when you need to input/output stuff via Irrlicht
  67 std::wstring utf8_to_wide(const std::string &input);
  68 std::string wide_to_utf8(const std::wstring &input);
  69
  70 wchar_t *utf8_to_wide_c(const char *str);
  71
  72 // NEVER use those two functions unless you have a VERY GOOD reason to
  73 // they just convert between wide and multibyte encoding
  74 // multibyte encoding depends on current locale, this is no good, especially on Windows
  75
  76 // You must free the returned string!
  77 // The returned string is allocated using new
  78 wchar_t *narrow_to_wide_c(const char *str);
  79 std::wstring narrow_to_wide(const std::string &mbs);
  80 std::string wide_to_narrow(const std::wstring &wcs);
  81
  82 std::string urlencode(const std::string &str);
  83 std::string urldecode(const std::string &str);
  84 u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask);
  85 std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask);
  86 size_t mystrlcpy(char *dst, const char *src, size_t size);
  87 char *mystrtok_r(char *s, const char *sep, char **lasts);
  88 u64 read_seed(const char *str);
  89 bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
  90                 unsigned char default_alpha = 0xff);
  91
  92
  93 /**
  94  * Returns a copy of \p str with spaces inserted at the right hand side to ensure
  95  * that the string is \p len characters in length. If \p str is <= \p len then the
  96  * returned string will be identical to str.
  97  */
  98 inline std::string padStringRight(std::string str, size_t len)
  99 {
 100         if (len > str.size())
 101                 str.insert(str.end(), len - str.size(), ' ');
 102
 103         return str;
 104 }
 105
 106 /**
 107  * Returns a version of \p str with the first occurrence of a string
 108  * contained within ends[] removed from the end of the string.
 109  *
 110  * @param str
 111  * @param ends A NULL- or ""- terminated array of strings to remove from s in
 112  *      the copy produced.  Note that once one of these strings is removed
 113  *      that no further postfixes contained within this array are removed.
 114  *
 115  * @return If no end could be removed then "" is returned.
 116  */
 117 inline std::string removeStringEnd(const std::string &str,
 118                 const char *ends[])
 119 {
 120         const char **p = ends;
 121
 122         for (; *p && (*p)[0] != '\0'; p++) {
 123                 std::string end = *p;
 124                 if (str.size() < end.size())
 125                         continue;
 126                 if (str.compare(str.size() - end.size(), end.size(), end) == 0)
 127                         return str.substr(0, str.size() - end.size());
 128         }
 129
 130         return "";
 131 }
 132
 133
 134 /**
 135  * Check two strings for equivalence.  If \p case_insensitive is true
 136  * then the case of the strings is ignored (default is false).
 137  *
 138  * @param s1
 139  * @param s2
 140  * @param case_insensitive
 141  * @return true if the strings match
 142  */
 143 template <typename T>
 144 inline bool str_equal(const std::basic_string<T> &s1,
 145                 const std::basic_string<T> &s2,
 146                 bool case_insensitive = false)
 147 {
 148         if (!case_insensitive)
 149                 return s1 == s2;
 150
 151         if (s1.size() != s2.size())
 152                 return false;
 153
 154         for (size_t i = 0; i < s1.size(); ++i)
 155                 if(tolower(s1[i]) != tolower(s2[i]))
 156                         return false;
 157
 158         return true;
 159 }
 160
 161
 162 /**
 163  * Check whether \p str begins with the string prefix. If \p case_insensitive
 164  * is true then the check is case insensitve (default is false; i.e. case is
 165  * significant).
 166  *
 167  * @param str
 168  * @param prefix
 169  * @param case_insensitive
 170  * @return true if the str begins with prefix
 171  */
 172 template <typename T>
 173 inline bool str_starts_with(const std::basic_string<T> &str,
 174                 const std::basic_string<T> &prefix,
 175                 bool case_insensitive = false)
 176 {
 177         if (str.size() < prefix.size())
 178                 return false;
 179
 180         if (!case_insensitive)
 181                 return str.compare(0, prefix.size(), prefix) == 0;
 182
 183         for (size_t i = 0; i < prefix.size(); ++i)
 184                 if (tolower(str[i]) != tolower(prefix[i]))
 185                         return false;
 186         return true;
 187 }
 188
 189 /**
 190  * Check whether \p str begins with the string prefix. If \p case_insensitive
 191  * is true then the check is case insensitve (default is false; i.e. case is
 192  * significant).
 193  *
 194  * @param str
 195  * @param prefix
 196  * @param case_insensitive
 197  * @return true if the str begins with prefix
 198  */
 199 template <typename T>
 200 inline bool str_starts_with(const std::basic_string<T> &str,
 201                 const T *prefix,
 202                 bool case_insensitive = false)
 203 {
 204         return str_starts_with(str, std::basic_string<T>(prefix),
 205                         case_insensitive);
 206 }
 207
 208
 209 /**
 210  * Check whether \p str ends with the string suffix. If \p case_insensitive
 211  * is true then the check is case insensitve (default is false; i.e. case is
 212  * significant).
 213  *
 214  * @param str
 215  * @param suffix
 216  * @param case_insensitive
 217  * @return true if the str begins with suffix
 218  */
 219 template <typename T>
 220 inline bool str_ends_with(const std::basic_string<T> &str,
 221                 const std::basic_string<T> &suffix,
 222                 bool case_insensitive = false)
 223 {
 224         if (str.size() < suffix.size())
 225                 return false;
 226
 227         size_t start = str.size() - suffix.size();
 228         if (!case_insensitive)
 229                 return str.compare(start, suffix.size(), suffix) == 0;
 230
 231         for (size_t i = 0; i < suffix.size(); ++i)
 232                 if (tolower(str[start + i]) != tolower(suffix[i]))
 233                         return false;
 234         return true;
 235 }
 236
 237
 238 /**
 239  * Check whether \p str ends with the string suffix. If \p case_insensitive
 240  * is true then the check is case insensitve (default is false; i.e. case is
 241  * significant).
 242  *
 243  * @param str
 244  * @param suffix
 245  * @param case_insensitive
 246  * @return true if the str begins with suffix
 247  */
 248 template <typename T>
 249 inline bool str_ends_with(const std::basic_string<T> &str,
 250                 const T *suffix,
 251                 bool case_insensitive = false)
 252 {
 253         return str_ends_with(str, std::basic_string<T>(suffix),
 254                         case_insensitive);
 255 }
 256
 257
 258 /**
 259  * Splits a string into its component parts separated by the character
 260  * \p delimiter.
 261  *
 262  * @return An std::vector<std::basic_string<T> > of the component parts
 263  */
 264 template <typename T>
 265 inline std::vector<std::basic_string<T> > str_split(
 266                 const std::basic_string<T> &str,
 267                 T delimiter)
 268 {
 269         std::vector<std::basic_string<T> > parts;
 270         std::basic_stringstream<T> sstr(str);
 271         std::basic_string<T> part;
 272
 273         while (std::getline(sstr, part, delimiter))
 274                 parts.push_back(part);
 275
 276         return parts;
 277 }
 278
 279
 280 /**
 281  * @param str
 282  * @return A copy of \p str converted to all lowercase characters.
 283  */
 284 inline std::string lowercase(const std::string &str)
 285 {
 286         std::string s2;
 287
 288         s2.reserve(str.size());
 289
 290         for (char i : str)
 291                 s2 += tolower(i);
 292
 293         return s2;
 294 }
 295
 296
 297 /**
 298  * @param str
 299  * @return A copy of \p str with leading and trailing whitespace removed.
 300  */
 301 inline std::string trim(const std::string &str)
 302 {
 303         size_t front = 0;
 304
 305         while (std::isspace(str[front]))
 306                 ++front;
 307
 308         size_t back = str.size();
 309         while (back > front && std::isspace(str[back - 1]))
 310                 --back;
 311
 312         return str.substr(front, back - front);
 313 }
 314
 315
 316 /**
 317  * Returns whether \p str should be regarded as (bool) true.  Case and leading
 318  * and trailing whitespace are ignored.  Values that will return
 319  * true are "y", "yes", "true" and any number that is not 0.
 320  * @param str
 321  */
 322 inline bool is_yes(const std::string &str)
 323 {
 324         std::string s2 = lowercase(trim(str));
 325
 326         return s2 == "y" || s2 == "yes" || s2 == "true" || atoi(s2.c_str()) != 0;
 327 }
 328
 329
 330 /**
 331  * Converts the string \p str to a signed 32-bit integer. The converted value
 332  * is constrained so that min <= value <= max.
 333  *
 334  * @see atoi(3) for limitations
 335  *
 336  * @param str
 337  * @param min Range minimum
 338  * @param max Range maximum
 339  * @return The value converted to a signed 32-bit integer and constrained
 340  *      within the range defined by min and max (inclusive)
 341  */
 342 inline s32 mystoi(const std::string &str, s32 min, s32 max)
 343 {
 344         s32 i = atoi(str.c_str());
 345
 346         if (i < min)
 347                 i = min;
 348         if (i > max)
 349                 i = max;
 350
 351         return i;
 352 }
 353
 354
// MSVC2010 includes its own versions of these
 356 //#if !defined(_MSC_VER) || _MSC_VER < 1600
 357
 358
 359 /**
 360  * Returns a 32-bit value reprensented by the string \p str (decimal).
 361  * @see atoi(3) for further limitations
 362  */
 363 inline s32 mystoi(const std::string &str)
 364 {
 365         return atoi(str.c_str());
 366 }
 367
 368
 369 /**
 370  * Returns s 32-bit value represented by the wide string \p str (decimal).
 371  * @see atoi(3) for further limitations
 372  */
 373 inline s32 mystoi(const std::wstring &str)
 374 {
 375         return mystoi(wide_to_narrow(str));
 376 }
 377
 378
 379 /**
 380  * Returns a float reprensented by the string \p str (decimal).
 381  * @see atof(3)
 382  */
 383 inline float mystof(const std::string &str)
 384 {
 385         return atof(str.c_str());
 386 }
 387
 388 //#endif
 389
 390 #define stoi mystoi
 391 #define stof mystof
 392
 393 /// Returns a value represented by the string \p val.
 394 template <typename T>
 395 inline T from_string(const std::string &str)
 396 {
 397         std::stringstream tmp(str);
 398         T t;
 399         tmp >> t;
 400         return t;
 401 }
 402
 403 /// Returns a 64-bit signed value represented by the string \p str (decimal).
 404 inline s64 stoi64(const std::string &str) { return from_string<s64>(str); }
 405
// Fallback definitions of std::to_string() and std::to_wstring(), compiled
// only when __cplusplus reports a standard older than C++11.
// NOTE(review): other parts of this header use range-based for and auto,
// which need C++11 - presumably this branch is only reached on compilers
// that report an outdated __cplusplus value; confirm before relying on it.
#if __cplusplus < 201103L
namespace std {

/// Returns a string representing the value \p val
/// (formatted by streaming it into an ostringstream).
template <typename T>
inline string to_string(T val)
{
        ostringstream oss;
        oss << val;
        return oss.str();
}
// Specializes to_string() for a floating point type T so that the value is
// written in fixed notation with 6 digits after the decimal point.
#define DEFINE_STD_TOSTRING_FLOATINGPOINT(T)            \
        template <>                                     \
        inline string to_string<T>(T val)               \
        {                                               \
                ostringstream oss;                      \
                oss << std::fixed                       \
                        << std::setprecision(6)         \
                        << val;                         \
                return oss.str();                       \
        }
DEFINE_STD_TOSTRING_FLOATINGPOINT(float)
DEFINE_STD_TOSTRING_FLOATINGPOINT(double)
DEFINE_STD_TOSTRING_FLOATINGPOINT(long double)

#undef DEFINE_STD_TOSTRING_FLOATINGPOINT

/// Returns a wide string representing the value \p val
/// (the to_string() result passed through utf8_to_wide()).
template <typename T>
inline wstring to_wstring(T val)
{
      return utf8_to_wide(to_string(val));
}
}
#endif
 441
 442 /// Returns a string representing the decimal value of the 32-bit value \p i.
 443 inline std::string itos(s32 i) { return std::to_string(i); }
 444 /// Returns a string representing the decimal value of the 64-bit value \p i.
 445 inline std::string i64tos(s64 i) { return std::to_string(i); }
 446
 447 // std::to_string uses the '%.6f' conversion, which is inconsistent with
 448 // std::ostream::operator<<() and impractical too.  ftos() uses the
 449 // more generic and std::ostream::operator<<()-compatible '%G' format.
 450 /// Returns a string representing the decimal value of the float value \p f.
 451 inline std::string ftos(float f)
 452 {
 453         std::ostringstream oss;
 454         oss << f;
 455         return oss.str();
 456 }
 457
 458
 459 /**
 460  * Replace all occurrences of \p pattern in \p str with \p replacement.
 461  *
 462  * @param str String to replace pattern with replacement within.
 463  * @param pattern The pattern to replace.
 464  * @param replacement What to replace the pattern with.
 465  */
 466 inline void str_replace(std::string &str, const std::string &pattern,
 467                 const std::string &replacement)
 468 {
 469         std::string::size_type start = str.find(pattern, 0);
 470         while (start != str.npos) {
 471                 str.replace(start, pattern.size(), replacement);
 472                 start = str.find(pattern, start + replacement.size());
 473         }
 474 }
 475
 476 /**
 477  * Escapes characters [ ] \ , ; that can not be used in formspecs
 478  */
 479 inline void str_formspec_escape(std::string &str)
 480 {
 481         str_replace(str, "\\", "\\\\");
 482         str_replace(str, "]", "\\]");
 483         str_replace(str, "[", "\\[");
 484         str_replace(str, ";", "\\;");
 485         str_replace(str, ",", "\\,");
 486 }
 487
 488 /**
 489  * Replace all occurrences of the character \p from in \p str with \p to.
 490  *
 491  * @param str The string to (potentially) modify.
 492  * @param from The character in str to replace.
 493  * @param to The replacement character.
 494  */
 495 void str_replace(std::string &str, char from, char to);
 496
 497
 498 /**
 499  * Check that a string only contains whitelisted characters. This is the
 500  * opposite of string_allowed_blacklist().
 501  *
 502  * @param str The string to be checked.
 503  * @param allowed_chars A string containing permitted characters.
 504  * @return true if the string is allowed, otherwise false.
 505  *
 506  * @see string_allowed_blacklist()
 507  */
 508 inline bool string_allowed(const std::string &str, const std::string &allowed_chars)
 509 {
 510         return str.find_first_not_of(allowed_chars) == str.npos;
 511 }
 512
 513
 514 /**
 515  * Check that a string contains no blacklisted characters. This is the
 516  * opposite of string_allowed().
 517  *
 518  * @param str The string to be checked.
 519  * @param blacklisted_chars A string containing prohibited characters.
 520  * @return true if the string is allowed, otherwise false.
 521
 522  * @see string_allowed()
 523  */
 524 inline bool string_allowed_blacklist(const std::string &str,
 525                 const std::string &blacklisted_chars)
 526 {
 527         return str.find_first_of(blacklisted_chars) == str.npos;
 528 }
 529
 530
 531 /**
 532  * Create a string based on \p from where a newline is forcefully inserted
 533  * every \p row_len characters.
 534  *
 535  * @note This function does not honour word wraps and blindy inserts a newline
 536  *      every \p row_len characters whether it breaks a word or not.  It is
 537  *      intended to be used for, for example, showing paths in the GUI.
 538  *
 539  * @note This function doesn't wrap inside utf-8 multibyte sequences and also
 540  *      counts multibyte sequences correcly as single characters.
 541  *
 542  * @param from The (utf-8) string to be wrapped into rows.
 543  * @param row_len The row length (in characters).
 544  * @return A new string with the wrapping applied.
 545  */
 546 inline std::string wrap_rows(const std::string &from,
 547                 unsigned row_len)
 548 {
 549         std::string to;
 550
 551         size_t character_idx = 0;
 552         for (size_t i = 0; i < from.size(); i++) {
 553                 if (!IS_UTF8_MULTB_INNER(from[i])) {
 554                         // Wrap string after last inner byte of char
 555                         if (character_idx > 0 && character_idx % row_len == 0)
 556                                 to += '\n';
 557                         character_idx++;
 558                 }
 559                 to += from[i];
 560         }
 561
 562         return to;
 563 }
 564
 565
 566 /**
 567  * Removes backslashes from an escaped string (FormSpec strings)
 568  */
 569 template <typename T>
 570 inline std::basic_string<T> unescape_string(const std::basic_string<T> &s)
 571 {
 572         std::basic_string<T> res;
 573
 574         for (size_t i = 0; i < s.length(); i++) {
 575                 if (s[i] == '\\') {
 576                         i++;
 577                         if (i >= s.length())
 578                                 break;
 579                 }
 580                 res += s[i];
 581         }
 582
 583         return res;
 584 }
 585
 586 /**
 587  * Remove all escape sequences in \p s.
 588  *
 589  * @param s The string in which to remove escape sequences.
 590  * @return \p s, with escape sequences removed.
 591  */
 592 template <typename T>
 593 std::basic_string<T> unescape_enriched(const std::basic_string<T> &s)
 594 {
 595         std::basic_string<T> output;
 596         size_t i = 0;
 597         while (i < s.length()) {
 598                 if (s[i] == '\x1b') {
 599                         ++i;
 600                         if (i == s.length()) continue;
 601                         if (s[i] == '(') {
 602                                 ++i;
 603                                 while (i < s.length() && s[i] != ')') {
 604                                         if (s[i] == '\\') {
 605                                                 ++i;
 606                                         }
 607                                         ++i;
 608                                 }
 609                                 ++i;
 610                         } else {
 611                                 ++i;
 612                         }
 613                         continue;
 614                 }
 615                 output += s[i];
 616                 ++i;
 617         }
 618         return output;
 619 }
 620
 621 template <typename T>
 622 std::vector<std::basic_string<T> > split(const std::basic_string<T> &s, T delim)
 623 {
 624         std::vector<std::basic_string<T> > tokens;
 625
 626         std::basic_string<T> current;
 627         bool last_was_escape = false;
 628         for (size_t i = 0; i < s.length(); i++) {
 629                 T si = s[i];
 630                 if (last_was_escape) {
 631                         current += '\\';
 632                         current += si;
 633                         last_was_escape = false;
 634                 } else {
 635                         if (si == delim) {
 636                                 tokens.push_back(current);
 637                                 current = std::basic_string<T>();
 638                                 last_was_escape = false;
 639                         } else if (si == '\\') {
 640                                 last_was_escape = true;
 641                         } else {
 642                                 current += si;
 643                                 last_was_escape = false;
 644                         }
 645                 }
 646         }
 647         //push last element
 648         tokens.push_back(current);
 649
 650         return tokens;
 651 }
 652
 653 std::wstring translate_string(const std::wstring &s);
 654
 655 inline std::wstring unescape_translate(const std::wstring &s) {
 656         return unescape_enriched(translate_string(s));
 657 }
 658
 659 /**
 660  * Checks that all characters in \p to_check are a decimal digits.
 661  *
 662  * @param to_check
 663  * @return true if to_check is not empty and all characters in to_check are
 664  *      decimal digits, otherwise false
 665  */
 666 inline bool is_number(const std::string &to_check)
 667 {
 668         for (char i : to_check)
 669                 if (!std::isdigit(i))
 670                         return false;
 671
 672         return !to_check.empty();
 673 }
 674
 675
 676 /**
 677  * Returns a C-string, either "true" or "false", corresponding to \p val.
 678  *
 679  * @return If \p val is true, then "true" is returned, otherwise "false".
 680  */
 681 inline const char *bool_to_cstr(bool val)
 682 {
 683         return val ? "true" : "false";
 684 }
 685
 686 inline const std::string duration_to_string(int sec)
 687 {
 688         int min = sec / 60;
 689         sec %= 60;
 690         int hour = min / 60;
 691         min %= 60;
 692
 693         std::stringstream ss;
 694         if (hour > 0) {
 695                 ss << hour << "h ";
 696         }
 697
 698         if (min > 0) {
 699                 ss << min << "m ";
 700         }
 701
 702         if (sec > 0) {
 703                 ss << sec << "s ";
 704         }
 705
 706         return ss.str();
 707 }
 708
 709 /**
 710  * Joins a vector of strings by the string \p delimiter.
 711  *
 712  * @return A std::string
 713  */
 714 inline std::string str_join(const std::vector<std::string> &list,
 715                 const std::string &delimiter)
 716 {
 717         std::ostringstream oss;
 718         bool first = true;
 719         for (const auto &part : list) {
 720                 if (!first)
 721                         oss << delimiter;
 722                 oss << part;
 723                 first = false;
 724         }
 725         return oss.str();
 726 }
 727
 728 /**
 729  * Create a UTF8 std::string from a irr::core::stringw.
 730  */
 731 inline std::string stringw_to_utf8(const irr::core::stringw &input)
 732 {
 733         std::wstring str(input.c_str());
 734         return wide_to_utf8(str);
 735 }
 736
 737  /**
 738   * Create a irr::core:stringw from a UTF8 std::string.
 739   */
 740 inline irr::core::stringw utf8_to_stringw(const std::string &input)
 741 {
 742         std::wstring str = utf8_to_wide(input);
 743         return irr::core::stringw(str.c_str());
 744 }