X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=src%2Futil%2Fstring.h;h=0d2a6bdb2cbcf6e70008da71ad0380bc5c53a2fd;hb=68f9263a24a345435d2310ab559ce8a811ef0427;hp=f4337062e0adff6799f89c504ce45783f39d27db;hpb=43bf4324d5f639f338f88a599fe862630f85f787;p=dragonfireclient.git diff --git a/src/util/string.h b/src/util/string.h index f4337062e..0d2a6bdb2 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -17,73 +17,114 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#ifndef UTIL_STRING_HEADER -#define UTIL_STRING_HEADER +#pragma once #include "irrlichttypes_bloated.h" -#include +#include "irrString.h" +#include #include #include #include +#include #include +#include #include +#include #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) +// Checks whether a value is an ASCII printable character +#define IS_ASCII_PRINTABLE_CHAR(x) \ + (((unsigned int)(x) >= 0x20) && \ + ( (unsigned int)(x) <= 0x7e)) + +// Checks whether a byte is an inner byte for an utf-8 multibyte sequence +#define IS_UTF8_MULTB_INNER(x) \ + (((unsigned char)(x) >= 0x80) && \ + ( (unsigned char)(x) <= 0xbf)) + +// Checks whether a byte is a start byte for an utf-8 multibyte sequence +#define IS_UTF8_MULTB_START(x) \ + (((unsigned char)(x) >= 0xc2) && \ + ( (unsigned char)(x) <= 0xf4)) + +// Given a start byte x for an utf-8 multibyte sequence +// it gives the length of the whole sequence in bytes. +#define UTF8_MULTB_START_LEN(x) \ + (((unsigned char)(x) < 0xe0) ? 2 : \ + (((unsigned char)(x) < 0xf0) ? 3 : 4)) + +typedef std::unordered_map StringMap; + struct FlagDesc { const char *name; u32 flag; }; -std::wstring narrow_to_wide(const std::string& mbs); -std::string wide_to_narrow(const std::wstring& wcs); -std::string translatePassword(std::string playername, std::wstring password); -std::string urlencode(std::string str); -std::string urldecode(std::string str); +// try not to convert between wide/utf8 encodings; this can result in data loss +// try to only convert between them when you need to input/output stuff via Irrlicht +std::wstring utf8_to_wide(const std::string &input); +std::string wide_to_utf8(const std::wstring &input); + +wchar_t *utf8_to_wide_c(const char *str); + +// NEVER use those two functions unless you have a VERY GOOD reason to +// they just convert between wide and multibyte encoding +// multibyte encoding depends on current locale, this is no good, especially on Windows + +// You must free the returned string! +// The returned string is allocated using new +wchar_t *narrow_to_wide_c(const char *str); +std::wstring narrow_to_wide(const std::string &mbs); +std::string wide_to_narrow(const std::wstring &wcs); + +std::string urlencode(const std::string &str); +std::string urldecode(const std::string &str); u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask); std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask); size_t mystrlcpy(char *dst, const char *src, size_t size); char *mystrtok_r(char *s, const char *sep, char **lasts); u64 read_seed(const char *str); -bool parseColorString(const std::string &value, video::SColor &color, bool quiet); +bool parseColorString(const std::string &value, video::SColor &color, bool quiet, + unsigned char default_alpha = 0xff); /** - * Returns a copy of s with spaces inserted at the right hand side to ensure - * that the string is len characters in length. If s is <= len then the - * returned string will be identical to s. + * Returns a copy of \p str with spaces inserted at the right hand side to ensure + * that the string is \p len characters in length. If \p str is <= \p len then the + * returned string will be identical to str. */ -static inline std::string padStringRight(std::string s, size_t len) +inline std::string padStringRight(std::string str, size_t len) { - if (len > s.size()) - s.insert(s.end(), len - s.size(), ' '); + if (len > str.size()) + str.insert(str.end(), len - str.size(), ' '); - return s; + return str; } - /** - * Returns a version of the string s with the first occurrence of a string + * Returns a version of \p str with the first occurrence of a string * contained within ends[] removed from the end of the string. * - * @param s + * @param str * @param ends A NULL- or ""- terminated array of strings to remove from s in - * the copy produced. Note that once one of these strings is removed - * that no further postfixes contained within this array are removed. + * the copy produced. Note that once one of these strings is removed + * that no further postfixes contained within this array are removed. * - * @return If no end could be removed then "" is returned + * @return If no end could be removed then "" is returned. */ -static inline std::string removeStringEnd(const std::string &s, const char *ends[]) +inline std::string removeStringEnd(const std::string &str, + const char *ends[]) { const char **p = ends; for (; *p && (*p)[0] != '\0'; p++) { std::string end = *p; - if(s.size() < end.size()) + if (str.size() < end.size()) continue; - if(s.substr(s.size()-end.size(), end.size()) == end) - return s.substr(0, s.size() - end.size()); + if (str.compare(str.size() - end.size(), end.size(), end) == 0) + return str.substr(0, str.size() - end.size()); } return ""; @@ -91,94 +132,143 @@ static inline std::string removeStringEnd(const std::string &s, const char *ends /** - * Check two wide strings for equivalence. If case_insensitive is true - * then the case of the strings are ignored (default is false). + * Check two strings for equivalence. If \p case_insensitive is true + * then the case of the strings is ignored (default is false). * * @param s1 * @param s2 * @param case_insensitive * @return true if the strings match */ -inline bool str_equal(const std::wstring &s1, const std::wstring &s2, +template +inline bool str_equal(const std::basic_string &s1, + const std::basic_string &s2, bool case_insensitive = false) { - if (case_insensitive) { - if (s1.size() != s2.size()) - return false; + if (!case_insensitive) + return s1 == s2; - for (size_t i = 0; i < s1.size(); ++i) - if(tolower(s1[i]) != tolower(s2[i])) - return false; + if (s1.size() != s2.size()) + return false; - return true; - } + for (size_t i = 0; i < s1.size(); ++i) + if(tolower(s1[i]) != tolower(s2[i])) + return false; - return s1 == s2; + return true; } /** - * Check whether str begins with the string prefix. If the argument - * case_insensitive == true then the check is case insensitve (default - * is false; i.e. case is significant). + * Check whether \p str begins with the string prefix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). * * @param str * @param prefix * @param case_insensitive - * @return true if the str begins with prefix + * @return true if the str begins with prefix */ -inline bool str_starts_with(const std::wstring &str, const std::wstring &prefix, +template +inline bool str_starts_with(const std::basic_string &str, + const std::basic_string &prefix, bool case_insensitive = false) { if (str.size() < prefix.size()) return false; - if (case_insensitive) { - for (size_t i = 0; i < prefix.size(); ++i) - if (tolower(str[i]) != tolower(prefix[i])) - return false; - } else { - for (size_t i = 0; i < prefix.size(); ++i) - if (str[i] != prefix[i]) - return false; - } + if (!case_insensitive) + return str.compare(0, prefix.size(), prefix) == 0; + for (size_t i = 0; i < prefix.size(); ++i) + if (tolower(str[i]) != tolower(prefix[i])) + return false; return true; } +/** + * Check whether \p str begins with the string prefix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param prefix + * @param case_insensitive + * @return true if the str begins with prefix + */ +template +inline bool str_starts_with(const std::basic_string &str, + const T *prefix, + bool case_insensitive = false) +{ + return str_starts_with(str, std::basic_string(prefix), + case_insensitive); +} + /** - * Splits a string of wide characters into its component parts separated by - * the character delimiter. + * Check whether \p str ends with the string suffix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). * - * @return a std::vector of the component parts + * @param str + * @param suffix + * @param case_insensitive + * @return true if the str begins with suffix */ -inline std::vector str_split(const std::wstring &str, - wchar_t delimiter) +template +inline bool str_ends_with(const std::basic_string &str, + const std::basic_string &suffix, + bool case_insensitive = false) { - std::vector parts; - std::wstringstream sstr(str); - std::wstring part; + if (str.size() < suffix.size()) + return false; - while (std::getline(sstr, part, delimiter)) - parts.push_back(part); + size_t start = str.size() - suffix.size(); + if (!case_insensitive) + return str.compare(start, suffix.size(), suffix) == 0; - return parts; + for (size_t i = 0; i < suffix.size(); ++i) + if (tolower(str[start + i]) != tolower(suffix[i])) + return false; + return true; } /** - * Splits a string into its component parts separated by the character - * delimiter. + * Check whether \p str ends with the string suffix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). * - * @return a std::vector of the component parts + * @param str + * @param suffix + * @param case_insensitive + * @return true if the str begins with suffix */ +template +inline bool str_ends_with(const std::basic_string &str, + const T *suffix, + bool case_insensitive = false) +{ + return str_ends_with(str, std::basic_string(suffix), + case_insensitive); +} -inline std::vector str_split(const std::string &str, char delimiter) { - std::vector parts; - std::stringstream sstr(str); - std::string part; +/** + * Splits a string into its component parts separated by the character + * \p delimiter. + * + * @return An std::vector > of the component parts + */ +template +inline std::vector > str_split( + const std::basic_string &str, + T delimiter) +{ + std::vector > parts; + std::basic_stringstream sstr(str); + std::basic_string part; while (std::getline(sstr, part, delimiter)) parts.push_back(part); @@ -188,70 +278,70 @@ inline std::vector str_split(const std::string &str, char delimiter /** - * Return a copy of s converted to all lowercase characters - * @param s + * @param str + * @return A copy of \p str converted to all lowercase characters. */ -inline std::string lowercase(const std::string &s) +inline std::string lowercase(const std::string &str) { std::string s2; - s2.reserve(s.size()); + s2.reserve(str.size()); - for (size_t i = 0; i < s.size(); i++) - s2 += tolower(s[i]); + for (char i : str) + s2 += tolower(i); return s2; } /** - * Returns a copy of s with leading and trailing whitespace removed. - * @param s + * @param str + * @return A copy of \p str with leading and trailing whitespace removed. */ -inline std::string trim(const std::string &s) +inline std::string trim(const std::string &str) { size_t front = 0; - while (isspace(s[front])) + while (std::isspace(str[front])) ++front; - size_t back = s.size(); - while (back > front && isspace(s[back-1])) + size_t back = str.size(); + while (back > front && std::isspace(str[back - 1])) --back; - return s.substr(front, back - front); + return str.substr(front, back - front); } /** - * Returns true if s should be regarded as (bool) true. Leading and trailing - * whitespace are ignored; case is ignored. Values that will return - * true are "y", "n", "true" and any number that != 0. - * @param s + * Returns whether \p str should be regarded as (bool) true. Case and leading + * and trailing whitespace are ignored. Values that will return + * true are "y", "yes", "true" and any number that is not 0. + * @param str */ -inline bool is_yes(const std::string &s) +inline bool is_yes(const std::string &str) { - std::string s2 = lowercase(trim(s)); + std::string s2 = lowercase(trim(str)); return s2 == "y" || s2 == "yes" || s2 == "true" || atoi(s2.c_str()) != 0; } /** - * Converts the string s to a signed 32-bit integer. The converted value is - * constrained so that min <= value <= max. + * Converts the string \p str to a signed 32-bit integer. The converted value + * is constrained so that min <= value <= max. * * @see atoi(3) for limitations * - * @param s + * @param str * @param min Range minimum * @param max Range maximum * @return The value converted to a signed 32-bit integer and constrained - * within the range defined by min and max (inclusive) + * within the range defined by min and max (inclusive) */ -inline s32 mystoi(const std::string &s, s32 min, s32 max) +inline s32 mystoi(const std::string &str, s32 min, s32 max) { - s32 i = atoi(s.c_str()); + s32 i = atoi(str.c_str()); if (i < min) i = min; @@ -262,57 +352,37 @@ inline s32 mystoi(const std::string &s, s32 min, s32 max) } -/** - * Returns a 64-bit value reprensented by the string s (decimal). - */ -inline s64 stoi64(const std::string &s) -{ - std::stringstream tmp(s); - s64 t; - tmp >> t; - return t; -} - // MSVC2010 includes it's own versions of these //#if !defined(_MSC_VER) || _MSC_VER < 1600 /** - * Returns a 32-bit value reprensented by the string s (decimal). - * + * Returns a 32-bit value reprensented by the string \p str (decimal). * @see atoi(3) for further limitations */ -inline s32 mystoi(const std::string &s) +inline s32 mystoi(const std::string &str) { - return atoi(s.c_str()); + return atoi(str.c_str()); } /** - * Returns a 32-bit value reprensented by the wide string s (decimal). - * + * Returns s 32-bit value represented by the wide string \p str (decimal). * @see atoi(3) for further limitations */ -inline s32 mystoi(const std::wstring &s) +inline s32 mystoi(const std::wstring &str) { - return atoi(wide_to_narrow(s).c_str()); + return mystoi(wide_to_narrow(str)); } /** - * Returns a float reprensented by the string s (decimal). - * + * Returns a float reprensented by the string \p str (decimal). * @see atof(3) */ -inline float mystof(const std::string &s) +inline float mystof(const std::string &str) { - // This crap causes a segfault in certain cases on MinGW - /*float f; - std::istringstream ss(s); - ss>>f; - return f;*/ - // This works in that case - return atof(s.c_str()); + return atof(str.c_str()); } //#endif @@ -320,89 +390,124 @@ inline float mystof(const std::string &s) #define stoi mystoi #define stof mystof +/// Returns a value represented by the string \p val. +template +inline T from_string(const std::string &str) +{ + std::stringstream tmp(str); + T t; + tmp >> t; + return t; +} -/** - * Returns a string representing the decimal value of the 32-bit value i - */ -inline std::string itos(s32 i) +/// Returns a 64-bit signed value represented by the string \p str (decimal). +inline s64 stoi64(const std::string &str) { return from_string(str); } + +#if __cplusplus < 201103L +namespace std { + +/// Returns a string representing the value \p val. +template +inline string to_string(T val) { - std::ostringstream o; - o << i; - return o.str(); + ostringstream oss; + oss << val; + return oss.str(); } +#define DEFINE_STD_TOSTRING_FLOATINGPOINT(T) \ + template <> \ + inline string to_string(T val) \ + { \ + ostringstream oss; \ + oss << std::fixed \ + << std::setprecision(6) \ + << val; \ + return oss.str(); \ + } +DEFINE_STD_TOSTRING_FLOATINGPOINT(float) +DEFINE_STD_TOSTRING_FLOATINGPOINT(double) +DEFINE_STD_TOSTRING_FLOATINGPOINT(long double) +#undef DEFINE_STD_TOSTRING_FLOATINGPOINT -/** - * Returns a string representing the decimal value of i of the 64-bit value i - */ -inline std::string i64tos(s64 i) { - std::ostringstream o; - o << i; - return o.str(); +/// Returns a wide string representing the value \p val +template +inline wstring to_wstring(T val) +{ + return utf8_to_wide(to_string(val)); +} } +#endif +/// Returns a string representing the decimal value of the 32-bit value \p i. +inline std::string itos(s32 i) { return std::to_string(i); } +/// Returns a string representing the decimal value of the 64-bit value \p i. +inline std::string i64tos(s64 i) { return std::to_string(i); } -/** - * Returns a string representing the real number (decimal) float value i - */ +// std::to_string uses the '%.6f' conversion, which is inconsistent with +// std::ostream::operator<<() and impractical too. ftos() uses the +// more generic and std::ostream::operator<<()-compatible '%G' format. +/// Returns a string representing the decimal value of the float value \p f. inline std::string ftos(float f) { - std::ostringstream o; - o << f; - return o.str(); + std::ostringstream oss; + oss << f; + return oss.str(); } /** - * Replace all occurrences of pattern in str with replacement + * Replace all occurrences of \p pattern in \p str with \p replacement. * - * @param str String to replace pattern with replacement within - * @param pattern The pattern to replace - * @param replacement What to replace the pattern with + * @param str String to replace pattern with replacement within. + * @param pattern The pattern to replace. + * @param replacement What to replace the pattern with. */ -inline void str_replace(std::string &str, std::string const &pattern, - std::string const &replacement) +inline void str_replace(std::string &str, const std::string &pattern, + const std::string &replacement) { std::string::size_type start = str.find(pattern, 0); while (start != str.npos) { str.replace(start, pattern.size(), replacement); - start = str.find(pattern, start+replacement.size()); + start = str.find(pattern, start + replacement.size()); } } - /** - * Replace all occurrances of the character from in str with to. - * - * @param str The string to (potentially) modify - * @param from The character in str to replace - * @param to The replacement character + * Escapes characters [ ] \ , ; that can not be used in formspecs */ -inline void str_replace_char(std::string &str, char from, char to) +inline void str_formspec_escape(std::string &str) { - for (size_t i = 0; i < str.size(); i++) - if (str[i] == from) - str[i] = to; + str_replace(str, "\\", "\\\\"); + str_replace(str, "]", "\\]"); + str_replace(str, "[", "\\["); + str_replace(str, ";", "\\;"); + str_replace(str, ",", "\\,"); } +/** + * Replace all occurrences of the character \p from in \p str with \p to. + * + * @param str The string to (potentially) modify. + * @param from The character in str to replace. + * @param to The replacement character. + */ +void str_replace(std::string &str, char from, char to); + /** * Check that a string only contains whitelisted characters. This is the * opposite of string_allowed_blacklist(). * - * @param s The string to be checked. + * @param str The string to be checked. * @param allowed_chars A string containing permitted characters. * @return true if the string is allowed, otherwise false. * * @see string_allowed_blacklist() */ -inline bool string_allowed(const std::string &s, const std::string &allowed_chars) +inline bool string_allowed(const std::string &str, const std::string &allowed_chars) { - for (size_t i = 0; i < s.size(); i++) - if (allowed_chars.find(s[i]) == std::string::npos) - return false; - - return true; + return str.find_first_not_of(allowed_chars) == str.npos; } @@ -410,42 +515,47 @@ inline bool string_allowed(const std::string &s, const std::string &allowed_char * Check that a string contains no blacklisted characters. This is the * opposite of string_allowed(). * - * @param s The string to be checked. + * @param str The string to be checked. * @param blacklisted_chars A string containing prohibited characters. * @return true if the string is allowed, otherwise false. * @see string_allowed() */ -inline bool string_allowed_blacklist(const std::string &s, +inline bool string_allowed_blacklist(const std::string &str, const std::string &blacklisted_chars) { - for (size_t i = 0; i < s.size(); i++) - if (blacklisted_chars.find(s[i]) != std::string::npos) - return false; - - return true; + return str.find_first_of(blacklisted_chars) == str.npos; } /** - * Create a string based on 'from' where a newline is forcefully inserted every - * 'rowlen' characters. + * Create a string based on \p from where a newline is forcefully inserted + * every \p row_len characters. * * @note This function does not honour word wraps and blindy inserts a newline - * every rowlen characters whether it breaks a word or not. It is - * intended to be used, for example, showing paths in the GUI + * every \p row_len characters whether it breaks a word or not. It is + * intended to be used for, for example, showing paths in the GUI. * - * @param from The string to be wrapped into rows. - * @param rowlen The row length (in characters). + * @note This function doesn't wrap inside utf-8 multibyte sequences and also + * counts multibyte sequences correcly as single characters. + * + * @param from The (utf-8) string to be wrapped into rows. + * @param row_len The row length (in characters). * @return A new string with the wrapping applied. */ -inline std::string wrap_rows(const std::string &from, u32 rowlen) +inline std::string wrap_rows(const std::string &from, + unsigned row_len) { std::string to; + size_t character_idx = 0; for (size_t i = 0; i < from.size(); i++) { - if(i != 0 && i % rowlen == 0) - to += '\n'; + if (!IS_UTF8_MULTB_INNER(from[i])) { + // Wrap string after last inner byte of char + if (character_idx > 0 && character_idx % row_len == 0) + to += '\n'; + character_idx++; + } to += from[i]; } @@ -454,49 +564,181 @@ inline std::string wrap_rows(const std::string &from, u32 rowlen) /** - * Removes all \\ from a string that had been escaped (FormSpec strings) - * + * Removes backslashes from an escaped string (FormSpec strings) */ -inline std::string unescape_string(std::string &s) +template +inline std::basic_string unescape_string(const std::basic_string &s) { - std::string res; - + std::basic_string res; + for (size_t i = 0; i < s.length(); i++) { - if (s[i] == '\\') + if (s[i] == '\\') { i++; + if (i >= s.length()) + break; + } res += s[i]; } - + return res; } +/** + * Remove all escape sequences in \p s. + * + * @param s The string in which to remove escape sequences. + * @return \p s, with escape sequences removed. + */ +template +std::basic_string unescape_enriched(const std::basic_string &s) +{ + std::basic_string output; + size_t i = 0; + while (i < s.length()) { + if (s[i] == '\x1b') { + ++i; + if (i == s.length()) continue; + if (s[i] == '(') { + ++i; + while (i < s.length() && s[i] != ')') { + if (s[i] == '\\') { + ++i; + } + ++i; + } + ++i; + } else { + ++i; + } + continue; + } + output += s[i]; + ++i; + } + return output; +} + +template +std::vector > split(const std::basic_string &s, T delim) +{ + std::vector > tokens; + + std::basic_string current; + bool last_was_escape = false; + for (size_t i = 0; i < s.length(); i++) { + T si = s[i]; + if (last_was_escape) { + current += '\\'; + current += si; + last_was_escape = false; + } else { + if (si == delim) { + tokens.push_back(current); + current = std::basic_string(); + last_was_escape = false; + } else if (si == '\\') { + last_was_escape = true; + } else { + current += si; + last_was_escape = false; + } + } + } + //push last element + tokens.push_back(current); + + return tokens; +} + +std::wstring translate_string(const std::wstring &s); + +inline std::wstring unescape_translate(const std::wstring &s) { + return unescape_enriched(translate_string(s)); +} /** - * Checks that all characters in tocheck are a decimal digits + * Checks that all characters in \p to_check are a decimal digits. * - * @param tocheck - * @return true if tockcheck is not empty and all characters in tocheck are - * decimal digits, otherwise false + * @param to_check + * @return true if to_check is not empty and all characters in to_check are + * decimal digits, otherwise false */ -inline bool is_number(const std::string &tocheck) +inline bool is_number(const std::string &to_check) { - for (size_t i = 0; i < tocheck.size(); i++) - if (!isdigit(tocheck[i])) - return false; + for (char i : to_check) + if (!std::isdigit(i)) + return false; - return !tocheck.empty(); + return !to_check.empty(); } /** - * Returns a C-string, either "true" or "false", corresponding to v + * Returns a C-string, either "true" or "false", corresponding to \p val. * - * @return If v == true, then "true" is returned, otherwise "false" + * @return If \p val is true, then "true" is returned, otherwise "false". */ -inline const char *bool_to_cstr(bool v) +inline const char *bool_to_cstr(bool val) { - return v ? "true" : "false"; + return val ? "true" : "false"; } +inline const std::string duration_to_string(int sec) +{ + int min = sec / 60; + sec %= 60; + int hour = min / 60; + min %= 60; + + std::stringstream ss; + if (hour > 0) { + ss << hour << "h "; + } -#endif + if (min > 0) { + ss << min << "m "; + } + + if (sec > 0) { + ss << sec << "s "; + } + + return ss.str(); +} + +/** + * Joins a vector of strings by the string \p delimiter. + * + * @return A std::string + */ +inline std::string str_join(const std::vector &list, + const std::string &delimiter) +{ + std::ostringstream oss; + bool first = true; + for (const auto &part : list) { + if (!first) + oss << delimiter; + oss << part; + first = false; + } + return oss.str(); +} + +/** + * Create a UTF8 std::string from a irr::core::stringw. + */ +inline std::string stringw_to_utf8(const irr::core::stringw &input) +{ + std::wstring str(input.c_str()); + return wide_to_utf8(str); +} + + /** + * Create a irr::core:stringw from a UTF8 std::string. + */ +inline irr::core::stringw utf8_to_stringw(const std::string &input) +{ + std::wstring str = utf8_to_wide(input); + return irr::core::stringw(str.c_str()); +}