X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=src%2Futil%2Fstring.h;h=6fd11fadce21e65d0498f2e28a56b91a65df1d3d;hb=79414aa3e5591fdaffa0956a08610a2228042941;hp=4aeea17dbb7e5bd6c6bfea66ccb10b316ee35354;hpb=ebf7ea50193afe5a3d0968b87b4743eb1ee84025;p=minetest.git diff --git a/src/util/string.h b/src/util/string.h index 4aeea17db..6fd11fadc 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -17,184 +17,374 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#ifndef UTIL_STRING_HEADER -#define UTIL_STRING_HEADER +#pragma once -#include "../irrlichttypes.h" -#include +#include "irrlichttypes_bloated.h" +#include "irrString.h" +#include #include #include #include +#include #include +#include +#include +#include + +class Translations; #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) +// Checks whether a value is an ASCII printable character +#define IS_ASCII_PRINTABLE_CHAR(x) \ + (((unsigned int)(x) >= 0x20) && \ + ( (unsigned int)(x) <= 0x7e)) + +// Checks whether a byte is an inner byte for an utf-8 multibyte sequence +#define IS_UTF8_MULTB_INNER(x) \ + (((unsigned char)(x) >= 0x80) && \ + ( (unsigned char)(x) <= 0xbf)) + +// Checks whether a byte is a start byte for an utf-8 multibyte sequence +#define IS_UTF8_MULTB_START(x) \ + (((unsigned char)(x) >= 0xc2) && \ + ( (unsigned char)(x) <= 0xf4)) + +// Given a start byte x for an utf-8 multibyte sequence +// it gives the length of the whole sequence in bytes. +#define UTF8_MULTB_START_LEN(x) \ + (((unsigned char)(x) < 0xe0) ? 2 : \ + (((unsigned char)(x) < 0xf0) ? 3 : 4)) + +typedef std::unordered_map StringMap; + struct FlagDesc { const char *name; u32 flag; }; -std::wstring narrow_to_wide(const std::string& mbs); -std::string wide_to_narrow(const std::wstring& wcs); +// try not to convert between wide/utf8 encodings; this can result in data loss +// try to only convert between them when you need to input/output stuff via Irrlicht +std::wstring utf8_to_wide(const std::string &input); +std::string wide_to_utf8(const std::wstring &input); -static inline std::string padStringRight(std::string s, size_t len) +wchar_t *utf8_to_wide_c(const char *str); + +// NEVER use those two functions unless you have a VERY GOOD reason to +// they just convert between wide and multibyte encoding +// multibyte encoding depends on current locale, this is no good, especially on Windows + +// You must free the returned string! +// The returned string is allocated using new +wchar_t *narrow_to_wide_c(const char *str); +std::wstring narrow_to_wide(const std::string &mbs); +std::string wide_to_narrow(const std::wstring &wcs); + +std::string urlencode(const std::string &str); +std::string urldecode(const std::string &str); +u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask); +std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask); +size_t mystrlcpy(char *dst, const char *src, size_t size); +char *mystrtok_r(char *s, const char *sep, char **lasts); +u64 read_seed(const char *str); +bool parseColorString(const std::string &value, video::SColor &color, bool quiet, + unsigned char default_alpha = 0xff); + + +/** + * Returns a copy of \p str with spaces inserted at the right hand side to ensure + * that the string is \p len characters in length. If \p str is <= \p len then the + * returned string will be identical to str. + */ +inline std::string padStringRight(std::string str, size_t len) { - if(len > s.size()) - s.insert(s.end(), len - s.size(), ' '); - return s; + if (len > str.size()) + str.insert(str.end(), len - str.size(), ' '); + + return str; } -// ends: NULL- or ""-terminated array of strings -// Returns "" if no end could be removed. -static inline std::string removeStringEnd(const std::string &s, const char *ends[]) +/** + * Returns a version of \p str with the first occurrence of a string + * contained within ends[] removed from the end of the string. + * + * @param str + * @param ends A NULL- or ""- terminated array of strings to remove from s in + * the copy produced. Note that once one of these strings is removed + * that no further postfixes contained within this array are removed. + * + * @return If no end could be removed then "" is returned. + */ +inline std::string removeStringEnd(const std::string &str, + const char *ends[]) { const char **p = ends; - for(; (*p) && (*p)[0] != '\0'; p++){ + + for (; *p && (*p)[0] != '\0'; p++) { std::string end = *p; - if(s.size() < end.size()) + if (str.size() < end.size()) continue; - if(s.substr(s.size()-end.size(), end.size()) == end) - return s.substr(0, s.size() - end.size()); + if (str.compare(str.size() - end.size(), end.size(), end) == 0) + return str.substr(0, str.size() - end.size()); } + return ""; } -// Tests if two strings are equal, optionally case insensitive -inline bool str_equal(const std::wstring& s1, const std::wstring& s2, + +/** + * Check two strings for equivalence. If \p case_insensitive is true + * then the case of the strings is ignored (default is false). + * + * @param s1 + * @param s2 + * @param case_insensitive + * @return true if the strings match + */ +template +inline bool str_equal(const std::basic_string &s1, + const std::basic_string &s2, bool case_insensitive = false) { - if(case_insensitive) - { - if(s1.size() != s2.size()) - return false; - for(size_t i = 0; i < s1.size(); ++i) - if(tolower(s1[i]) != tolower(s2[i])) - return false; - return true; - } - else - { + if (!case_insensitive) return s1 == s2; - } + + if (s1.size() != s2.size()) + return false; + + for (size_t i = 0; i < s1.size(); ++i) + if(tolower(s1[i]) != tolower(s2[i])) + return false; + + return true; } -// Tests if the second string is a prefix of the first, optionally case insensitive -inline bool str_starts_with(const std::wstring& str, const std::wstring& prefix, + +/** + * Check whether \p str begins with the string prefix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param prefix + * @param case_insensitive + * @return true if the str begins with prefix + */ +template +inline bool str_starts_with(const std::basic_string &str, + const std::basic_string &prefix, bool case_insensitive = false) { - if(str.size() < prefix.size()) + if (str.size() < prefix.size()) return false; - if(case_insensitive) - { - for(size_t i = 0; i < prefix.size(); ++i) - if(tolower(str[i]) != tolower(prefix[i])) - return false; - } - else - { - for(size_t i = 0; i < prefix.size(); ++i) - if(str[i] != prefix[i]) - return false; - } + + if (!case_insensitive) + return str.compare(0, prefix.size(), prefix) == 0; + + for (size_t i = 0; i < prefix.size(); ++i) + if (tolower(str[i]) != tolower(prefix[i])) + return false; return true; } -// Split a string using the given delimiter. Returns a vector containing -// the component parts. -inline std::vector str_split(const std::wstring &str, wchar_t delimiter) +/** + * Check whether \p str begins with the string prefix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param prefix + * @param case_insensitive + * @return true if the str begins with prefix + */ +template +inline bool str_starts_with(const std::basic_string &str, + const T *prefix, + bool case_insensitive = false) +{ + return str_starts_with(str, std::basic_string(prefix), + case_insensitive); +} + + +/** + * Check whether \p str ends with the string suffix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param suffix + * @param case_insensitive + * @return true if the str begins with suffix + */ +template +inline bool str_ends_with(const std::basic_string &str, + const std::basic_string &suffix, + bool case_insensitive = false) { - std::vector parts; - std::wstringstream sstr(str); - std::wstring part; - while(std::getline(sstr, part, delimiter)) + if (str.size() < suffix.size()) + return false; + + size_t start = str.size() - suffix.size(); + if (!case_insensitive) + return str.compare(start, suffix.size(), suffix) == 0; + + for (size_t i = 0; i < suffix.size(); ++i) + if (tolower(str[start + i]) != tolower(suffix[i])) + return false; + return true; +} + + +/** + * Check whether \p str ends with the string suffix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param suffix + * @param case_insensitive + * @return true if the str begins with suffix + */ +template +inline bool str_ends_with(const std::basic_string &str, + const T *suffix, + bool case_insensitive = false) +{ + return str_ends_with(str, std::basic_string(suffix), + case_insensitive); +} + + +/** + * Splits a string into its component parts separated by the character + * \p delimiter. + * + * @return An std::vector > of the component parts + */ +template +inline std::vector > str_split( + const std::basic_string &str, + T delimiter) +{ + std::vector > parts; + std::basic_stringstream sstr(str); + std::basic_string part; + + while (std::getline(sstr, part, delimiter)) parts.push_back(part); + return parts; } -inline std::string lowercase(const std::string &s) + +/** + * @param str + * @return A copy of \p str converted to all lowercase characters. + */ +inline std::string lowercase(const std::string &str) { std::string s2; - for(size_t i=0; i= 'A' && c <= 'Z') - c -= 'A' - 'a'; - s2 += c; - } + + s2.reserve(str.size()); + + for (char i : str) + s2 += tolower(i); + return s2; } -inline std::string trim(const std::string &s) + +/** + * @param str + * @return A copy of \p str with leading and trailing whitespace removed. + */ +inline std::string trim(const std::string &str) { size_t front = 0; - while(s[front] == ' ' || - s[front] == '\t' || - s[front] == '\r' || - s[front] == '\n' - ) + + while (std::isspace(str[front])) ++front; - size_t back = s.size(); - while(back > front && - (s[back-1] == ' ' || - s[back-1] == '\t' || - s[back-1] == '\r' || - s[back-1] == '\n' - ) - ) + size_t back = str.size(); + while (back > front && std::isspace(str[back - 1])) --back; - return s.substr(front, back - front); + return str.substr(front, back - front); } -inline bool is_yes(const std::string &s) + +/** + * Returns whether \p str should be regarded as (bool) true. Case and leading + * and trailing whitespace are ignored. Values that will return + * true are "y", "yes", "true" and any number that is not 0. + * @param str + */ +inline bool is_yes(const std::string &str) { - std::string s2 = lowercase(trim(s)); - if(s2 == "y" || s2 == "yes" || s2 == "true" || atoi(s2.c_str()) != 0) - return true; - return false; + std::string s2 = lowercase(trim(str)); + + return s2 == "y" || s2 == "yes" || s2 == "true" || atoi(s2.c_str()) != 0; } -inline s32 mystoi(const std::string &s, s32 min, s32 max) + +/** + * Converts the string \p str to a signed 32-bit integer. The converted value + * is constrained so that min <= value <= max. + * + * @see atoi(3) for limitations + * + * @param str + * @param min Range minimum + * @param max Range maximum + * @return The value converted to a signed 32-bit integer and constrained + * within the range defined by min and max (inclusive) + */ +inline s32 mystoi(const std::string &str, s32 min, s32 max) { - s32 i = atoi(s.c_str()); - if(i < min) + s32 i = atoi(str.c_str()); + + if (i < min) i = min; - if(i > max) + if (i > max) i = max; + return i; } -inline s64 stoi64(const std::string &s) { - std::stringstream tmp(s); - s64 t; - tmp >> t; - return t; -} // MSVC2010 includes it's own versions of these //#if !defined(_MSC_VER) || _MSC_VER < 1600 -inline s32 mystoi(const std::string &s) + +/** + * Returns a 32-bit value reprensented by the string \p str (decimal). + * @see atoi(3) for further limitations + */ +inline s32 mystoi(const std::string &str) { - return atoi(s.c_str()); + return atoi(str.c_str()); } -inline s32 mystoi(const std::wstring &s) + +/** + * Returns s 32-bit value represented by the wide string \p str (decimal). + * @see atoi(3) for further limitations + */ +inline s32 mystoi(const std::wstring &str) { - return atoi(wide_to_narrow(s).c_str()); + return mystoi(wide_to_narrow(str)); } -inline float mystof(const std::string &s) + +/** + * Returns a float reprensented by the string \p str (decimal). + * @see atof(3) + */ +inline float mystof(const std::string &str) { - // This crap causes a segfault in certain cases on MinGW - /*float f; - std::istringstream ss(s); - ss>>f; - return f;*/ - // This works in that case - return atof(s.c_str()); + return atof(str.c_str()); } //#endif @@ -202,142 +392,365 @@ inline float mystof(const std::string &s) #define stoi mystoi #define stof mystof -inline std::string itos(s32 i) +/// Returns a value represented by the string \p val. +template +inline T from_string(const std::string &str) { - std::ostringstream o; - o<> t; + return t; } -inline std::string i64tos(s64 i) { - std::ostringstream o; - o<(str); } + +#if __cplusplus < 201103L +namespace std { + +/// Returns a string representing the value \p val. +template +inline string to_string(T val) +{ + ostringstream oss; + oss << val; + return oss.str(); } +#define DEFINE_STD_TOSTRING_FLOATINGPOINT(T) \ + template <> \ + inline string to_string(T val) \ + { \ + ostringstream oss; \ + oss << std::fixed \ + << std::setprecision(6) \ + << val; \ + return oss.str(); \ + } +DEFINE_STD_TOSTRING_FLOATINGPOINT(float) +DEFINE_STD_TOSTRING_FLOATINGPOINT(double) +DEFINE_STD_TOSTRING_FLOATINGPOINT(long double) + +#undef DEFINE_STD_TOSTRING_FLOATINGPOINT +/// Returns a wide string representing the value \p val +template +inline wstring to_wstring(T val) +{ + return utf8_to_wide(to_string(val)); +} +} +#endif + +/// Returns a string representing the decimal value of the 32-bit value \p i. +inline std::string itos(s32 i) { return std::to_string(i); } +/// Returns a string representing the decimal value of the 64-bit value \p i. +inline std::string i64tos(s64 i) { return std::to_string(i); } + +// std::to_string uses the '%.6f' conversion, which is inconsistent with +// std::ostream::operator<<() and impractical too. ftos() uses the +// more generic and std::ostream::operator<<()-compatible '%G' format. +/// Returns a string representing the decimal value of the float value \p f. inline std::string ftos(float f) { - std::ostringstream o; - o< 0 && character_idx % row_len == 0) + to += '\n'; + character_idx++; + } to += from[i]; } + return to; } -/* - Removes all \\ from a string that had been escaped (FormSpec strings) -*/ -inline std::string unescape_string(std::string &s) + +/** + * Removes backslashes from an escaped string (FormSpec strings) + */ +template +inline std::basic_string unescape_string(const std::basic_string &s) { - std::string res; - + std::basic_string res; + for (size_t i = 0; i < s.length(); i++) { - if (s[i] == '\\') + if (s[i] == '\\') { i++; + if (i >= s.length()) + break; + } res += s[i]; } - + return res; } -inline bool is_number(const std::string& tocheck) +/** + * Remove all escape sequences in \p s. + * + * @param s The string in which to remove escape sequences. + * @return \p s, with escape sequences removed. + */ +template +std::basic_string unescape_enriched(const std::basic_string &s) +{ + std::basic_string output; + size_t i = 0; + while (i < s.length()) { + if (s[i] == '\x1b') { + ++i; + if (i == s.length()) continue; + if (s[i] == '(') { + ++i; + while (i < s.length() && s[i] != ')') { + if (s[i] == '\\') { + ++i; + } + ++i; + } + ++i; + } else { + ++i; + } + continue; + } + output += s[i]; + ++i; + } + return output; +} + +template +std::vector > split(const std::basic_string &s, T delim) { - std::string::const_iterator iter = tocheck.begin(); + std::vector > tokens; - while (iter != tocheck.end() && std::isdigit(*iter)) { - ++iter; + std::basic_string current; + bool last_was_escape = false; + for (size_t i = 0; i < s.length(); i++) { + T si = s[i]; + if (last_was_escape) { + current += '\\'; + current += si; + last_was_escape = false; + } else { + if (si == delim) { + tokens.push_back(current); + current = std::basic_string(); + last_was_escape = false; + } else if (si == '\\') { + last_was_escape = true; + } else { + current += si; + last_was_escape = false; + } + } } + //push last element + tokens.push_back(current); - return ((!tocheck.empty()) && (iter == tocheck.end())); + return tokens; } -std::string translatePassword(std::string playername, std::wstring password); -std::string urlencode(std::string str); -std::string urldecode(std::string str); -u32 readFlagString(std::string str, FlagDesc *flagdesc, u32 *flagmask); -std::string writeFlagString(u32 flags, FlagDesc *flagdesc, u32 flagmask); -size_t mystrlcpy(char *dst, const char *src, size_t size); -char *mystrtok_r(char *s, const char *sep, char **lasts); -u64 read_seed(const char *str); +std::wstring translate_string(const std::wstring &s, Translations *translations); -#endif +std::wstring translate_string(const std::wstring &s); + +inline std::wstring unescape_translate(const std::wstring &s) { + return unescape_enriched(translate_string(s)); +} + +/** + * Checks that all characters in \p to_check are a decimal digits. + * + * @param to_check + * @return true if to_check is not empty and all characters in to_check are + * decimal digits, otherwise false + */ +inline bool is_number(const std::string &to_check) +{ + for (char i : to_check) + if (!std::isdigit(i)) + return false; + + return !to_check.empty(); +} + + +/** + * Returns a C-string, either "true" or "false", corresponding to \p val. + * + * @return If \p val is true, then "true" is returned, otherwise "false". + */ +inline const char *bool_to_cstr(bool val) +{ + return val ? "true" : "false"; +} + +inline const std::string duration_to_string(int sec) +{ + int min = sec / 60; + sec %= 60; + int hour = min / 60; + min %= 60; + + std::stringstream ss; + if (hour > 0) { + ss << hour << "h "; + } + + if (min > 0) { + ss << min << "m "; + } + + if (sec > 0) { + ss << sec << "s "; + } + + return ss.str(); +} + +/** + * Joins a vector of strings by the string \p delimiter. + * + * @return A std::string + */ +inline std::string str_join(const std::vector &list, + const std::string &delimiter) +{ + std::ostringstream oss; + bool first = true; + for (const auto &part : list) { + if (!first) + oss << delimiter; + oss << part; + first = false; + } + return oss.str(); +} + +/** + * Create a UTF8 std::string from a irr::core::stringw. + */ +inline std::string stringw_to_utf8(const irr::core::stringw &input) +{ + std::wstring str(input.c_str()); + return wide_to_utf8(str); +} + + /** + * Create a irr::core:stringw from a UTF8 std::string. + */ +inline irr::core::stringw utf8_to_stringw(const std::string &input) +{ + std::wstring str = utf8_to_wide(input); + return irr::core::stringw(str.c_str()); +} +/** + * Sanitize the name of a new directory. This consists of two stages: + * 1. Check for 'reserved filenames' that can't be used on some filesystems + * and prefix them + * 2. Remove 'unsafe' characters from the name by replacing them with '_' + */ +std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix);