X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=src%2Futil%2Fstring.h;h=aa4329f2f536d50571ce07d281acd9595c8cf322;hb=c7bcebb62856ae5fdb23a13e6fa1052eae700ddf;hp=e5a60bc47162824770b8e2af78270fa5d4cba609;hpb=c6c5edd73a97f2991113ba48d9c2415222fe4709;p=minetest.git diff --git a/src/util/string.h b/src/util/string.h index e5a60bc47..aa4329f2f 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -17,312 +17,747 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#ifndef UTIL_STRING_HEADER -#define UTIL_STRING_HEADER +#pragma once -#include "../irrlichttypes.h" -#include +#include "irrlichttypes_bloated.h" +#include "irrString.h" +#include #include #include #include +#include #include +#include +#include +#include + +class Translations; + +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + +// Checks whether a value is an ASCII printable character +#define IS_ASCII_PRINTABLE_CHAR(x) \ + (((unsigned int)(x) >= 0x20) && \ + ( (unsigned int)(x) <= 0x7e)) + +// Checks whether a byte is an inner byte for an utf-8 multibyte sequence +#define IS_UTF8_MULTB_INNER(x) \ + (((unsigned char)(x) >= 0x80) && \ + ( (unsigned char)(x) <= 0xbf)) + +// Checks whether a byte is a start byte for an utf-8 multibyte sequence +#define IS_UTF8_MULTB_START(x) \ + (((unsigned char)(x) >= 0xc2) && \ + ( (unsigned char)(x) <= 0xf4)) + +// Given a start byte x for an utf-8 multibyte sequence +// it gives the length of the whole sequence in bytes. +#define UTF8_MULTB_START_LEN(x) \ + (((unsigned char)(x) < 0xe0) ? 2 : \ + (((unsigned char)(x) < 0xf0) ? 3 : 4)) + +typedef std::unordered_map StringMap; struct FlagDesc { const char *name; u32 flag; }; -std::wstring narrow_to_wide(const std::string& mbs); -std::string wide_to_narrow(const std::wstring& wcs); +// Try to avoid converting between wide and UTF-8 unless you need to +// input/output stuff via Irrlicht +std::wstring utf8_to_wide(const std::string &input); +std::string wide_to_utf8(const std::wstring &input); + +// You must free the returned string! +// The returned string is allocated using new[] +wchar_t *utf8_to_wide_c(const char *str); + +std::string urlencode(const std::string &str); +std::string urldecode(const std::string &str); +u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask); +std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask); +size_t mystrlcpy(char *dst, const char *src, size_t size); +char *mystrtok_r(char *s, const char *sep, char **lasts); +u64 read_seed(const char *str); +bool parseColorString(const std::string &value, video::SColor &color, bool quiet, + unsigned char default_alpha = 0xff); -static inline std::string padStringRight(std::string s, size_t len) + +/** + * Returns a copy of \p str with spaces inserted at the right hand side to ensure + * that the string is \p len characters in length. If \p str is <= \p len then the + * returned string will be identical to str. + */ +inline std::string padStringRight(std::string str, size_t len) { - if(len > s.size()) - s.insert(s.end(), len - s.size(), ' '); - return s; + if (len > str.size()) + str.insert(str.end(), len - str.size(), ' '); + + return str; } -// ends: NULL- or ""-terminated array of strings -// Returns "" if no end could be removed. -static inline std::string removeStringEnd(const std::string &s, const char *ends[]) +/** + * Returns a version of \p str with the first occurrence of a string + * contained within ends[] removed from the end of the string. + * + * @param str + * @param ends A NULL- or ""- terminated array of strings to remove from s in + * the copy produced. Note that once one of these strings is removed + * that no further postfixes contained within this array are removed. + * + * @return If no end could be removed then "" is returned. + */ +inline std::string removeStringEnd(const std::string &str, + const char *ends[]) { const char **p = ends; - for(; (*p) && (*p)[0] != '\0'; p++){ + + for (; *p && (*p)[0] != '\0'; p++) { std::string end = *p; - if(s.size() < end.size()) + if (str.size() < end.size()) continue; - if(s.substr(s.size()-end.size(), end.size()) == end) - return s.substr(0, s.size() - end.size()); + if (str.compare(str.size() - end.size(), end.size(), end) == 0) + return str.substr(0, str.size() - end.size()); } + return ""; } -// Tests if two strings are equal, optionally case insensitive -inline bool str_equal(const std::wstring& s1, const std::wstring& s2, + +/** + * Check two strings for equivalence. If \p case_insensitive is true + * then the case of the strings is ignored (default is false). + * + * @param s1 + * @param s2 + * @param case_insensitive + * @return true if the strings match + */ +template +inline bool str_equal(const std::basic_string &s1, + const std::basic_string &s2, bool case_insensitive = false) { - if(case_insensitive) - { - if(s1.size() != s2.size()) - return false; - for(size_t i = 0; i < s1.size(); ++i) - if(tolower(s1[i]) != tolower(s2[i])) - return false; - return true; - } - else - { + if (!case_insensitive) return s1 == s2; - } + + if (s1.size() != s2.size()) + return false; + + for (size_t i = 0; i < s1.size(); ++i) + if(tolower(s1[i]) != tolower(s2[i])) + return false; + + return true; } -// Tests if the second string is a prefix of the first, optionally case insensitive -inline bool str_starts_with(const std::wstring& str, const std::wstring& prefix, + +/** + * Check whether \p str begins with the string prefix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param prefix + * @param case_insensitive + * @return true if the str begins with prefix + */ +template +inline bool str_starts_with(const std::basic_string &str, + const std::basic_string &prefix, bool case_insensitive = false) { - if(str.size() < prefix.size()) + if (str.size() < prefix.size()) return false; - if(case_insensitive) - { - for(size_t i = 0; i < prefix.size(); ++i) - if(tolower(str[i]) != tolower(prefix[i])) - return false; - } - else - { - for(size_t i = 0; i < prefix.size(); ++i) - if(str[i] != prefix[i]) - return false; - } + + if (!case_insensitive) + return str.compare(0, prefix.size(), prefix) == 0; + + for (size_t i = 0; i < prefix.size(); ++i) + if (tolower(str[i]) != tolower(prefix[i])) + return false; return true; } -// Split a string using the given delimiter. Returns a vector containing -// the component parts. -inline std::vector str_split(const std::wstring &str, wchar_t delimiter) +/** + * Check whether \p str begins with the string prefix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param prefix + * @param case_insensitive + * @return true if the str begins with prefix + */ +template +inline bool str_starts_with(const std::basic_string &str, + const T *prefix, + bool case_insensitive = false) { - std::vector parts; - std::wstringstream sstr(str); - std::wstring part; - while(std::getline(sstr, part, delimiter)) + return str_starts_with(str, std::basic_string(prefix), + case_insensitive); +} + + +/** + * Check whether \p str ends with the string suffix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param suffix + * @param case_insensitive + * @return true if the str begins with suffix + */ +template +inline bool str_ends_with(const std::basic_string &str, + const std::basic_string &suffix, + bool case_insensitive = false) +{ + if (str.size() < suffix.size()) + return false; + + size_t start = str.size() - suffix.size(); + if (!case_insensitive) + return str.compare(start, suffix.size(), suffix) == 0; + + for (size_t i = 0; i < suffix.size(); ++i) + if (tolower(str[start + i]) != tolower(suffix[i])) + return false; + return true; +} + + +/** + * Check whether \p str ends with the string suffix. If \p case_insensitive + * is true then the check is case insensitve (default is false; i.e. case is + * significant). + * + * @param str + * @param suffix + * @param case_insensitive + * @return true if the str begins with suffix + */ +template +inline bool str_ends_with(const std::basic_string &str, + const T *suffix, + bool case_insensitive = false) +{ + return str_ends_with(str, std::basic_string(suffix), + case_insensitive); +} + + +/** + * Splits a string into its component parts separated by the character + * \p delimiter. + * + * @return An std::vector > of the component parts + */ +template +inline std::vector > str_split( + const std::basic_string &str, + T delimiter) +{ + std::vector > parts; + std::basic_stringstream sstr(str); + std::basic_string part; + + while (std::getline(sstr, part, delimiter)) parts.push_back(part); + return parts; } -inline std::string lowercase(const std::string &s) + +/** + * @param str + * @return A copy of \p str converted to all lowercase characters. + */ +inline std::string lowercase(const std::string &str) { std::string s2; - for(size_t i=0; i= 'A' && c <= 'Z') - c -= 'A' - 'a'; - s2 += c; - } + + s2.reserve(str.size()); + + for (char i : str) + s2 += tolower(i); + return s2; } -inline std::string trim(const std::string &s) + +/** + * @param str + * @return A copy of \p str with leading and trailing whitespace removed. + */ +inline std::string trim(const std::string &str) { size_t front = 0; - while(s[front] == ' ' || - s[front] == '\t' || - s[front] == '\r' || - s[front] == '\n' - ) + size_t back = str.size(); + + while (front < back && std::isspace(str[front])) ++front; - size_t back = s.size(); - while(back > front && - (s[back-1] == ' ' || - s[back-1] == '\t' || - s[back-1] == '\r' || - s[back-1] == '\n' - ) - ) + while (back > front && std::isspace(str[back - 1])) --back; - return s.substr(front, back - front); + return str.substr(front, back - front); } -inline bool is_yes(const std::string &s) + +/** + * Returns whether \p str should be regarded as (bool) true. Case and leading + * and trailing whitespace are ignored. Values that will return + * true are "y", "yes", "true" and any number that is not 0. + * @param str + */ +inline bool is_yes(const std::string &str) { - std::string s2 = lowercase(trim(s)); - if(s2 == "y" || s2 == "yes" || s2 == "true" || atoi(s2.c_str()) != 0) - return true; - return false; + std::string s2 = lowercase(trim(str)); + + return s2 == "y" || s2 == "yes" || s2 == "true" || atoi(s2.c_str()) != 0; } -inline s32 mystoi(const std::string &s, s32 min, s32 max) + +/** + * Converts the string \p str to a signed 32-bit integer. The converted value + * is constrained so that min <= value <= max. + * + * @see atoi(3) for limitations + * + * @param str + * @param min Range minimum + * @param max Range maximum + * @return The value converted to a signed 32-bit integer and constrained + * within the range defined by min and max (inclusive) + */ +inline s32 mystoi(const std::string &str, s32 min, s32 max) { - s32 i = atoi(s.c_str()); - if(i < min) + s32 i = atoi(str.c_str()); + + if (i < min) i = min; - if(i > max) + if (i > max) i = max; + return i; } -inline s64 stoi64(const std::string &s) { - std::stringstream tmp(s); - long long t; +/** + * Returns a 32-bit value reprensented by the string \p str (decimal). + * @see atoi(3) for further limitations + */ +inline s32 mystoi(const std::string &str) +{ + return atoi(str.c_str()); +} + +/** + * Returns a float reprensented by the string \p str (decimal). + * @see atof(3) + */ +inline float mystof(const std::string &str) +{ + return atof(str.c_str()); +} + +#define stoi mystoi +#define stof mystof + +/// Returns a value represented by the string \p val. +template +inline T from_string(const std::string &str) +{ + std::stringstream tmp(str); + T t; tmp >> t; return t; } -// MSVC2010 includes it's own versions of these -//#if !defined(_MSC_VER) || _MSC_VER < 1600 +/// Returns a 64-bit signed value represented by the string \p str (decimal). +inline s64 stoi64(const std::string &str) { return from_string(str); } + +#if __cplusplus < 201103L +namespace std { -inline s32 mystoi(const std::string &s) +/// Returns a string representing the value \p val. +template +inline string to_string(T val) { - return atoi(s.c_str()); + ostringstream oss; + oss << val; + return oss.str(); } +#define DEFINE_STD_TOSTRING_FLOATINGPOINT(T) \ + template <> \ + inline string to_string(T val) \ + { \ + ostringstream oss; \ + oss << std::fixed \ + << std::setprecision(6) \ + << val; \ + return oss.str(); \ + } +DEFINE_STD_TOSTRING_FLOATINGPOINT(float) +DEFINE_STD_TOSTRING_FLOATINGPOINT(double) +DEFINE_STD_TOSTRING_FLOATINGPOINT(long double) + +#undef DEFINE_STD_TOSTRING_FLOATINGPOINT -inline s32 mystoi(const std::wstring &s) +/// Returns a wide string representing the value \p val +template +inline wstring to_wstring(T val) { - return atoi(wide_to_narrow(s).c_str()); + return utf8_to_wide(to_string(val)); } +} +#endif + +/// Returns a string representing the decimal value of the 32-bit value \p i. +inline std::string itos(s32 i) { return std::to_string(i); } +/// Returns a string representing the decimal value of the 64-bit value \p i. +inline std::string i64tos(s64 i) { return std::to_string(i); } -inline float mystof(const std::string &s) +// std::to_string uses the '%.6f' conversion, which is inconsistent with +// std::ostream::operator<<() and impractical too. ftos() uses the +// more generic and std::ostream::operator<<()-compatible '%G' format. +/// Returns a string representing the decimal value of the float value \p f. +inline std::string ftos(float f) { - // This crap causes a segfault in certain cases on MinGW - /*float f; - std::istringstream ss(s); - ss>>f; - return f;*/ - // This works in that case - return atof(s.c_str()); + std::ostringstream oss; + oss << f; + return oss.str(); } -//#endif -#define stoi mystoi -#define stof mystof +/** + * Replace all occurrences of \p pattern in \p str with \p replacement. + * + * @param str String to replace pattern with replacement within. + * @param pattern The pattern to replace. + * @param replacement What to replace the pattern with. + */ +inline void str_replace(std::string &str, const std::string &pattern, + const std::string &replacement) +{ + std::string::size_type start = str.find(pattern, 0); + while (start != str.npos) { + str.replace(start, pattern.size(), replacement); + start = str.find(pattern, start + replacement.size()); + } +} -inline std::string itos(s32 i) +/** + * Escapes characters [ ] \ , ; that can not be used in formspecs + */ +inline void str_formspec_escape(std::string &str) { - std::ostringstream o; - o< 0 && character_idx % row_len == 0) + to += '\n'; + character_idx++; + } + to += from[i]; } + + return to; } -inline void str_replace_char(std::string & str, char from, char to) + +/** + * Removes backslashes from an escaped string (FormSpec strings) + */ +template +inline std::basic_string unescape_string(const std::basic_string &s) { - for(unsigned int i=0; i res; + + for (size_t i = 0; i < s.length(); i++) { + if (s[i] == '\\') { + i++; + if (i >= s.length()) + break; + } + res += s[i]; } + + return res; } -/* - Checks if a string contains only supplied characters -*/ -inline bool string_allowed(const std::string &s, const std::string &allowed_chars) -{ - for(u32 i=0; i +std::basic_string unescape_enriched(const std::basic_string &s) +{ + std::basic_string output; + size_t i = 0; + while (i < s.length()) { + if (s[i] == '\x1b') { + ++i; + if (i == s.length()) continue; + if (s[i] == '(') { + ++i; + while (i < s.length() && s[i] != ')') { + if (s[i] == '\\') { + ++i; + } + ++i; + } + ++i; + } else { + ++i; } + continue; } - if(confirmed == false) - return false; + output += s[i]; + ++i; } - return true; + return output; } -/* - Checks if a string contains no blacklisted characters (opposite - function of string_allowed()) -*/ -inline bool string_allowed_blacklist(const std::string & s, const std::string & blacklisted_chars) -{ - for(unsigned int i = 0; i < s.length(); i++) - { - bool invalid = false; - for(unsigned int j = 0; j < blacklisted_chars.length(); j++) - { - if(s[i] == blacklisted_chars[j]) - { - invalid = true; - break; +template +std::vector > split(const std::basic_string &s, T delim) +{ + std::vector > tokens; + + std::basic_string current; + bool last_was_escape = false; + for (size_t i = 0; i < s.length(); i++) { + T si = s[i]; + if (last_was_escape) { + current += '\\'; + current += si; + last_was_escape = false; + } else { + if (si == delim) { + tokens.push_back(current); + current = std::basic_string(); + last_was_escape = false; + } else if (si == '\\') { + last_was_escape = true; + } else { + current += si; + last_was_escape = false; } } - if(invalid) - return false; } - return true; + //push last element + tokens.push_back(current); + + return tokens; } -/* - Forcefully wraps string into rows using \n - (no word wrap, used for showing paths in gui) -*/ -inline std::string wrap_rows(const std::string &from, u32 rowlen) +std::wstring translate_string(const std::wstring &s, Translations *translations); + +std::wstring translate_string(const std::wstring &s); + +inline std::wstring unescape_translate(const std::wstring &s) { + return unescape_enriched(translate_string(s)); +} + +/** + * Checks that all characters in \p to_check are a decimal digits. + * + * @param to_check + * @return true if to_check is not empty and all characters in to_check are + * decimal digits, otherwise false + */ +inline bool is_number(const std::string &to_check) { - std::string to; - for(u32 i=0; i 0) { + ss << neg << day << "d"; + if (hour > 0 || min > 0 || sec > 0) + ss << " "; + } + + if (hour > 0) { + ss << neg << hour << "h"; + if (min > 0 || sec > 0) + ss << " "; + } + + if (min > 0) { + ss << neg << min << "min"; + if (sec > 0) + ss << " "; + } + + if (sec > 0 || total_sec == 0) { + ss << neg << sec << "s"; + } + + return ss.str(); } -/* - Removes all \\ from a string that had been escaped (FormSpec strings) -*/ -inline std::string unescape_string(std::string &s) +/** + * Joins a vector of strings by the string \p delimiter. + * + * @return A std::string + */ +inline std::string str_join(const std::vector &list, + const std::string &delimiter) { - std::string res; - - for (size_t i = 0; i < s.length(); i++) { - if (s[i] == '\\') - i++; - res += s[i]; + std::ostringstream oss; + bool first = true; + for (const auto &part : list) { + if (!first) + oss << delimiter; + oss << part; + first = false; } - - return res; + return oss.str(); } -std::string translatePassword(std::string playername, std::wstring password); -std::string urlencode(std::string str); -std::string urldecode(std::string str); -u32 readFlagString(std::string str, FlagDesc *flagdesc); -std::string writeFlagString(u32 flags, FlagDesc *flagdesc); -char *mystrtok_r(char *s, const char *sep, char **lasts); -u64 read_seed(const char *str); +/** + * Create a UTF8 std::string from a irr::core::stringw. + */ +inline std::string stringw_to_utf8(const irr::core::stringw &input) +{ + std::wstring str(input.c_str()); + return wide_to_utf8(str); +} -#endif + /** + * Create a irr::core:stringw from a UTF8 std::string. + */ +inline irr::core::stringw utf8_to_stringw(const std::string &input) +{ + std::wstring str = utf8_to_wide(input); + return irr::core::stringw(str.c_str()); +} +/** + * Sanitize the name of a new directory. This consists of two stages: + * 1. Check for 'reserved filenames' that can't be used on some filesystems + * and add a prefix to them + * 2. Remove 'unsafe' characters from the name by replacing them with '_' + */ +std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix); + +/** + * Prints a sanitized version of a string without control characters. + * '\t' and '\n' are allowed, as are UTF-8 control characters (e.g. RTL). + * ASCII control characters are replaced with their hex encoding in angle + * brackets (e.g. "a\x1eb" -> "a<1e>b"). + */ +void safe_print_string(std::ostream &os, const std::string &str);