#include "log.h"
#include "hex.h"
-#include "../porting.h"
+#include "porting.h"
+#include "translation.h"
+#include <algorithm>
+#include <array>
#include <sstream>
#include <iomanip>
#include <map>
#define BSD_ICONV_USED
#endif
// Forward declarations of the file-local color-string parsers used by parseColorString.
-static bool parseHexColorString(const std::string &value, video::SColor &color);
+static bool parseHexColorString(const std::string &value, video::SColor &color,
+ unsigned char default_alpha = 0xff); // default_alpha seeds the alpha slot of the R,G,B,A components
static bool parseNamedColorString(const std::string &value, video::SColor &color);
#ifndef _WIN32
-bool convert(const char *to, const char *from, char *outbuf,
- size_t outbuf_size, char *inbuf, size_t inbuf_size)
+static bool convert(const char *to, const char *from, char *outbuf,
+ size_t *outbuf_size, char *inbuf, size_t inbuf_size)
{
iconv_t cd = iconv_open(to, from);
#else
char *inbuf_ptr = inbuf;
#endif
-
char *outbuf_ptr = outbuf;
size_t *inbuf_left_ptr = &inbuf_size;
- size_t *outbuf_left_ptr = &outbuf_size;
+ const size_t old_outbuf_size = *outbuf_size;
size_t old_size = inbuf_size;
while (inbuf_size > 0) {
- iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_left_ptr);
+ iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size);
if (inbuf_size == old_size) {
iconv_close(cd);
return false;
}
iconv_close(cd);
+ *outbuf_size = old_outbuf_size - *outbuf_size;
return true;
}
+#ifdef __ANDROID__
+// On Android, iconv disagrees about how big a wchar_t is, so the encoding is named explicitly.
+const char *DEFAULT_ENCODING = "UTF-32LE";
+#else
+const char *DEFAULT_ENCODING = "WCHAR_T"; // iconv encoding name used for std::wstring data in the conversions below
+#endif
+
// Converts a UTF-8 encoded std::string into a std::wstring through convert() (non-Windows build).
// On failure the hex-encoded input is logged to infostream and a fixed placeholder
// wide string is returned instead.
std::wstring utf8_to_wide(const std::string &input)
{
- size_t inbuf_size = input.length() + 1;
+ const size_t inbuf_size = input.length();
// maximum possible size, every character is sizeof(wchar_t) bytes
- size_t outbuf_size = (input.length() + 1) * sizeof(wchar_t);
+ size_t outbuf_size = input.length() * sizeof(wchar_t);
// NOTE(review): inbuf is a raw new[] buffer; it is released on both return paths below,
// but not if out.resize() throws before the conversion starts.
- char *inbuf = new char[inbuf_size];
+ char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
memcpy(inbuf, input.c_str(), inbuf_size);
- char *outbuf = new char[outbuf_size];
- memset(outbuf, 0, outbuf_size);
+ std::wstring out;
+ out.resize(outbuf_size / sizeof(wchar_t)); // worst-case size, trimmed after the conversion
- if (!convert("WCHAR_T", "UTF-8", outbuf, outbuf_size, inbuf, inbuf_size)) {
+#ifdef __ANDROID__
+ SANITY_CHECK(sizeof(wchar_t) == 4);
+#endif
+
+ char *outbuf = reinterpret_cast<char*>(&out[0]); // the conversion writes bytes straight into the wstring storage
+ if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
<< " into wstring" << std::endl;
delete[] inbuf;
- delete[] outbuf;
return L"<invalid UTF-8 string>";
}
- std::wstring out((wchar_t *)outbuf);
-
delete[] inbuf;
- delete[] outbuf;
+ out.resize(outbuf_size / sizeof(wchar_t)); // on success convert() leaves the number of bytes written in outbuf_size
return out;
}
-#ifdef __ANDROID__
-// TODO: this is an ugly fix for wide_to_utf8 somehow not working on android
-std::string wide_to_utf8(const std::wstring &input)
-{
- return wide_to_narrow(input);
-}
-#else
// Converts a std::wstring into a UTF-8 encoded std::string through convert() (non-Windows build).
// On failure the hex-encoded input bytes are logged to infostream and a fixed placeholder
// string is returned instead.
std::string wide_to_utf8(const std::wstring &input)
{
- size_t inbuf_size = (input.length() + 1) * sizeof(wchar_t);
- // maximum possible size: utf-8 encodes codepoints using 1 up to 6 bytes
- size_t outbuf_size = (input.length() + 1) * 6;
+ const size_t inbuf_size = input.length() * sizeof(wchar_t);
+ // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
+ size_t outbuf_size = input.length() * 4;
// NOTE(review): inbuf is a raw new[] buffer; it is released on both return paths below,
// but not if out.resize() throws before the conversion starts.
- char *inbuf = new char[inbuf_size];
+ char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
memcpy(inbuf, input.c_str(), inbuf_size);
- char *outbuf = new char[outbuf_size];
- memset(outbuf, 0, outbuf_size);
+ std::string out;
+ out.resize(outbuf_size); // worst-case size, trimmed after the conversion
- if (!convert("UTF-8", "WCHAR_T", outbuf, outbuf_size, inbuf, inbuf_size)) {
+ if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
<< " into UTF-8 string" << std::endl;
delete[] inbuf;
- delete[] outbuf;
- return "<invalid wstring>";
+ return "<invalid wide string>";
}
- std::string out(outbuf);
-
delete[] inbuf;
- delete[] outbuf;
+ out.resize(outbuf_size); // on success convert() leaves the number of bytes written in outbuf_size
return out;
}
-#endif
#else // _WIN32
std::wstring utf8_to_wide(const std::string &input)
// Converts a null-terminated UTF-8 C string into a freshly allocated wide C string.
// The result is allocated with new[]; releasing it with delete[] is left to the caller.
wchar_t *utf8_to_wide_c(const char *str)
{
- std::wstring ret = utf8_to_wide(std::string(str)).c_str();
+ std::wstring ret = utf8_to_wide(std::string(str));
size_t len = ret.length();
wchar_t *ret_c = new wchar_t[len + 1];
- memset(ret_c, 0, (len + 1) * sizeof(wchar_t));
- memcpy(ret_c, ret.c_str(), len * sizeof(wchar_t));
+ memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t)); // len + 1 also copies the terminator provided by c_str()
return ret_c;
}
-// You must free the returned string!
-// The returned string is allocated using new
-wchar_t *narrow_to_wide_c(const char *str)
-{
- wchar_t *nstr = NULL;
-#if defined(_WIN32)
- int nResult = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str, -1, 0, 0);
- if (nResult == 0) {
- errorstream<<"gettext: MultiByteToWideChar returned null"<<std::endl;
- } else {
- nstr = new wchar_t[nResult];
- MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str, -1, (WCHAR *) nstr, nResult);
- }
-#else
- size_t len = strlen(str);
- nstr = new wchar_t[len + 1];
- std::wstring intermediate = narrow_to_wide(str);
- memset(nstr, 0, (len + 1) * sizeof(wchar_t));
- memcpy(nstr, intermediate.c_str(), len * sizeof(wchar_t));
-#endif
-
- return nstr;
-}
-
-
-#ifdef __ANDROID__
-
-const wchar_t* wide_chars =
- L" !\"#$%&'()*+,-./0123456789:;<=>?@"
- L"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
- L"abcdefghijklmnopqrstuvwxyz{|}~";
-
-int wctomb(char *s, wchar_t wc)
-{
- for (unsigned int j = 0; j < (sizeof(wide_chars)/sizeof(wchar_t));j++) {
- if (wc == wide_chars[j]) {
- *s = (char) (j+32);
- return 1;
- }
- else if (wc == L'\n') {
- *s = '\n';
- return 1;
- }
- }
- return -1;
-}
-
-int mbtowc(wchar_t *pwc, const char *s, size_t n)
-{
- std::wstring intermediate = narrow_to_wide(s);
-
- if (intermediate.length() > 0) {
- *pwc = intermediate[0];
- return 1;
- }
- else {
- return -1;
- }
-}
-
-std::wstring narrow_to_wide(const std::string &mbs) {
- size_t wcl = mbs.size();
-
- std::wstring retval = L"";
-
- for (unsigned int i = 0; i < wcl; i++) {
- if (((unsigned char) mbs[i] >31) &&
- ((unsigned char) mbs[i] < 127)) {
-
- retval += wide_chars[(unsigned char) mbs[i] -32];
- }
- //handle newline
- else if (mbs[i] == '\n') {
- retval += L'\n';
- }
- }
-
- return retval;
-}
-
-#else // not Android
-
-std::wstring narrow_to_wide(const std::string &mbs)
-{
- size_t wcl = mbs.size();
- Buffer<wchar_t> wcs(wcl + 1);
- size_t len = mbstowcs(*wcs, mbs.c_str(), wcl);
- if (len == (size_t)(-1))
- return L"<invalid multibyte string>";
- wcs[len] = 0;
- return *wcs;
-}
-
-#endif
-
-#ifdef __ANDROID__
-
-std::string wide_to_narrow(const std::wstring &wcs) {
- size_t mbl = wcs.size()*4;
-
- std::string retval = "";
- for (unsigned int i = 0; i < wcs.size(); i++) {
- wchar_t char1 = (wchar_t) wcs[i];
-
- if (char1 == L'\n') {
- retval += '\n';
- continue;
- }
-
- for (unsigned int j = 0; j < wcslen(wide_chars);j++) {
- wchar_t char2 = (wchar_t) wide_chars[j];
-
- if (char1 == char2) {
- char toadd = (j+32);
- retval += toadd;
- break;
- }
- }
- }
-
- return retval;
-}
-
-#else // not Android
-
-std::string wide_to_narrow(const std::wstring &wcs)
-{
- size_t mbl = wcs.size() * 4;
- SharedBuffer<char> mbs(mbl+1);
- size_t len = wcstombs(*mbs, wcs.c_str(), mbl);
- if (len == (size_t)(-1))
- return "Character conversion failed!";
- else
- mbs[len] = 0;
- return *mbs;
-}
-
-#endif
-
// Returns a copy of str in which every byte other than an alphanumeric character
// or one of - . _ ~ is replaced by a percent sign and two uppercase hex digits.
-std::string urlencode(std::string str)
+std::string urlencode(const std::string &str)
{
// Encodes non-unreserved URI characters by a percent sign
// followed by two hex digits. See RFC 3986, section 2.3.
static const char url_hex_chars[] = "0123456789ABCDEF";
std::ostringstream oss(std::ios::binary);
- for (u32 i = 0; i < str.size(); i++) {
- unsigned char c = str[i];
- if (isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~')
+ for (unsigned char c : str) { // unsigned char keeps the isalnum() argument non-negative for bytes >= 0x80
+ if (isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~') {
oss << c;
- else
+ } else {
oss << "%"
<< url_hex_chars[(c & 0xf0) >> 4]
<< url_hex_chars[c & 0x0f];
+ }
}
return oss.str();
}
-std::string urldecode(std::string str)
+std::string urldecode(const std::string &str)
{
// Inverse of urlencode
std::ostringstream oss(std::ios::binary);
hex_digit_decode(str[i+2], lowvalue)) {
oss << (char) ((highvalue << 4) | lowvalue);
i += 2;
- }
- else
+ } else {
oss << str[i];
+ }
}
return oss.str();
}
u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask)
{
- u32 result = 0, mask = 0;
+ u32 result = 0;
+ u32 mask = 0;
char *s = &str[0];
- char *flagstr, *strpos = NULL;
+ char *flagstr;
+ char *strpos = nullptr;
while ((flagstr = strtok_r(s, ",", &strpos))) {
- s = NULL;
+ s = nullptr;
while (*flagstr == ' ' || *flagstr == '\t')
flagstr++;
s++;
if (!*s)
- return NULL;
+ return nullptr;
t = s;
while (*t) {
return num;
}
// Parses value into color: strings starting with # go to parseHexColorString
// (which receives default_alpha), everything else goes to parseNamedColorString.
// Returns whatever the selected parser returned.
// NOTE(review): quiet is not referenced in the lines visible here; this patch excerpt
// may omit code between hunks, so confirm against the full function.
-bool parseColorString(const std::string &value, video::SColor &color, bool quiet)
+bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
+ unsigned char default_alpha)
{
bool success;
if (value[0] == '#')
- success = parseHexColorString(value, color);
+ success = parseHexColorString(value, color, default_alpha);
else
success = parseNamedColorString(value, color);
return success;
}
-static bool parseHexColorString(const std::string &value, video::SColor &color)
+static bool parseHexColorString(const std::string &value, video::SColor &color,
+ unsigned char default_alpha)
{
- unsigned char components[] = { 0x00, 0x00, 0x00, 0xff }; // R,G,B,A
+ unsigned char components[] = { 0x00, 0x00, 0x00, default_alpha }; // R,G,B,A
if (value[0] != '#')
return false;
colors["darkgoldenrod"] = 0xb8860b;
colors["darkgray"] = 0xa9a9a9;
colors["darkgreen"] = 0x006400;
+ colors["darkgrey"] = 0xa9a9a9;
colors["darkkhaki"] = 0xbdb76b;
colors["darkmagenta"] = 0x8b008b;
colors["darkolivegreen"] = 0x556b2f;
colors["darkseagreen"] = 0x8fbc8f;
colors["darkslateblue"] = 0x483d8b;
colors["darkslategray"] = 0x2f4f4f;
+ colors["darkslategrey"] = 0x2f4f4f;
colors["darkturquoise"] = 0x00ced1;
colors["darkviolet"] = 0x9400d3;
colors["deeppink"] = 0xff1493;
colors["deepskyblue"] = 0x00bfff;
colors["dimgray"] = 0x696969;
+ colors["dimgrey"] = 0x696969;
colors["dodgerblue"] = 0x1e90ff;
colors["firebrick"] = 0xb22222;
colors["floralwhite"] = 0xfffaf0;
colors["gray"] = 0x808080;
colors["green"] = 0x008000;
colors["greenyellow"] = 0xadff2f;
+ colors["grey"] = 0x808080;
colors["honeydew"] = 0xf0fff0;
colors["hotpink"] = 0xff69b4;
colors["indianred"] = 0xcd5c5c;
colors["lightgoldenrodyellow"] = 0xfafad2;
colors["lightgray"] = 0xd3d3d3;
colors["lightgreen"] = 0x90ee90;
+ colors["lightgrey"] = 0xd3d3d3;
colors["lightpink"] = 0xffb6c1;
colors["lightsalmon"] = 0xffa07a;
colors["lightseagreen"] = 0x20b2aa;
colors["lightskyblue"] = 0x87cefa;
colors["lightslategray"] = 0x778899;
+ colors["lightslategrey"] = 0x778899;
colors["lightsteelblue"] = 0xb0c4de;
colors["lightyellow"] = 0xffffe0;
colors["lime"] = 0x00ff00;
colors["skyblue"] = 0x87ceeb;
colors["slateblue"] = 0x6a5acd;
colors["slategray"] = 0x708090;
+ colors["slategrey"] = 0x708090;
colors["snow"] = 0xfffafa;
colors["springgreen"] = 0x00ff7f;
colors["steelblue"] = 0x4682b4;
color_name = value;
}
- color_name = lowercase(value);
+ color_name = lowercase(color_name);
std::map<const std::string, unsigned>::const_iterator it;
it = named_colors.colors.find(color_name);
return true;
}
-std::wstring removeChatEscapes(const std::wstring &s) {
- std::wstring output;
- size_t i = 0;
// Replaces every occurrence of the character from with the character to, modifying str in place.
+void str_replace(std::string &str, char from, char to)
+{
+ std::replace(str.begin(), str.end(), from, to);
+}
+
+/* Translated strings have the following format:
+ * \x1bT marks the beginning of a translated string
+ * \x1bE marks its end
+ * The start tag may also be written as \x1b(T@textdomain); the part after the '@'
+ * is the textdomain that is passed to the translation lookup.
+ *
+ * \x1bF marks the beginning of an argument, and \x1bE its end.
+ *
+ * Arguments are *not* translated, as they may contain escape codes.
+ * Thus, if you want a translated argument, it should be inside \x1bT/\x1bE tags as well.
+ *
+ * This representation is chosen so that clients ignoring escape codes will
+ * see untranslated strings.
+ *
+ * For instance, suppose we have a string such as "@1 Wool" with the argument "White".
+ * The string will be sent as "\x1bT\x1bF\x1bTWhite\x1bE\x1bE Wool\x1bE".
+ * To translate this string, we extract what is inside the \x1bT/\x1bE tags.
+ * When we encounter the \x1bF tag, we recursively extract everything up to the matching
+ * \x1bE end tag, translating it as well.
+ * We get the argument "White", translated, and create a template string with "@1" in its place.
+ * We finally recover the template "@1 Wool" that was used in the beginning, which we translate
+ * before substituting the arguments back in.
+ */
+
+void translate_all(const std::wstring &s, size_t &i,
+ Translations *translations, std::wstring &res);
+
// Parses the body of one translated string, starting at s[i] and stopping after the
// matching E escape sequence or at the end of s.
// Each F argument is expanded recursively through translate_all and replaced by @n in
// the template. The template is then looked up in translations for the given textdomain
// (the lookup is skipped when translations is nullptr) and the expanded arguments are
// substituted back. The result is stored in res; i is left just past the consumed input.
+void translate_string(const std::wstring &s, Translations *translations,
+ const std::wstring &textdomain, size_t &i, std::wstring &res)
+{
+ std::wostringstream output;
+ std::vector<std::wstring> args;
+ int arg_number = 1;
while (i < s.length()) {
- if (s[i] == L'\v') {
+ // Not an escape sequence: just add the character.
+ if (s[i] != '\x1b') {
+ output.put(s[i]);
+ // The character is a literal '@'; add it twice
+ // so that it is not mistaken for an argument.
+ if (s[i] == L'@')
+ output.put(L'@');
++i;
- if (i == s.length()) continue;
- if (s[i] == L'(') {
- ++i;
- while (i < s.length() && s[i] != L')') {
- if (s[i] == L'\\') {
- ++i;
- }
+ continue;
+ }
+
+ // We have an escape sequence: locate it and its data
+ // It is either a single character, or it begins with '('
+ // and extends up to the following ')', with '\' as an escape character.
+ ++i;
+ size_t start_index = i;
+ size_t length;
+ if (i == s.length()) {
+ length = 0;
+ } else if (s[i] == L'(') {
+ ++i;
+ ++start_index;
+ while (i < s.length() && s[i] != L')') {
+ if (s[i] == L'\\')
++i;
- }
- ++i;
- } else {
++i;
}
+ length = i - start_index;
+ ++i;
+ if (i > s.length())
+ i = s.length();
+ } else {
+ ++i;
+ length = 1;
+ }
+ std::wstring escape_sequence(s, start_index, length);
+
+ // The escape sequence is now reconstructed.
+ std::vector<std::wstring> parts = split(escape_sequence, L'@');
+ if (parts[0] == L"E") {
+ // "End of translation" escape sequence. We are done locating the string to translate.
+ break;
+ } else if (parts[0] == L"F") {
+ // "Start of argument" escape sequence.
+ // Recursively translate the argument, and add it to the argument list.
+ // Add an "@n" instead of the argument to the template to translate.
+ if (arg_number >= 10) { // only @1 to @9 can be referenced: a single digit is read back below
+ errorstream << "Ignoring too many arguments to translation" << std::endl;
+ std::wstring arg;
+ translate_all(s, i, translations, arg);
+ args.push_back(arg);
+ continue;
+ }
+ output.put(L'@');
+ output << arg_number;
+ ++arg_number;
+ std::wstring arg;
+ translate_all(s, i, translations, arg);
+ args.push_back(arg);
+ } else {
+ // This is an escape sequence *inside* the template string to translate itself.
+ // This should not happen, show an error message.
+ errorstream << "Ignoring escape sequence '"
+ << wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
+ }
+ }
+
+ std::wstring toutput;
+ // Translate the template.
+ if (translations != nullptr)
+ toutput = translations->getTranslation(
+ textdomain, output.str());
+ else
+ toutput = output.str();
+
+ // Put back the arguments in the translated template.
+ std::wostringstream result;
+ size_t j = 0;
+ while (j < toutput.length()) {
+ // Normal character, add it to output and continue.
+ if (toutput[j] != L'@' || j == toutput.length() - 1) { // an @ in the last position is copied verbatim
+ result.put(toutput[j]);
+ ++j;
+ continue;
+ }
+
+ ++j;
+ // Literal escape for '@'.
+ if (toutput[j] == L'@') {
+ result.put(L'@');
+ ++j;
+ continue;
+ }
+
+ // Here we have an argument; get its index and add the translated argument to the output.
+ int arg_index = toutput[j] - L'1';
+ ++j;
+ if (0 <= arg_index && (size_t)arg_index < args.size()) {
+ result << args[arg_index];
+ } else {
+ // This is not allowed: show an error message
+ errorstream << "Ignoring out-of-bounds argument escape sequence in translation" << std::endl;
+ }
+ }
+ res = result.str();
+}
+
// Scans s from index i, copying ordinary characters and unrelated escape sequences to
// the output and replacing each T (start of translation) sequence with its translated
// text obtained from translate_string. Stops after an E escape sequence or at the end
// of s. The result is stored in res; i is left just past the consumed input.
+void translate_all(const std::wstring &s, size_t &i,
+ Translations *translations, std::wstring &res)
+{
+ std::wostringstream output;
+ while (i < s.length()) {
+ // Not an escape sequence: just add the character.
+ if (s[i] != '\x1b') {
+ output.put(s[i]);
+ ++i;
continue;
}
- output += s[i];
+
+ // We have an escape sequence: locate it and its data
+ // It is either a single character, or it begins with '('
+ // and extends up to the following ')', with '\' as an escape character.
+ size_t escape_start = i; // remembered so that an unrelated sequence can be copied through unchanged
++i;
+ size_t start_index = i;
+ size_t length;
+ if (i == s.length()) {
+ length = 0;
+ } else if (s[i] == L'(') {
+ ++i;
+ ++start_index;
+ while (i < s.length() && s[i] != L')') {
+ if (s[i] == L'\\') {
+ ++i;
+ }
+ ++i;
+ }
+ length = i - start_index;
+ ++i;
+ if (i > s.length())
+ i = s.length();
+ } else {
+ ++i;
+ length = 1;
+ }
+ std::wstring escape_sequence(s, start_index, length);
+
+ // The escape sequence is now reconstructed.
+ std::vector<std::wstring> parts = split(escape_sequence, L'@');
+ if (parts[0] == L"E") {
+ // "End of argument" escape sequence. Exit.
+ break;
+ } else if (parts[0] == L"T") {
+ // Beginning of translated string.
+ std::wstring textdomain;
+ if (parts.size() > 1)
+ textdomain = parts[1]; // text after the first @ of the sequence, if any
+ std::wstring translated;
+ translate_string(s, translations, textdomain, i, translated);
+ output << translated;
+ } else {
+ // Another escape sequence, such as colors. Preserve it.
+ output << std::wstring(s, escape_start, i - escape_start);
+ }
}
- return output;
+
+ res = output.str();
}
-void str_replace(std::string &str, char from, char to)
+// Translate string server side: expands the translation escape sequences in s using the given translations (may be nullptr)
+std::wstring translate_string(const std::wstring &s, Translations *translations)
{
- std::replace(str.begin(), str.end(), from, to);
+ size_t i = 0;
+ std::wstring res;
+ translate_all(s, i, translations, res); // returns early if it meets an E sequence outside any translated string
+ return res;
+}
+
+// Translate string client side: forwards to the two-argument overload with the translations chosen at build time
+std::wstring translate_string(const std::wstring &s)
+{
+#ifdef SERVER
+ return translate_string(s, nullptr); // SERVER build: no translation table is passed
+#else
+ return translate_string(s, g_client_translations);
+#endif
+}
+
+static const std::array<std::wstring, 22> disallowed_dir_names = { // names that sanitizeDirName checks with str_equal and prefixes on a match
+ // Problematic filenames from here:
+ // https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
+ L"CON",
+ L"PRN",
+ L"AUX",
+ L"NUL",
+ L"COM1",
+ L"COM2",
+ L"COM3",
+ L"COM4",
+ L"COM5",
+ L"COM6",
+ L"COM7",
+ L"COM8",
+ L"COM9",
+ L"LPT1",
+ L"LPT2",
+ L"LPT3",
+ L"LPT4",
+ L"LPT5",
+ L"LPT6",
+ L"LPT7",
+ L"LPT8",
+ L"LPT9",
+};
+
+/**
+ * List of characters that are blacklisted from created directories
+ * (sanitizeDirName replaces each occurrence with an underscore).
+ */
+static const std::wstring disallowed_path_chars = L"<>:\"/\\|?*.";
+
+/**
+ * Sanitize the name of a new directory. This consists of two stages:
+ * 1. Check for 'reserved filenames' that can't be used on some filesystems
+ * and add a prefix to them
+ * 2. Remove 'unsafe' characters from the name by replacing them with '_'
+ *
+ * @param str UTF-8 encoded name to sanitize
+ * @param optional_prefix UTF-8 prefix, prepended only when str_equal reports
+ * that the name matches an entry of disallowed_dir_names
+ * @return the sanitized name, UTF-8 encoded
+ */
+std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
+{
+ std::wstring safe_name = utf8_to_wide(str);
+
+ // Bind by const reference: iterating by value copied every reserved name.
+ for (const std::wstring &disallowed_name : disallowed_dir_names) {
+ if (str_equal(safe_name, disallowed_name, true)) {
+ safe_name = utf8_to_wide(optional_prefix) + safe_name;
+ break;
+ }
+ }
+
+ // size_t matches the type returned by length().
+ for (size_t i = 0; i < safe_name.length(); i++) {
+ bool is_valid = true;
+
+ // Unlikely, but control characters should always be blacklisted
+ if (safe_name[i] < 32) {
+ is_valid = false;
+ } else if (safe_name[i] < 128) {
+ // Only the ASCII range is checked against the blacklist;
+ // characters from 128 upwards are kept as they are.
+ is_valid = disallowed_path_chars.find_first_of(safe_name[i])
+ == std::wstring::npos;
+ }
+
+ if (!is_valid)
+ safe_name[i] = '_';
+ }
+
+ return wide_to_utf8(safe_name);
}