include/irrUString.h

   1 /*
   2    Basic Unicode string class for Irrlicht.
   3    Copyright (c) 2009-2011 John Norman
   4
   5    This software is provided 'as-is', without any express or implied
   6    warranty. In no event will the authors be held liable for any
   7    damages arising from the use of this software.
   8
   9    Permission is granted to anyone to use this software for any
  10    purpose, including commercial applications, and to alter it and
  11    redistribute it freely, subject to the following restrictions:
  12
  13    1. The origin of this software must not be misrepresented; you
  14       must not claim that you wrote the original software. If you use
  15       this software in a product, an acknowledgment in the product
  16       documentation would be appreciated but is not required.
  17
  18    2. Altered source versions must be plainly marked as such, and
  19       must not be misrepresented as being the original software.
  20
  21    3. This notice may not be removed or altered from any source
  22       distribution.
  23
  24    The original version of this class can be located at:
  25    http://irrlicht.suckerfreegames.com/
  26
  27    John Norman
  28    john@suckerfreegames.com
  29 */
  30
  31 #pragma once
  32
  33 #include <stdio.h>
  34 #include <string.h>
  35 #include <stdlib.h>
  36 #include <cstddef>
  37
  38 #ifdef _WIN32
  39 #define __BYTE_ORDER 0
  40 #define __LITTLE_ENDIAN 0
  41 #define __BIG_ENDIAN 1
  42 #elif defined(__MACH__) && defined(__APPLE__)
  43 #include <machine/endian.h>
  44 #elif defined(__FreeBSD__) || defined(__DragonFly__)
  45 #include <sys/endian.h>
  46 #else
  47 #include <endian.h>
  48 #endif
  49
  50 #include <utility>
  51
  52 #include <string>
  53 #include <iterator>
  54 #include <ostream>
  55
  56 #include "irrTypes.h"
  57 #include "irrAllocator.h"
  58 #include "irrArray.h"
  59 #include "irrMath.h"
  60 #include "irrString.h"
  61 #include "path.h"
  62
//! UTF-16 surrogate start values: the first code unit of the high (leading)
//! surrogate range and the first code unit of the low (trailing) surrogate range.
static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;

//! Is a UTF-16 code unit a surrogate?
//! UTF16_IS_SURROGATE matches 0xD800-0xDFFF, UTF16_IS_SURROGATE_HI matches
//! 0xD800-0xDBFF and UTF16_IS_SURROGATE_LO matches 0xDC00-0xDFFF.
//! Only the masked bits are compared, so any bits of `c` above the low 16 are
//! ignored; do not pass a full UTF-32 value and expect a range check.
#define UTF16_IS_SURROGATE(c)           (((c) & 0xF800) == 0xD800)
#define UTF16_IS_SURROGATE_HI(c)        (((c) & 0xFC00) == 0xD800)
#define UTF16_IS_SURROGATE_LO(c)        (((c) & 0xFC00) == 0xDC00)
  71
  72
  73 namespace irr
  74 {
  75
        // Define our character types.
        // uchar32_t holds one UTF-32 character and uchar16_t one UTF-16 code unit.
        // Note that uchar8_t is plain char, so its signedness is implementation-defined.
        typedef char32_t uchar32_t;
        typedef char16_t uchar16_t;
        typedef char uchar8_t;
  80
  81 namespace core
  82 {
  83
  84 namespace unicode
  85 {
  86
  87 //! The unicode replacement character.  Used to replace invalid characters.
  88 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
  89
  90 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
  91 //! \param high The high value of the pair.
  92 //! \param low The low value of the pair.
  93 //! \return The UTF-32 character expressed by the surrogate pair.
  94 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
  95 {
  96         // Convert the surrogate pair into a single UTF-32 character.
  97         uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
  98         uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
  99         return (wu << 16) | x;
 100 }
 101
 102 //! Swaps the endianness of a 16-bit value.
 103 //! \return The new value.
 104 inline uchar16_t swapEndian16(const uchar16_t& c)
 105 {
 106         return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
 107 }
 108
 109 //! Swaps the endianness of a 32-bit value.
 110 //! \return The new value.
 111 inline uchar32_t swapEndian32(const uchar32_t& c)
 112 {
 113         return  ((c >> 24) & 0x000000FF) |
 114                         ((c >> 8)  & 0x0000FF00) |
 115                         ((c << 8)  & 0x00FF0000) |
 116                         ((c << 24) & 0xFF000000);
 117 }
 118
//! The Unicode byte order mark (the character U+FEFF).
const u16 BOM = 0xFEFF;

//! The size of the Unicode byte order mark in terms of the Unicode character size,
//! i.e. how many units of the respective encoding the mark occupies
//! (three 8-bit units, one 16-bit unit, one 32-bit unit).
const u8 BOM_UTF8_LEN = 3;
const u8 BOM_UTF16_LEN = 1;
const u8 BOM_UTF32_LEN = 1;

//! Unicode byte order marks for file operations.
//! These are the exact byte sequences as they appear at the start of a data
//! stream; the _BE and _LE variants differ only in byte order.
const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };

//! The size in bytes of the Unicode byte marks for file operations.
//! Each value matches the length of the corresponding BOM_ENCODE_* array.
const u8 BOM_ENCODE_UTF8_LEN = 3;
const u8 BOM_ENCODE_UTF16_LEN = 2;
const u8 BOM_ENCODE_UTF32_LEN = 4;
 138
//! Unicode encoding type.
//! Used to request a byte order mark from getUnicodeBOM() and reported by
//! determineUnicodeBOM().
enum EUTF_ENCODE
{
        //! No encoding; determineUnicodeBOM() reports this when no byte order mark is found.
        EUTFE_NONE              = 0,
        //! UTF-8.
        EUTFE_UTF8,
        //! UTF-16; getUnicodeBOM() resolves this to the native system endianness.
        EUTFE_UTF16,
        //! UTF-16, little endian.
        EUTFE_UTF16_LE,
        //! UTF-16, big endian.
        EUTFE_UTF16_BE,
        //! UTF-32; getUnicodeBOM() resolves this to the native system endianness.
        EUTFE_UTF32,
        //! UTF-32, little endian.
        EUTFE_UTF32_LE,
        //! UTF-32, big endian.
        EUTFE_UTF32_BE
};
 151
//! Unicode endianness.
//! NOTE(review): the consumers of this enum are outside this part of the file;
//! the per-value notes below are inferred from the names only - confirm against callers.
enum EUTF_ENDIAN
{
        //! Presumably: whatever byte order the host system uses.
        EUTFEE_NATIVE   = 0,
        //! Presumably: little endian byte order.
        EUTFEE_LITTLE,
        //! Presumably: big endian byte order.
        EUTFEE_BIG
};
 159
 160 //! Returns the specified unicode byte order mark in a byte array.
 161 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
 162 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
 163                 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
 164 //! \return An array that contains a byte order mark.
 165 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
 166 {
 167 #define COPY_ARRAY(source, size) \
 168         memcpy(ret.pointer(), source, size); \
 169         ret.set_used(size)
 170
 171         core::array<u8> ret(4);
 172         switch (mode)
 173         {
 174                 case EUTFE_UTF8:
 175                         COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
 176                         break;
 177                 case EUTFE_UTF16:
 178                         #ifdef __BIG_ENDIAN__
 179                                 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
 180                         #else
 181                                 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
 182                         #endif
 183                         break;
 184                 case EUTFE_UTF16_BE:
 185                         COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
 186                         break;
 187                 case EUTFE_UTF16_LE:
 188                         COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
 189                         break;
 190                 case EUTFE_UTF32:
 191                         #ifdef __BIG_ENDIAN__
 192                                 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
 193                         #else
 194                                 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
 195                         #endif
 196                         break;
 197                 case EUTFE_UTF32_BE:
 198                         COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
 199                         break;
 200                 case EUTFE_UTF32_LE:
 201                         COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
 202                         break;
 203                 case EUTFE_NONE:
 204                         // TODO sapier: fixed warning only,
 205                         // don't know if something needs to be done here
 206                         break;
 207         }
 208         return ret;
 209
 210 #undef COPY_ARRAY
 211 }
 212
 213 //! Detects if the given data stream starts with a unicode BOM.
 214 //! \param data The data stream to check.
 215 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
 216 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
 217 {
 218         if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
 219         if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
 220         if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
 221         if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
 222         if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
 223         return EUTFE_NONE;
 224 }
 225
 226 } // end namespace unicode
 227
 228
 229 //! UTF-16 string class.
 230 template <typename TAlloc = irrAllocator<uchar16_t> >
 231 class ustring16
 232 {
 233 public:
 234
 235         ///------------------///
 236         /// iterator classes ///
 237         ///------------------///
 238
 239         //! Access an element in a unicode string, allowing one to change it.
 240         class _ustring16_iterator_access
 241         {
 242                 public:
 243                         _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
 244
 245                         //! Allow the class to be interpreted as a single UTF-32 character.
 246                         operator uchar32_t() const
 247                         {
 248                                 return _get();
 249                         }
 250
 251                         //! Allow one to change the character in the unicode string.
 252                         //! \param c The new character to use.
 253                         //! \return Myself.
 254                         _ustring16_iterator_access& operator=(const uchar32_t c)
 255                         {
 256                                 _set(c);
 257                                 return *this;
 258                         }
 259
 260                         //! Increments the value by 1.
 261                         //! \return Myself.
 262                         _ustring16_iterator_access& operator++()
 263                         {
 264                                 _set(_get() + 1);
 265                                 return *this;
 266                         }
 267
 268                         //! Increments the value by 1, returning the old value.
 269                         //! \return A unicode character.
 270                         uchar32_t operator++(int)
 271                         {
 272                                 uchar32_t old = _get();
 273                                 _set(old + 1);
 274                                 return old;
 275                         }
 276
 277                         //! Decrements the value by 1.
 278                         //! \return Myself.
 279                         _ustring16_iterator_access& operator--()
 280                         {
 281                                 _set(_get() - 1);
 282                                 return *this;
 283                         }
 284
 285                         //! Decrements the value by 1, returning the old value.
 286                         //! \return A unicode character.
 287                         uchar32_t operator--(int)
 288                         {
 289                                 uchar32_t old = _get();
 290                                 _set(old - 1);
 291                                 return old;
 292                         }
 293
 294                         //! Adds to the value by a specified amount.
 295                         //! \param val The amount to add to this character.
 296                         //! \return Myself.
 297                         _ustring16_iterator_access& operator+=(int val)
 298                         {
 299                                 _set(_get() + val);
 300                                 return *this;
 301                         }
 302
 303                         //! Subtracts from the value by a specified amount.
 304                         //! \param val The amount to subtract from this character.
 305                         //! \return Myself.
 306                         _ustring16_iterator_access& operator-=(int val)
 307                         {
 308                                 _set(_get() - val);
 309                                 return *this;
 310                         }
 311
 312                         //! Multiples the value by a specified amount.
 313                         //! \param val The amount to multiply this character by.
 314                         //! \return Myself.
 315                         _ustring16_iterator_access& operator*=(int val)
 316                         {
 317                                 _set(_get() * val);
 318                                 return *this;
 319                         }
 320
 321                         //! Divides the value by a specified amount.
 322                         //! \param val The amount to divide this character by.
 323                         //! \return Myself.
 324                         _ustring16_iterator_access& operator/=(int val)
 325                         {
 326                                 _set(_get() / val);
 327                                 return *this;
 328                         }
 329
 330                         //! Modulos the value by a specified amount.
 331                         //! \param val The amount to modulo this character by.
 332                         //! \return Myself.
 333                         _ustring16_iterator_access& operator%=(int val)
 334                         {
 335                                 _set(_get() % val);
 336                                 return *this;
 337                         }
 338
 339                         //! Adds to the value by a specified amount.
 340                         //! \param val The amount to add to this character.
 341                         //! \return A unicode character.
 342                         uchar32_t operator+(int val) const
 343                         {
 344                                 return _get() + val;
 345                         }
 346
 347                         //! Subtracts from the value by a specified amount.
 348                         //! \param val The amount to subtract from this character.
 349                         //! \return A unicode character.
 350                         uchar32_t operator-(int val) const
 351                         {
 352                                 return _get() - val;
 353                         }
 354
 355                         //! Multiplies the value by a specified amount.
 356                         //! \param val The amount to multiply this character by.
 357                         //! \return A unicode character.
 358                         uchar32_t operator*(int val) const
 359                         {
 360                                 return _get() * val;
 361                         }
 362
 363                         //! Divides the value by a specified amount.
 364                         //! \param val The amount to divide this character by.
 365                         //! \return A unicode character.
 366                         uchar32_t operator/(int val) const
 367                         {
 368                                 return _get() / val;
 369                         }
 370
 371                         //! Modulos the value by a specified amount.
 372                         //! \param val The amount to modulo this character by.
 373                         //! \return A unicode character.
 374                         uchar32_t operator%(int val) const
 375                         {
 376                                 return _get() % val;
 377                         }
 378
 379                 private:
 380                         //! Gets a uchar32_t from our current position.
 381                         uchar32_t _get() const
 382                         {
 383                                 const uchar16_t* a = ref->c_str();
 384                                 if (!UTF16_IS_SURROGATE(a[pos]))
 385                                         return static_cast<uchar32_t>(a[pos]);
 386                                 else
 387                                 {
 388                                         if (pos + 1 >= ref->size_raw())
 389                                                 return 0;
 390
 391                                         return unicode::toUTF32(a[pos], a[pos + 1]);
 392                                 }
 393                         }
 394
 395                         //! Sets a uchar32_t at our current position.
 396                         void _set(uchar32_t c)
 397                         {
 398                                 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
 399                                 const uchar16_t* a = ref2->c_str();
 400                                 if (c > 0xFFFF)
 401                                 {
 402                                         // c will be multibyte, so split it up into the high and low surrogate pairs.
 403                                         uchar16_t x = static_cast<uchar16_t>(c);
 404                                         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
 405                                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
 406
 407                                         // If the previous position was a surrogate pair, just replace them.  Else, insert the low pair.
 408                                         if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
 409                                                 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
 410                                         else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
 411
 412                                         ref2->replace_raw(vh, static_cast<u32>(pos));
 413                                 }
 414                                 else
 415                                 {
 416                                         // c will be a single byte.
 417                                         uchar16_t vh = static_cast<uchar16_t>(c);
 418
 419                                         // If the previous position was a surrogate pair, remove the extra byte.
 420                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 421                                                 ref2->erase_raw(static_cast<u32>(pos) + 1);
 422
 423                                         ref2->replace_raw(vh, static_cast<u32>(pos));
 424                                 }
 425                         }
 426
 427                         const ustring16<TAlloc>* ref;
 428                         u32 pos;
 429         };
 430         typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
 431
 432
 433         //! Iterator to iterate through a UTF-16 string.
 434         class _ustring16_const_iterator : public std::iterator<
 435                 std::bidirectional_iterator_tag,        // iterator_category
 436                 access,                                                         // value_type
 437                 ptrdiff_t,                                                      // difference_type
 438                 const access,                                           // pointer
 439                 const access                                            // reference
 440         >
 441         {
 442                 public:
 443                         typedef _ustring16_const_iterator _Iter;
 444                         typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
 445                         typedef const access const_pointer;
 446                         typedef const access const_reference;
 447
 448                         typedef typename _Base::value_type value_type;
 449                         typedef typename _Base::difference_type difference_type;
 450                         typedef typename _Base::difference_type distance_type;
 451                         typedef typename _Base::pointer pointer;
 452                         typedef const_reference reference;
 453
 454                         //! Constructors.
 455                         _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
 456                         _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
 457                         _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
 458                         {
 459                                 if (ref->size_raw() == 0 || p == 0)
 460                                         return;
 461
 462                                 // Go to the appropriate position.
 463                                 u32 i = p;
 464                                 u32 sr = ref->size_raw();
 465                                 const uchar16_t* a = ref->c_str();
 466                                 while (i != 0 && pos < sr)
 467                                 {
 468                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 469                                                 pos += 2;
 470                                         else ++pos;
 471                                         --i;
 472                                 }
 473                         }
 474
 475                         //! Test for equalness.
 476                         bool operator==(const _Iter& iter) const
 477                         {
 478                                 if (ref == iter.ref && pos == iter.pos)
 479                                         return true;
 480                                 return false;
 481                         }
 482
 483                         //! Test for unequalness.
 484                         bool operator!=(const _Iter& iter) const
 485                         {
 486                                 if (ref != iter.ref || pos != iter.pos)
 487                                         return true;
 488                                 return false;
 489                         }
 490
 491                         //! Switch to the next full character in the string.
 492                         _Iter& operator++()
 493                         {       // ++iterator
 494                                 if (pos == ref->size_raw()) return *this;
 495                                 const uchar16_t* a = ref->c_str();
 496                                 if (UTF16_IS_SURROGATE_HI(a[pos]))
 497                                         pos += 2;                       // TODO: check for valid low surrogate?
 498                                 else ++pos;
 499                                 if (pos > ref->size_raw()) pos = ref->size_raw();
 500                                 return *this;
 501                         }
 502
 503                         //! Switch to the next full character in the string, returning the previous position.
 504                         _Iter operator++(int)
 505                         {       // iterator++
 506                                 _Iter _tmp(*this);
 507                                 ++*this;
 508                                 return _tmp;
 509                         }
 510
 511                         //! Switch to the previous full character in the string.
 512                         _Iter& operator--()
 513                         {       // --iterator
 514                                 if (pos == 0) return *this;
 515                                 const uchar16_t* a = ref->c_str();
 516                                 --pos;
 517                                 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0)  // low surrogate, go back one more.
 518                                         --pos;
 519                                 return *this;
 520                         }
 521
 522                         //! Switch to the previous full character in the string, returning the previous position.
 523                         _Iter operator--(int)
 524                         {       // iterator--
 525                                 _Iter _tmp(*this);
 526                                 --*this;
 527                                 return _tmp;
 528                         }
 529
 530                         //! Advance a specified number of full characters in the string.
 531                         //! \return Myself.
 532                         _Iter& operator+=(const difference_type v)
 533                         {
 534                                 if (v == 0) return *this;
 535                                 if (v < 0) return operator-=(v * -1);
 536
 537                                 if (pos >= ref->size_raw())
 538                                         return *this;
 539
 540                                 // Go to the appropriate position.
 541                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
 542                                 u32 i = (u32)v;
 543                                 u32 sr = ref->size_raw();
 544                                 const uchar16_t* a = ref->c_str();
 545                                 while (i != 0 && pos < sr)
 546                                 {
 547                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 548                                                 pos += 2;
 549                                         else ++pos;
 550                                         --i;
 551                                 }
 552                                 if (pos > sr)
 553                                         pos = sr;
 554
 555                                 return *this;
 556                         }
 557
 558                         //! Go back a specified number of full characters in the string.
 559                         //! \return Myself.
 560                         _Iter& operator-=(const difference_type v)
 561                         {
 562                                 if (v == 0) return *this;
 563                                 if (v > 0) return operator+=(v * -1);
 564
 565                                 if (pos == 0)
 566                                         return *this;
 567
 568                                 // Go to the appropriate position.
 569                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
 570                                 u32 i = (u32)v;
 571                                 const uchar16_t* a = ref->c_str();
 572                                 while (i != 0 && pos != 0)
 573                                 {
 574                                         --pos;
 575                                         if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
 576                                                 --pos;
 577                                         --i;
 578                                 }
 579
 580                                 return *this;
 581                         }
 582
 583                         //! Return a new iterator that is a variable number of full characters forward from the current position.
 584                         _Iter operator+(const difference_type v) const
 585                         {
 586                                 _Iter ret(*this);
 587                                 ret += v;
 588                                 return ret;
 589                         }
 590
 591                         //! Return a new iterator that is a variable number of full characters backward from the current position.
 592                         _Iter operator-(const difference_type v) const
 593                         {
 594                                 _Iter ret(*this);
 595                                 ret -= v;
 596                                 return ret;
 597                         }
 598
 599                         //! Returns the distance between two iterators.
 600                         difference_type operator-(const _Iter& iter) const
 601                         {
 602                                 // Make sure we reference the same object!
 603                                 if (ref != iter.ref)
 604                                         return difference_type();
 605
 606                                 _Iter i = iter;
 607                                 difference_type ret;
 608
 609                                 // Walk up.
 610                                 if (pos > i.pos)
 611                                 {
 612                                         while (pos > i.pos)
 613                                         {
 614                                                 ++i;
 615                                                 ++ret;
 616                                         }
 617                                         return ret;
 618                                 }
 619
 620                                 // Walk down.
 621                                 while (pos < i.pos)
 622                                 {
 623                                         --i;
 624                                         --ret;
 625                                 }
 626                                 return ret;
 627                         }
 628
 629                         //! Accesses the full character at the iterator's position.
 630                         const_reference operator*() const
 631                         {
 632                                 if (pos >= ref->size_raw())
 633                                 {
 634                                         const uchar16_t* a = ref->c_str();
 635                                         u32 p = ref->size_raw();
 636                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 637                                                 --p;
 638                                         reference ret(ref, p);
 639                                         return ret;
 640                                 }
 641                                 const_reference ret(ref, pos);
 642                                 return ret;
 643                         }
 644
 645                         //! Accesses the full character at the iterator's position.
 646                         reference operator*()
 647                         {
 648                                 if (pos >= ref->size_raw())
 649                                 {
 650                                         const uchar16_t* a = ref->c_str();
 651                                         u32 p = ref->size_raw();
 652                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 653                                                 --p;
 654                                         reference ret(ref, p);
 655                                         return ret;
 656                                 }
 657                                 reference ret(ref, pos);
 658                                 return ret;
 659                         }
 660
 661                         //! Accesses the full character at the iterator's position.
 662                         const_pointer operator->() const
 663                         {
 664                                 return operator*();
 665                         }
 666
 667                         //! Accesses the full character at the iterator's position.
 668                         pointer operator->()
 669                         {
 670                                 return operator*();
 671                         }
 672
 673                         //! Is the iterator at the start of the string?
 674                         bool atStart() const
 675                         {
 676                                 return pos == 0;
 677                         }
 678
 679                         //! Is the iterator at the end of the string?
 680                         bool atEnd() const
 681                         {
 682                                 const uchar16_t* a = ref->c_str();
 683                                 if (UTF16_IS_SURROGATE(a[pos]))
 684                                         return (pos + 1) >= ref->size_raw();
 685                                 else return pos >= ref->size_raw();
 686                         }
 687
 688                         //! Moves the iterator to the start of the string.
 689                         void toStart()
 690                         {
 691                                 pos = 0;
 692                         }
 693
 694                         //! Moves the iterator to the end of the string.
 695                         void toEnd()
 696                         {
 697                                 pos = ref->size_raw();
 698                         }
 699
 700                         //! Returns the iterator's position.
 701                         //! \return The iterator's position.
 702                         u32 getPos() const
 703                         {
 704                                 return pos;
 705                         }
 706
 707                 protected:
 708                         const ustring16<TAlloc>* ref;
 709                         u32 pos;
 710         };
 711
 712         //! Iterator to iterate through a UTF-16 string.
 713         class _ustring16_iterator : public _ustring16_const_iterator
 714         {
 715                 public:
 716                         typedef _ustring16_iterator _Iter;
 717                         typedef _ustring16_const_iterator _Base;
 718                         typedef typename _Base::const_pointer const_pointer;
 719                         typedef typename _Base::const_reference const_reference;
 720
 721
 722                         typedef typename _Base::value_type value_type;
 723                         typedef typename _Base::difference_type difference_type;
 724                         typedef typename _Base::distance_type distance_type;
 725                         typedef access pointer;
 726                         typedef access reference;
 727
 728                         using _Base::pos;
 729                         using _Base::ref;
 730
 731                         //! Constructors.
 732                         _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
 733                         _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
 734                         _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
 735
 736                         //! Accesses the full character at the iterator's position.
 737                         reference operator*() const
 738                         {
 739                                 if (pos >= ref->size_raw())
 740                                 {
 741                                         const uchar16_t* a = ref->c_str();
 742                                         u32 p = ref->size_raw();
 743                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 744                                                 --p;
 745                                         reference ret(ref, p);
 746                                         return ret;
 747                                 }
 748                                 reference ret(ref, pos);
 749                                 return ret;
 750                         }
 751
 752                         //! Accesses the full character at the iterator's position.
 753                         reference operator*()
 754                         {
 755                                 if (pos >= ref->size_raw())
 756                                 {
 757                                         const uchar16_t* a = ref->c_str();
 758                                         u32 p = ref->size_raw();
 759                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 760                                                 --p;
 761                                         reference ret(ref, p);
 762                                         return ret;
 763                                 }
 764                                 reference ret(ref, pos);
 765                                 return ret;
 766                         }
 767
 768                         //! Accesses the full character at the iterator's position.
 769                         pointer operator->() const
 770                         {
 771                                 return operator*();
 772                         }
 773
 774                         //! Accesses the full character at the iterator's position.
 775                         pointer operator->()
 776                         {
 777                                 return operator*();
 778                         }
 779         };
 780
 781         typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
 782         typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
 783
 784         ///----------------------///
 785         /// end iterator classes ///
 786         ///----------------------///
 787
 788         //! Default constructor
 789         ustring16()
 790         : array(0), allocated(1), used(0)
 791         {
 792 #if __BYTE_ORDER == __BIG_ENDIAN
 793                 encoding = unicode::EUTFE_UTF16_BE;
 794 #else
 795                 encoding = unicode::EUTFE_UTF16_LE;
 796 #endif
 797                 array = allocator.allocate(1); // new u16[1];
 798                 array[0] = 0x0;
 799         }
 800
 801
 802         //! Constructor
 803         ustring16(const ustring16<TAlloc>& other)
 804         : array(0), allocated(0), used(0)
 805         {
 806 #if __BYTE_ORDER == __BIG_ENDIAN
 807                 encoding = unicode::EUTFE_UTF16_BE;
 808 #else
 809                 encoding = unicode::EUTFE_UTF16_LE;
 810 #endif
 811                 *this = other;
 812         }
 813
 814
 815         //! Constructor from other string types
 816         template <class B>
 817         ustring16(const string<B>& other)
 818         : array(0), allocated(0), used(0)
 819         {
 820 #if __BYTE_ORDER == __BIG_ENDIAN
 821                 encoding = unicode::EUTFE_UTF16_BE;
 822 #else
 823                 encoding = unicode::EUTFE_UTF16_LE;
 824 #endif
 825                 *this = other;
 826         }
 827
 828         //! Constructor from std::string
 829         template <class B, class A, typename Alloc>
 830         ustring16(const std::basic_string<B, A, Alloc>& other)
 831         : array(0), allocated(0), used(0)
 832         {
 833 #if __BYTE_ORDER == __BIG_ENDIAN
 834                 encoding = unicode::EUTFE_UTF16_BE;
 835 #else
 836                 encoding = unicode::EUTFE_UTF16_LE;
 837 #endif
 838                 *this = other.c_str();
 839         }
 840
 841
 842         //! Constructor from iterator.
 843         template <typename Itr>
 844         ustring16(Itr first, Itr last)
 845         : array(0), allocated(0), used(0)
 846         {
 847 #if __BYTE_ORDER == __BIG_ENDIAN
 848                 encoding = unicode::EUTFE_UTF16_BE;
 849 #else
 850                 encoding = unicode::EUTFE_UTF16_LE;
 851 #endif
 852                 reserve(std::distance(first, last));
 853                 array[used] = 0;
 854
 855                 for (; first != last; ++first)
 856                         append((uchar32_t)*first);
 857         }
 858
 859         //! Constructor for copying a UTF-8 string from a pointer.
 860         ustring16(const uchar8_t* const c)
 861         : array(0), allocated(0), used(0)
 862         {
 863 #if __BYTE_ORDER == __BIG_ENDIAN
 864                 encoding = unicode::EUTFE_UTF16_BE;
 865 #else
 866                 encoding = unicode::EUTFE_UTF16_LE;
 867 #endif
 868
 869                 append(c);
 870         }
 871
 872
 873         //! Constructor for copying a UTF-8 string from a single char.
 874         ustring16(const char c)
 875         : array(0), allocated(0), used(0)
 876         {
 877 #if __BYTE_ORDER == __BIG_ENDIAN
 878                 encoding = unicode::EUTFE_UTF16_BE;
 879 #else
 880                 encoding = unicode::EUTFE_UTF16_LE;
 881 #endif
 882
 883                 append((uchar32_t)c);
 884         }
 885
 886
 887         //! Constructor for copying a UTF-8 string from a pointer with a given length.
 888         ustring16(const uchar8_t* const c, u32 length)
 889         : array(0), allocated(0), used(0)
 890         {
 891 #if __BYTE_ORDER == __BIG_ENDIAN
 892                 encoding = unicode::EUTFE_UTF16_BE;
 893 #else
 894                 encoding = unicode::EUTFE_UTF16_LE;
 895 #endif
 896
 897                 append(c, length);
 898         }
 899
 900
 901         //! Constructor for copying a UTF-16 string from a pointer.
 902         ustring16(const uchar16_t* const c)
 903         : array(0), allocated(0), used(0)
 904         {
 905 #if __BYTE_ORDER == __BIG_ENDIAN
 906                 encoding = unicode::EUTFE_UTF16_BE;
 907 #else
 908                 encoding = unicode::EUTFE_UTF16_LE;
 909 #endif
 910
 911                 append(c);
 912         }
 913
 914
 915         //! Constructor for copying a UTF-16 string from a pointer with a given length
 916         ustring16(const uchar16_t* const c, u32 length)
 917         : array(0), allocated(0), used(0)
 918         {
 919 #if __BYTE_ORDER == __BIG_ENDIAN
 920                 encoding = unicode::EUTFE_UTF16_BE;
 921 #else
 922                 encoding = unicode::EUTFE_UTF16_LE;
 923 #endif
 924
 925                 append(c, length);
 926         }
 927
 928
 929         //! Constructor for copying a UTF-32 string from a pointer.
 930         ustring16(const uchar32_t* const c)
 931         : array(0), allocated(0), used(0)
 932         {
 933 #if __BYTE_ORDER == __BIG_ENDIAN
 934                 encoding = unicode::EUTFE_UTF16_BE;
 935 #else
 936                 encoding = unicode::EUTFE_UTF16_LE;
 937 #endif
 938
 939                 append(c);
 940         }
 941
 942
 943         //! Constructor for copying a UTF-32 from a pointer with a given length.
 944         ustring16(const uchar32_t* const c, u32 length)
 945         : array(0), allocated(0), used(0)
 946         {
 947 #if __BYTE_ORDER == __BIG_ENDIAN
 948                 encoding = unicode::EUTFE_UTF16_BE;
 949 #else
 950                 encoding = unicode::EUTFE_UTF16_LE;
 951 #endif
 952
 953                 append(c, length);
 954         }
 955
 956
 957         //! Constructor for copying a wchar_t string from a pointer.
 958         ustring16(const wchar_t* const c)
 959         : array(0), allocated(0), used(0)
 960         {
 961 #if __BYTE_ORDER == __BIG_ENDIAN
 962                 encoding = unicode::EUTFE_UTF16_BE;
 963 #else
 964                 encoding = unicode::EUTFE_UTF16_LE;
 965 #endif
 966
 967                 if (sizeof(wchar_t) == 4)
 968                         append(reinterpret_cast<const uchar32_t* const>(c));
 969                 else if (sizeof(wchar_t) == 2)
 970                         append(reinterpret_cast<const uchar16_t* const>(c));
 971                 else if (sizeof(wchar_t) == 1)
 972                         append(reinterpret_cast<const uchar8_t* const>(c));
 973         }
 974
 975
 976         //! Constructor for copying a wchar_t string from a pointer with a given length.
 977         ustring16(const wchar_t* const c, u32 length)
 978         : array(0), allocated(0), used(0)
 979         {
 980 #if __BYTE_ORDER == __BIG_ENDIAN
 981                 encoding = unicode::EUTFE_UTF16_BE;
 982 #else
 983                 encoding = unicode::EUTFE_UTF16_LE;
 984 #endif
 985
 986                 if (sizeof(wchar_t) == 4)
 987                         append(reinterpret_cast<const uchar32_t* const>(c), length);
 988                 else if (sizeof(wchar_t) == 2)
 989                         append(reinterpret_cast<const uchar16_t* const>(c), length);
 990                 else if (sizeof(wchar_t) == 1)
 991                         append(reinterpret_cast<const uchar8_t* const>(c), length);
 992         }
 993
 994
 995         //! Constructor for moving a ustring16
 996         ustring16(ustring16<TAlloc>&& other)
 997         : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
 998         {
 999                 //std::cout << "MOVE constructor" << std::endl;
1000                 other.array = 0;
1001                 other.allocated = 0;
1002                 other.used = 0;
1003         }
1004
1005         //! Destructor
1006         ~ustring16()
1007         {
1008                 allocator.deallocate(array); // delete [] array;
1009         }
1010
1011
1012         //! Assignment operator
1013         ustring16& operator=(const ustring16<TAlloc>& other)
1014         {
1015                 if (this == &other)
1016                         return *this;
1017
1018                 used = other.size_raw();
1019                 if (used >= allocated)
1020                 {
1021                         allocator.deallocate(array); // delete [] array;
1022                         allocated = used + 1;
1023                         array = allocator.allocate(used + 1); //new u16[used];
1024                 }
1025
1026                 const uchar16_t* p = other.c_str();
1027                 for (u32 i=0; i<=used; ++i, ++p)
1028                         array[i] = *p;
1029
1030                 array[used] = 0;
1031
1032                 // Validate our new UTF-16 string.
1033                 validate();
1034
1035                 return *this;
1036         }
1037
1038         //! Move assignment operator
1039         ustring16& operator=(ustring16<TAlloc>&& other)
1040         {
1041                 if (this != &other)
1042                 {
1043                         //std::cout << "MOVE operator=" << std::endl;
1044                         allocator.deallocate(array);
1045
1046                         array = other.array;
1047                         allocated = other.allocated;
1048                         encoding = other.encoding;
1049                         used = other.used;
1050                         other.array = 0;
1051                         other.used = 0;
1052                 }
1053                 return *this;
1054         }
1055
1056         //! Assignment operator for other string types
1057         template <class B>
1058         ustring16<TAlloc>& operator=(const string<B>& other)
1059         {
1060                 *this = other.c_str();
1061                 return *this;
1062         }
1063
1064
1065         //! Assignment operator for UTF-8 strings
1066         ustring16<TAlloc>& operator=(const uchar8_t* const c)
1067         {
1068                 if (!array)
1069                 {
1070                         array = allocator.allocate(1); //new u16[1];
1071                         allocated = 1;
1072                 }
1073                 used = 0;
1074                 array[used] = 0x0;
1075                 if (!c) return *this;
1076
1077                 //! Append our string now.
1078                 append(c);
1079                 return *this;
1080         }
1081
1082
1083         //! Assignment operator for UTF-16 strings
1084         ustring16<TAlloc>& operator=(const uchar16_t* const c)
1085         {
1086                 if (!array)
1087                 {
1088                         array = allocator.allocate(1); //new u16[1];
1089                         allocated = 1;
1090                 }
1091                 used = 0;
1092                 array[used] = 0x0;
1093                 if (!c) return *this;
1094
1095                 //! Append our string now.
1096                 append(c);
1097                 return *this;
1098         }
1099
1100
1101         //! Assignment operator for UTF-32 strings
1102         ustring16<TAlloc>& operator=(const uchar32_t* const c)
1103         {
1104                 if (!array)
1105                 {
1106                         array = allocator.allocate(1); //new u16[1];
1107                         allocated = 1;
1108                 }
1109                 used = 0;
1110                 array[used] = 0x0;
1111                 if (!c) return *this;
1112
1113                 //! Append our string now.
1114                 append(c);
1115                 return *this;
1116         }
1117
1118
1119         //! Assignment operator for wchar_t strings.
1120         /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1121                 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1122                 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1123         ustring16<TAlloc>& operator=(const wchar_t* const c)
1124         {
1125                 if (sizeof(wchar_t) == 4)
1126                         *this = reinterpret_cast<const uchar32_t* const>(c);
1127                 else if (sizeof(wchar_t) == 2)
1128                         *this = reinterpret_cast<const uchar16_t* const>(c);
1129                 else if (sizeof(wchar_t) == 1)
1130                         *this = reinterpret_cast<const uchar8_t* const>(c);
1131
1132                 return *this;
1133         }
1134
1135
1136         //! Assignment operator for other strings.
1137         /** Note that this assumes that a correct unicode string is stored in the string. **/
1138         template <class B>
1139         ustring16<TAlloc>& operator=(const B* const c)
1140         {
1141                 if (sizeof(B) == 4)
1142                         *this = reinterpret_cast<const uchar32_t* const>(c);
1143                 else if (sizeof(B) == 2)
1144                         *this = reinterpret_cast<const uchar16_t* const>(c);
1145                 else if (sizeof(B) == 1)
1146                         *this = reinterpret_cast<const uchar8_t* const>(c);
1147
1148                 return *this;
1149         }
1150
1151
1152         //! Direct access operator
1153         access operator [](const u32 index)
1154         {
1155                 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1156                 iterator iter(*this, index);
1157                 return iter.operator*();
1158         }
1159
1160
1161         //! Direct access operator
1162         const access operator [](const u32 index) const
1163         {
1164                 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1165                 const_iterator iter(*this, index);
1166                 return iter.operator*();
1167         }
1168
1169
1170         //! Equality operator
1171         bool operator ==(const uchar16_t* const str) const
1172         {
1173                 if (!str)
1174                         return false;
1175
1176                 u32 i;
1177                 for(i=0; array[i] && str[i]; ++i)
1178                         if (array[i] != str[i])
1179                                 return false;
1180
1181                 return !array[i] && !str[i];
1182         }
1183
1184
1185         //! Equality operator
1186         bool operator ==(const ustring16<TAlloc>& other) const
1187         {
1188                 for(u32 i=0; array[i] && other.array[i]; ++i)
1189                         if (array[i] != other.array[i])
1190                                 return false;
1191
1192                 return used == other.used;
1193         }
1194
1195
1196         //! Is smaller comparator
1197         bool operator <(const ustring16<TAlloc>& other) const
1198         {
1199                 for(u32 i=0; array[i] && other.array[i]; ++i)
1200                 {
1201                         s32 diff = array[i] - other.array[i];
1202                         if ( diff )
1203                                 return diff < 0;
1204                 }
1205
1206                 return used < other.used;
1207         }
1208
1209
1210         //! Inequality operator
1211         bool operator !=(const uchar16_t* const str) const
1212         {
1213                 return !(*this == str);
1214         }
1215
1216
1217         //! Inequality operator
1218         bool operator !=(const ustring16<TAlloc>& other) const
1219         {
1220                 return !(*this == other);
1221         }
1222
1223
1224         //! Returns the length of a ustring16 in full characters.
1225         //! \return Length of a ustring16 in full characters.
1226         u32 size() const
1227         {
1228                 const_iterator i(*this, 0);
1229                 u32 pos = 0;
1230                 while (!i.atEnd())
1231                 {
1232                         ++i;
1233                         ++pos;
1234                 }
1235                 return pos;
1236         }
1237
1238
1239         //! Informs if the ustring is empty or not.
1240         //! \return True if the ustring is empty, false if not.
1241         bool empty() const
1242         {
1243                 return (size_raw() == 0);
1244         }
1245
1246
1247         //! Returns a pointer to the raw UTF-16 string data.
1248         //! \return pointer to C-style NUL terminated array of UTF-16 code points.
1249         const uchar16_t* c_str() const
1250         {
1251                 return array;
1252         }
1253
1254
1255         //! Compares the first n characters of this string with another.
1256         //! \param other Other string to compare to.
1257         //! \param n Number of characters to compare.
1258         //! \return True if the n first characters of both strings are equal.
1259         bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1260         {
1261                 u32 i;
1262                 const uchar16_t* oa = other.c_str();
1263                 for(i=0; i < n && array[i] && oa[i]; ++i)
1264                         if (array[i] != oa[i])
1265                                 return false;
1266
1267                 // if one (or both) of the strings was smaller then they
1268                 // are only equal if they have the same length
1269                 return (i == n) || (used == other.used);
1270         }
1271
1272
1273         //! Compares the first n characters of this string with another.
1274         //! \param str Other string to compare to.
1275         //! \param n Number of characters to compare.
1276         //! \return True if the n first characters of both strings are equal.
1277         bool equalsn(const uchar16_t* const str, u32 n) const
1278         {
1279                 if (!str)
1280                         return false;
1281                 u32 i;
1282                 for(i=0; i < n && array[i] && str[i]; ++i)
1283                         if (array[i] != str[i])
1284                                 return false;
1285
1286                 // if one (or both) of the strings was smaller then they
1287                 // are only equal if they have the same length
1288                 return (i == n) || (array[i] == 0 && str[i] == 0);
1289         }
1290
1291
1292         //! Appends a character to this ustring16
1293         //! \param character The character to append.
1294         //! \return A reference to our current string.
1295         ustring16<TAlloc>& append(uchar32_t character)
1296         {
1297                 if (used + 2 >= allocated)
1298                         reallocate(used + 2);
1299
1300                 if (character > 0xFFFF)
1301                 {
1302                         used += 2;
1303
1304                         // character will be multibyte, so split it up into a surrogate pair.
1305                         uchar16_t x = static_cast<uchar16_t>(character);
1306                         uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1307                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1308                         array[used-2] = vh;
1309                         array[used-1] = vl;
1310                 }
1311                 else
1312                 {
1313                         ++used;
1314                         array[used-1] = character;
1315                 }
1316                 array[used] = 0;
1317
1318                 return *this;
1319         }
1320
1321
1322         //! Appends a UTF-8 string to this ustring16
1323         //! \param other The UTF-8 string to append.
1324         //! \param length The length of the string to append.
1325         //! \return A reference to our current string.
1326         ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1327         {
1328                 if (!other)
1329                         return *this;
1330
1331                 // Determine if the string is long enough for a BOM.
1332                 u32 len = 0;
1333                 const uchar8_t* p = other;
1334                 do
1335                 {
1336                         ++len;
1337                 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1338
1339                 // Check for BOM.
1340                 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1341                 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1342                 {
1343                         if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1344                                 c_bom = unicode::EUTFE_UTF8;
1345                 }
1346
1347                 // If a BOM was found, don't include it in the string.
1348                 const uchar8_t* c2 = other;
1349                 if (c_bom != unicode::EUTFE_NONE)
1350                 {
1351                         c2 = other + unicode::BOM_UTF8_LEN;
1352                         length -= unicode::BOM_UTF8_LEN;
1353                 }
1354
1355                 // Calculate the size of the string to read in.
1356                 len = 0;
1357                 p = c2;
1358                 do
1359                 {
1360                         ++len;
1361                 } while(*p++ && len < length);
1362                 if (len > length)
1363                         len = length;
1364
1365                 // If we need to grow the array, do it now.
1366                 if (used + len >= allocated)
1367                         reallocate(used + (len * 2));
1368                 u32 start = used;
1369
1370                 // Convert UTF-8 to UTF-16.
1371                 u32 pos = start;
1372                 for (u32 l = 0; l<len;)
1373                 {
1374                         ++used;
1375                         if (((c2[l] >> 6) & 0x03) == 0x02)
1376                         {       // Invalid continuation byte.
1377                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1378                                 ++l;
1379                         }
1380                         else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1381                         {       // Invalid byte - overlong encoding.
1382                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1383                                 ++l;
1384                         }
1385                         else if ((c2[l] & 0xF8) == 0xF0)
1386                         {       // 4 bytes UTF-8, 2 bytes UTF-16.
1387                                 // Check for a full string.
1388                                 if ((l + 3) >= len)
1389                                 {
1390                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1391                                         l += 3;
1392                                         break;
1393                                 }
1394
1395                                 // Validate.
1396                                 bool valid = true;
1397                                 u8 l2 = 0;
1398                                 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1399                                 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1400                                 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1401                                 if (!valid)
1402                                 {
1403                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1404                                         l += l2;
1405                                         continue;
1406                                 }
1407
1408                                 // Decode.
1409                                 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1410                                 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1411                                 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1412                                 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1413
1414                                 // Split v up into a surrogate pair.
1415                                 uchar16_t x = static_cast<uchar16_t>(v);
1416                                 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1417                                 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1418
1419                                 array[pos++] = vh;
1420                                 array[pos++] = vl;
1421                                 l += 4;
1422                                 ++used;         // Using two shorts this time, so increase used by 1.
1423                         }
1424                         else if ((c2[l] & 0xF0) == 0xE0)
1425                         {       // 3 bytes UTF-8, 1 byte UTF-16.
1426                                 // Check for a full string.
1427                                 if ((l + 2) >= len)
1428                                 {
1429                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1430                                         l += 2;
1431                                         break;
1432                                 }
1433
1434                                 // Validate.
1435                                 bool valid = true;
1436                                 u8 l2 = 0;
1437                                 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1438                                 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1439                                 if (!valid)
1440                                 {
1441                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1442                                         l += l2;
1443                                         continue;
1444                                 }
1445
1446                                 // Decode.
1447                                 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1448                                 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1449                                 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1450                                 array[pos++] = ch;
1451                                 l += 3;
1452                         }
1453                         else if ((c2[l] & 0xE0) == 0xC0)
1454                         {       // 2 bytes UTF-8, 1 byte UTF-16.
1455                                 // Check for a full string.
1456                                 if ((l + 1) >= len)
1457                                 {
1458                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1459                                         l += 1;
1460                                         break;
1461                                 }
1462
1463                                 // Validate.
1464                                 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1465                                 {
1466                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1467                                         ++l;
1468                                         continue;
1469                                 }
1470
1471                                 // Decode.
1472                                 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1473                                 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1474                                 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1475                                 array[pos++] = ch;
1476                                 l += 2;
1477                         }
1478                         else
1479                         {       // 1 byte UTF-8, 1 byte UTF-16.
1480                                 // Validate.
1481                                 if (c2[l] > 0x7F)
1482                                 {       // Values above 0xF4 are restricted and aren't used.  By now, anything above 0x7F is invalid.
1483                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1484                                 }
1485                                 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1486                                 ++l;
1487                         }
1488                 }
1489                 array[used] = 0;
1490
1491                 // Validate our new UTF-16 string.
1492                 validate();
1493
1494                 return *this;
1495         }
1496
1497
1498         //! Appends a UTF-16 string to this ustring16
1499         //! \param other The UTF-16 string to append.
1500         //! \param length The length of the string to append.
1501         //! \return A reference to our current string.
1502         ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1503         {
1504                 if (!other)
1505                         return *this;
1506
1507                 // Determine if the string is long enough for a BOM.
1508                 u32 len = 0;
1509                 const uchar16_t* p = other;
1510                 do
1511                 {
1512                         ++len;
1513                 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1514
1515                 // Check for the BOM to determine the string's endianness.
1516                 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1517                 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1518                         c_end = unicode::EUTFEE_LITTLE;
1519                 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1520                         c_end = unicode::EUTFEE_BIG;
1521
1522                 // If a BOM was found, don't include it in the string.
1523                 const uchar16_t* c2 = other;
1524                 if (c_end != unicode::EUTFEE_NATIVE)
1525                 {
1526                         c2 = other + unicode::BOM_UTF16_LEN;
1527                         length -= unicode::BOM_UTF16_LEN;
1528                 }
1529
1530                 // Calculate the size of the string to read in.
1531                 len = 0;
1532                 p = c2;
1533                 do
1534                 {
1535                         ++len;
1536                 } while(*p++ && len < length);
1537                 if (len > length)
1538                         len = length;
1539
1540                 // If we need to grow the size of the array, do it now.
1541                 if (used + len >= allocated)
1542                         reallocate(used + (len * 2));
1543                 u32 start = used;
1544                 used += len;
1545
1546                 // Copy the string now.
1547                 unicode::EUTF_ENDIAN m_end = getEndianness();
1548                 for (u32 l = start; l < start + len; ++l)
1549                 {
1550                         array[l] = (uchar16_t)c2[l];
1551                         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1552                                 array[l] = unicode::swapEndian16(array[l]);
1553                 }
1554
1555                 array[used] = 0;
1556
1557                 // Validate our new UTF-16 string.
1558                 validate();
1559                 return *this;
1560         }
1561
1562
1563         //! Appends a UTF-32 string to this ustring16
1564         //! \param other The UTF-32 string to append.
1565         //! \param length The length of the string to append.
1566         //! \return A reference to our current string.
1567         ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1568         {
1569                 if (!other)
1570                         return *this;
1571
1572                 // Check for the BOM to determine the string's endianness.
1573                 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1574                 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1575                         c_end = unicode::EUTFEE_LITTLE;
1576                 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1577                         c_end = unicode::EUTFEE_BIG;
1578
1579                 // If a BOM was found, don't include it in the string.
1580                 const uchar32_t* c2 = other;
1581                 if (c_end != unicode::EUTFEE_NATIVE)
1582                 {
1583                         c2 = other + unicode::BOM_UTF32_LEN;
1584                         length -= unicode::BOM_UTF32_LEN;
1585                 }
1586
1587                 // Calculate the size of the string to read in.
1588                 u32 len = 0;
1589                 const uchar32_t* p = c2;
1590                 do
1591                 {
1592                         ++len;
1593                 } while(*p++ && len < length);
1594                 if (len > length)
1595                         len = length;
1596
1597                 // If we need to grow the size of the array, do it now.
1598                 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1599                 if (used + (len * 2) >= allocated)
1600                         reallocate(used + ((len * 2) * 2));
1601                 u32 start = used;
1602
1603                 // Convert UTF-32 to UTF-16.
1604                 unicode::EUTF_ENDIAN m_end = getEndianness();
1605                 u32 pos = start;
1606                 for (u32 l = 0; l<len; ++l)
1607                 {
1608                         ++used;
1609
1610                         uchar32_t ch = c2[l];
1611                         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1612                                 ch = unicode::swapEndian32(ch);
1613
1614                         if (ch > 0xFFFF)
1615                         {
1616                                 // Split ch up into a surrogate pair as it is over 16 bits long.
1617                                 uchar16_t x = static_cast<uchar16_t>(ch);
1618                                 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1619                                 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1620                                 array[pos++] = vh;
1621                                 array[pos++] = vl;
1622                                 ++used;         // Using two shorts, so increased used again.
1623                         }
1624                         else if (ch >= 0xD800 && ch <= 0xDFFF)
1625                         {
1626                                 // Between possible UTF-16 surrogates (invalid!)
1627                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1628                         }
1629                         else array[pos++] = static_cast<uchar16_t>(ch);
1630                 }
1631                 array[used] = 0;
1632
1633                 // Validate our new UTF-16 string.
1634                 validate();
1635
1636                 return *this;
1637         }
1638
1639
1640         //! Appends a ustring16 to this ustring16
1641         //! \param other The string to append to this one.
1642         //! \return A reference to our current string.
1643         ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1644         {
1645                 const uchar16_t* oa = other.c_str();
1646
1647                 u32 len = other.size_raw();
1648
1649                 if (used + len >= allocated)
1650                         reallocate(used + len);
1651
1652                 for (u32 l=0; l<len; ++l)
1653                         array[used+l] = oa[l];
1654
1655                 used += len;
1656                 array[used] = 0;
1657
1658                 return *this;
1659         }
1660
1661
1662         //! Appends a certain amount of characters of a ustring16 to this ustring16.
1663         //! \param other The string to append to this one.
1664         //! \param length How many characters of the other string to add to this one.
1665         //! \return A reference to our current string.
1666         ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1667         {
1668                 if (other.size() == 0)
1669                         return *this;
1670
1671                 if (other.size() < length)
1672                 {
1673                         append(other);
1674                         return *this;
1675                 }
1676
1677                 if (used + length * 2 >= allocated)
1678                         reallocate(used + length * 2);
1679
1680                 const_iterator iter(other, 0);
1681                 u32 l = length;
1682                 while (!iter.atEnd() && l)
1683                 {
1684                         uchar32_t c = *iter;
1685                         append(c);
1686                         ++iter;
1687                         --l;
1688                 }
1689
1690                 return *this;
1691         }
1692
1693
1694         //! Reserves some memory.
1695         //! \param count The amount of characters to reserve.
1696         void reserve(u32 count)
1697         {
1698                 if (count < allocated)
1699                         return;
1700
1701                 reallocate(count);
1702         }
1703
1704
1705         //! Finds first occurrence of character.
1706         //! \param c The character to search for.
1707         //! \return Position where the character has been found, or -1 if not found.
1708         s32 findFirst(uchar32_t c) const
1709         {
1710                 const_iterator i(*this, 0);
1711
1712                 s32 pos = 0;
1713                 while (!i.atEnd())
1714                 {
1715                         uchar32_t t = *i;
1716                         if (c == t)
1717                                 return pos;
1718                         ++pos;
1719                         ++i;
1720                 }
1721
1722                 return -1;
1723         }
1724
1725         //! Finds first occurrence of a character of a list.
1726         //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1727         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1728         //! \return Position where one of the characters has been found, or -1 if not found.
1729         s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1730         {
1731                 if (!c || !count)
1732                         return -1;
1733
1734                 const_iterator i(*this, 0);
1735
1736                 s32 pos = 0;
1737                 while (!i.atEnd())
1738                 {
1739                         uchar32_t t = *i;
1740                         for (u32 j=0; j<count; ++j)
1741                                 if (t == c[j])
1742                                         return pos;
1743                         ++pos;
1744                         ++i;
1745                 }
1746
1747                 return -1;
1748         }
1749
1750
1751         //! Finds first position of a character not in a given list.
1752         //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1753         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1754         //! \return Position where the character has been found, or -1 if not found.
1755         s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1756         {
1757                 if (!c || !count)
1758                         return -1;
1759
1760                 const_iterator i(*this, 0);
1761
1762                 s32 pos = 0;
1763                 while (!i.atEnd())
1764                 {
1765                         uchar32_t t = *i;
1766                         u32 j;
1767                         for (j=0; j<count; ++j)
1768                                 if (t == c[j])
1769                                         break;
1770
1771                         if (j==count)
1772                                 return pos;
1773                         ++pos;
1774                         ++i;
1775                 }
1776
1777                 return -1;
1778         }
1779
1780         //! Finds last position of a character not in a given list.
1781         //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1782         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1783         //! \return Position where the character has been found, or -1 if not found.
1784         s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
1785         {
1786                 if (!c || !count)
1787                         return -1;
1788
1789                 const_iterator i(end());
1790                 --i;
1791
1792                 s32 pos = size() - 1;
1793                 while (!i.atStart())
1794                 {
1795                         uchar32_t t = *i;
1796                         u32 j;
1797                         for (j=0; j<count; ++j)
1798                                 if (t == c[j])
1799                                         break;
1800
1801                         if (j==count)
1802                                 return pos;
1803                         --pos;
1804                         --i;
1805                 }
1806
1807                 return -1;
1808         }
1809
1810         //! Finds next occurrence of character.
1811         //! \param c The character to search for.
1812         //! \param startPos The position in the string to start searching.
1813         //! \return Position where the character has been found, or -1 if not found.
1814         s32 findNext(uchar32_t c, u32 startPos) const
1815         {
1816                 const_iterator i(*this, startPos);
1817
1818                 s32 pos = startPos;
1819                 while (!i.atEnd())
1820                 {
1821                         uchar32_t t = *i;
1822                         if (t == c)
1823                                 return pos;
1824                         ++pos;
1825                         ++i;
1826                 }
1827
1828                 return -1;
1829         }
1830
1831
1832         //! Finds last occurrence of character.
1833         //! \param c The character to search for.
1834         //! \param start The start position of the reverse search ( default = -1, on end ).
1835         //! \return Position where the character has been found, or -1 if not found.
1836         s32 findLast(uchar32_t c, s32 start = -1) const
1837         {
1838                 u32 s = size();
1839                 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1840
1841                 const_iterator i(*this, start);
1842                 u32 pos = start;
1843                 while (!i.atStart())
1844                 {
1845                         uchar32_t t = *i;
1846                         if (t == c)
1847                                 return pos;
1848                         --pos;
1849                         --i;
1850                 }
1851
1852                 return -1;
1853         }
1854
1855         //! Finds last occurrence of a character in a list.
1856         //! \param c A list of strings to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1857         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1858         //! \return Position where one of the characters has been found, or -1 if not found.
1859         s32 findLastChar(const uchar32_t* const c, u32 count=1) const
1860         {
1861                 if (!c || !count)
1862                         return -1;
1863
1864                 const_iterator i(end());
1865                 --i;
1866
1867                 s32 pos = size();
1868                 while (!i.atStart())
1869                 {
1870                         uchar32_t t = *i;
1871                         for (u32 j=0; j<count; ++j)
1872                                 if (t == c[j])
1873                                         return pos;
1874                         --pos;
1875                         --i;
1876                 }
1877
1878                 return -1;
1879         }
1880
1881
1882         //! Finds another ustring16 in this ustring16.
1883         //! \param str The string to find.
1884         //! \param start The start position of the search.
1885         //! \return Positions where the ustring16 has been found, or -1 if not found.
1886         s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1887         {
1888                 u32 my_size = size();
1889                 u32 their_size = str.size();
1890
1891                 if (their_size == 0 || my_size - start < their_size)
1892                         return -1;
1893
1894                 const_iterator i(*this, start);
1895
1896                 s32 pos = start;
1897                 while (!i.atEnd())
1898                 {
1899                         const_iterator i2(i);
1900                         const_iterator j(str, 0);
1901                         uchar32_t t1 = (uchar32_t)*i2;
1902                         uchar32_t t2 = (uchar32_t)*j;
1903                         while (t1 == t2)
1904                         {
1905                                 ++i2;
1906                                 ++j;
1907                                 if (j.atEnd())
1908                                         return pos;
1909                                 t1 = (uchar32_t)*i2;
1910                                 t2 = (uchar32_t)*j;
1911                         }
1912                         ++i;
1913                         ++pos;
1914                 }
1915
1916                 return -1;
1917         }
1918
1919
1920         //! Finds another ustring16 in this ustring16.
1921         //! \param str The string to find.
1922         //! \param start The start position of the search.
1923         //! \return Positions where the string has been found, or -1 if not found.
1924         s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1925         {
1926                 const uchar16_t* data = str.c_str();
1927                 if (data && *data)
1928                 {
1929                         u32 len = 0;
1930
1931                         while (data[len])
1932                                 ++len;
1933
1934                         if (len > used)
1935                                 return -1;
1936
1937                         for (u32 i=start; i<=used-len; ++i)
1938                         {
1939                                 u32 j=0;
1940
1941                                 while(data[j] && array[i+j] == data[j])
1942                                         ++j;
1943
1944                                 if (!data[j])
1945                                         return i;
1946                         }
1947                 }
1948
1949                 return -1;
1950         }
1951
1952
1953         //! Returns a substring.
1954         //! \param begin: Start of substring.
1955         //! \param length: Length of substring.
1956         //! \return A reference to our current string.
1957         ustring16<TAlloc> subString(u32 begin, s32 length) const
1958         {
1959                 u32 len = size();
1960                 // if start after ustring16
1961                 // or no proper substring length
1962                 if ((length <= 0) || (begin>=len))
1963                         return ustring16<TAlloc>("");
1964                 // clamp length to maximal value
1965                 if ((length+begin) > len)
1966                         length = len-begin;
1967
1968                 ustring16<TAlloc> o;
1969                 o.reserve((length+1) * 2);
1970
1971                 const_iterator i(*this, begin);
1972                 while (!i.atEnd() && length)
1973                 {
1974                         o.append(*i);
1975                         ++i;
1976                         --length;
1977                 }
1978
1979                 return o;
1980         }
1981
1982
1983         //! Appends a character to this ustring16.
1984         //! \param c Character to append.
1985         //! \return A reference to our current string.
1986         ustring16<TAlloc>& operator += (char c)
1987         {
1988                 append((uchar32_t)c);
1989                 return *this;
1990         }
1991
1992
1993         //! Appends a character to this ustring16.
1994         //! \param c Character to append.
1995         //! \return A reference to our current string.
1996         ustring16<TAlloc>& operator += (uchar32_t c)
1997         {
1998                 append(c);
1999                 return *this;
2000         }
2001
2002
2003         //! Appends a number to this ustring16.
2004         //! \param c Number to append.
2005         //! \return A reference to our current string.
2006         ustring16<TAlloc>& operator += (short c)
2007         {
2008                 append(core::stringc(c));
2009                 return *this;
2010         }
2011
2012
2013         //! Appends a number to this ustring16.
2014         //! \param c Number to append.
2015         //! \return A reference to our current string.
2016         ustring16<TAlloc>& operator += (unsigned short c)
2017         {
2018                 append(core::stringc(c));
2019                 return *this;
2020         }
2021
2022
2023         //! Appends a number to this ustring16.
2024         //! \param c Number to append.
2025         //! \return A reference to our current string.
2026         ustring16<TAlloc>& operator += (int c)
2027         {
2028                 append(core::stringc(c));
2029                 return *this;
2030         }
2031
2032
2033         //! Appends a number to this ustring16.
2034         //! \param c Number to append.
2035         //! \return A reference to our current string.
2036         ustring16<TAlloc>& operator += (unsigned int c)
2037         {
2038                 append(core::stringc(c));
2039                 return *this;
2040         }
2041
2042
2043         //! Appends a number to this ustring16.
2044         //! \param c Number to append.
2045         //! \return A reference to our current string.
2046         ustring16<TAlloc>& operator += (long c)
2047         {
2048                 append(core::stringc(c));
2049                 return *this;
2050         }
2051
2052
2053         //! Appends a number to this ustring16.
2054         //! \param c Number to append.
2055         //! \return A reference to our current string.
2056         ustring16<TAlloc>& operator += (unsigned long c)
2057         {
2058                 append(core::stringc(c));
2059                 return *this;
2060         }
2061
2062
2063         //! Appends a number to this ustring16.
2064         //! \param c Number to append.
2065         //! \return A reference to our current string.
2066         ustring16<TAlloc>& operator += (double c)
2067         {
2068                 append(core::stringc(c));
2069                 return *this;
2070         }
2071
2072
2073         //! Appends a char ustring16 to this ustring16.
2074         //! \param c Char ustring16 to append.
2075         //! \return A reference to our current string.
2076         ustring16<TAlloc>& operator += (const uchar16_t* const c)
2077         {
2078                 append(c);
2079                 return *this;
2080         }
2081
2082
2083         //! Appends a ustring16 to this ustring16.
2084         //! \param other ustring16 to append.
2085         //! \return A reference to our current string.
2086         ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2087         {
2088                 append(other);
2089                 return *this;
2090         }
2091
2092
2093         //! Replaces all characters of a given type with another one.
2094         //! \param toReplace Character to replace.
2095         //! \param replaceWith Character replacing the old one.
2096         //! \return A reference to our current string.
2097         ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
2098         {
2099                 iterator i(*this, 0);
2100                 while (!i.atEnd())
2101                 {
2102                         typename ustring16<TAlloc>::access a = *i;
2103                         if ((uchar32_t)a == toReplace)
2104                                 a = replaceWith;
2105                         ++i;
2106                 }
2107                 return *this;
2108         }
2109
2110
2111         //! Replaces all instances of a string with another one.
2112         //! \param toReplace The string to replace.
2113         //! \param replaceWith The string replacing the old one.
2114         //! \return A reference to our current string.
2115         ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2116         {
2117                 if (toReplace.size() == 0)
2118                         return *this;
2119
2120                 const uchar16_t* other = toReplace.c_str();
2121                 const uchar16_t* replace = replaceWith.c_str();
2122                 const u32 other_size = toReplace.size_raw();
2123                 const u32 replace_size = replaceWith.size_raw();
2124
2125                 // Determine the delta.  The algorithm will change depending on the delta.
2126                 s32 delta = replace_size - other_size;
2127
2128                 // A character for character replace.  The string will not shrink or grow.
2129                 if (delta == 0)
2130                 {
2131                         s32 pos = 0;
2132                         while ((pos = find_raw(other, pos)) != -1)
2133                         {
2134                                 for (u32 i = 0; i < replace_size; ++i)
2135                                         array[pos + i] = replace[i];
2136                                 ++pos;
2137                         }
2138                         return *this;
2139                 }
2140
2141                 // We are going to be removing some characters.  The string will shrink.
2142                 if (delta < 0)
2143                 {
2144                         u32 i = 0;
2145                         for (u32 pos = 0; pos <= used; ++i, ++pos)
2146                         {
2147                                 // Is this potentially a match?
2148                                 if (array[pos] == *other)
2149                                 {
2150                                         // Check to see if we have a match.
2151                                         u32 j;
2152                                         for (j = 0; j < other_size; ++j)
2153                                         {
2154                                                 if (array[pos + j] != other[j])
2155                                                         break;
2156                                         }
2157
2158                                         // If we have a match, replace characters.
2159                                         if (j == other_size)
2160                                         {
2161                                                 for (j = 0; j < replace_size; ++j)
2162                                                         array[i + j] = replace[j];
2163                                                 i += replace_size - 1;
2164                                                 pos += other_size - 1;
2165                                                 continue;
2166                                         }
2167                                 }
2168
2169                                 // No match found, just copy characters.
2170                                 array[i - 1] = array[pos];
2171                         }
2172                         array[i] = 0;
2173                         used = i;
2174
2175                         return *this;
2176                 }
2177
2178                 // We are going to be adding characters, so the string size will increase.
2179                 // Count the number of times toReplace exists in the string so we can allocate the new size.
2180                 u32 find_count = 0;
2181                 s32 pos = 0;
2182                 while ((pos = find_raw(other, pos)) != -1)
2183                 {
2184                         ++find_count;
2185                         ++pos;
2186                 }
2187
2188                 // Re-allocate the string now, if needed.
2189                 u32 len = delta * find_count;
2190                 if (used + len >= allocated)
2191                         reallocate(used + len);
2192
2193                 // Start replacing.
2194                 pos = 0;
2195                 while ((pos = find_raw(other, pos)) != -1)
2196                 {
2197                         uchar16_t* start = array + pos + other_size - 1;
2198                         uchar16_t* ptr   = array + used;
2199                         uchar16_t* end   = array + used + delta;
2200
2201                         // Shift characters to make room for the string.
2202                         while (ptr != start)
2203                         {
2204                                 *end = *ptr;
2205                                 --ptr;
2206                                 --end;
2207                         }
2208
2209                         // Add the new string now.
2210                         for (u32 i = 0; i < replace_size; ++i)
2211                                 array[pos + i] = replace[i];
2212
2213                         pos += replace_size;
2214                         used += delta;
2215                 }
2216
2217                 // Terminate the string and return ourself.
2218                 array[used] = 0;
2219                 return *this;
2220         }
2221
2222
2223         //! Removes characters from a ustring16..
2224         //! \param c The character to remove.
2225         //! \return A reference to our current string.
2226         ustring16<TAlloc>& remove(uchar32_t c)
2227         {
2228                 u32 pos = 0;
2229                 u32 found = 0;
2230                 u32 len = (c > 0xFFFF ? 2 : 1);         // Remove characters equal to the size of c as a UTF-16 character.
2231                 for (u32 i=0; i<=used; ++i)
2232                 {
2233                         uchar32_t uc32 = 0;
2234                         if (!UTF16_IS_SURROGATE_HI(array[i]))
2235                                 uc32 |= array[i];
2236                         else if (i + 1 <= used)
2237                         {
2238                                 // Convert the surrogate pair into a single UTF-32 character.
2239                                 uc32 = unicode::toUTF32(array[i], array[i + 1]);
2240                         }
2241                         u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2242
2243                         if (uc32 == c)
2244                         {
2245                                 found += len;
2246                                 continue;
2247                         }
2248
2249                         array[pos++] = array[i];
2250                         if (len2 == 2)
2251                                 array[pos++] = array[++i];
2252                 }
2253                 used -= found;
2254                 array[used] = 0;
2255                 return *this;
2256         }
2257
2258
2259         //! Removes a ustring16 from the ustring16.
2260         //! \param toRemove The string to remove.
2261         //! \return A reference to our current string.
2262         ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
2263         {
2264                 u32 size = toRemove.size_raw();
2265                 if (size == 0) return *this;
2266
2267                 const uchar16_t* tra = toRemove.c_str();
2268                 u32 pos = 0;
2269                 u32 found = 0;
2270                 for (u32 i=0; i<=used; ++i)
2271                 {
2272                         u32 j = 0;
2273                         while (j < size)
2274                         {
2275                                 if (array[i + j] != tra[j])
2276                                         break;
2277                                 ++j;
2278                         }
2279                         if (j == size)
2280                         {
2281                                 found += size;
2282                                 i += size - 1;
2283                                 continue;
2284                         }
2285
2286                         array[pos++] = array[i];
2287                 }
2288                 used -= found;
2289                 array[used] = 0;
2290                 return *this;
2291         }
2292
2293
2294         //! Removes characters from the ustring16.
2295         //! \param characters The characters to remove.
2296         //! \return A reference to our current string.
2297         ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2298         {
2299                 if (characters.size_raw() == 0)
2300                         return *this;
2301
2302                 u32 pos = 0;
2303                 u32 found = 0;
2304                 const_iterator iter(characters);
2305                 for (u32 i=0; i<=used; ++i)
2306                 {
2307                         uchar32_t uc32 = 0;
2308                         if (!UTF16_IS_SURROGATE_HI(array[i]))
2309                                 uc32 |= array[i];
2310                         else if (i + 1 <= used)
2311                         {
2312                                 // Convert the surrogate pair into a single UTF-32 character.
2313                                 uc32 = unicode::toUTF32(array[i], array[i+1]);
2314                         }
2315                         u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2316
2317                         bool cont = false;
2318                         iter.toStart();
2319                         while (!iter.atEnd())
2320                         {
2321                                 uchar32_t c = *iter;
2322                                 if (uc32 == c)
2323                                 {
2324                                         found += (c > 0xFFFF ? 2 : 1);          // Remove characters equal to the size of c as a UTF-16 character.
2325                                         ++i;
2326                                         cont = true;
2327                                         break;
2328                                 }
2329                                 ++iter;
2330                         }
2331                         if (cont) continue;
2332
2333                         array[pos++] = array[i];
2334                         if (len2 == 2)
2335                                 array[pos++] = array[++i];
2336                 }
2337                 used -= found;
2338                 array[used] = 0;
2339                 return *this;
2340         }
2341
2342
2343         //! Trims the ustring16.
2344         //! Removes the specified characters (by default, Latin-1 whitespace) from the begining and the end of the ustring16.
2345         //! \param whitespace The characters that are to be considered as whitespace.
2346         //! \return A reference to our current string.
2347         ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
2348         {
2349                 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2350
2351                 // find start and end of the substring without the specified characters
2352                 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2353                 if (begin == -1)
2354                         return (*this="");
2355
2356                 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2357
2358                 return (*this = subString(begin, (end +1) - begin));
2359         }
2360
2361
2362         //! Erases a character from the ustring16.
2363         //! May be slow, because all elements following after the erased element have to be copied.
2364         //! \param index Index of element to be erased.
2365         //! \return A reference to our current string.
2366         ustring16<TAlloc>& erase(u32 index)
2367         {
2368                 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2369
2370                 iterator i(*this, index);
2371
2372                 uchar32_t t = *i;
2373                 u32 len = (t > 0xFFFF ? 2 : 1);
2374
2375                 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2376                         array[j - len] = array[j];
2377
2378                 used -= len;
2379                 array[used] = 0;
2380
2381                 return *this;
2382         }
2383
2384
2385         //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2386         //! \return A reference to our current string.
2387         ustring16<TAlloc>& validate()
2388         {
2389                 // Validate all unicode characters.
2390                 for (u32 i=0; i<allocated; ++i)
2391                 {
2392                         // Terminate on existing null.
2393                         if (array[i] == 0)
2394                         {
2395                                 used = i;
2396                                 return *this;
2397                         }
2398                         if (UTF16_IS_SURROGATE(array[i]))
2399                         {
2400                                 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2401                                         array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2402                                 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2403                                         array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2404                                 ++i;
2405                         }
2406                         if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2407                                 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2408                 }
2409
2410                 // terminate
2411                 used = 0;
2412                 if (allocated > 0)
2413                 {
2414                         used = allocated - 1;
2415                         array[used] = 0;
2416                 }
2417                 return *this;
2418         }
2419
2420
2421         //! Gets the last char of the ustring16, or 0.
2422         //! \return The last char of the ustring16, or 0.
2423         uchar32_t lastChar() const
2424         {
2425                 if (used < 1)
2426                         return 0;
2427
2428                 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2429                 {
2430                         // Make sure we have a paired surrogate.
2431                         if (used < 2)
2432                                 return 0;
2433
2434                         // Check for an invalid surrogate.
2435                         if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2436                                 return 0;
2437
2438                         // Convert the surrogate pair into a single UTF-32 character.
2439                         return unicode::toUTF32(array[used-2], array[used-1]);
2440                 }
2441                 else
2442                 {
2443                         return array[used-1];
2444                 }
2445         }
2446
2447
2448         //! Split the ustring16 into parts.
2449         /** This method will split a ustring16 at certain delimiter characters
2450         into the container passed in as reference. The type of the container
2451         has to be given as template parameter. It must provide a push_back and
2452         a size method.
2453         \param ret The result container
2454         \param c C-style ustring16 of delimiter characters
2455         \param count Number of delimiter characters
2456         \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2457         container. If two delimiters occur without a character in between, an
2458         empty substring would be placed in the result. If this flag is set,
2459         only non-empty strings are stored.
2460         \param keepSeparators Flag which allows to add the separator to the
2461         result ustring16. If this flag is true, the concatenation of the
2462         substrings results in the original ustring16. Otherwise, only the
2463         characters between the delimiters are returned.
2464         \return The number of resulting substrings
2465         */
2466         template<class container>
2467         u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2468         {
2469                 if (!c)
2470                         return 0;
2471
2472                 const_iterator i(*this);
2473                 const u32 oldSize=ret.size();
2474                 u32 pos = 0;
2475                 u32 lastpos = 0;
2476                 u32 lastpospos = 0;
2477                 bool lastWasSeparator = false;
2478                 while (!i.atEnd())
2479                 {
2480                         uchar32_t ch = *i;
2481                         bool foundSeparator = false;
2482                         for (u32 j=0; j<count; ++j)
2483                         {
2484                                 if (ch == c[j])
2485                                 {
2486                                         if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2487                                                         !lastWasSeparator)
2488                                         ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2489                                         foundSeparator = true;
2490                                         lastpos = (keepSeparators ? pos : pos + 1);
2491                                         lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2492                                         break;
2493                                 }
2494                         }
2495                         lastWasSeparator = foundSeparator;
2496                         ++pos;
2497                         ++i;
2498                 }
2499                 u32 s = size() + 1;
2500                 if (s > lastpos)
2501                         ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2502                 return ret.size()-oldSize;
2503         }
2504
2505
2506         //! Split the ustring16 into parts.
2507         /** This method will split a ustring16 at certain delimiter characters
2508         into the container passed in as reference. The type of the container
2509         has to be given as template parameter. It must provide a push_back and
2510         a size method.
2511         \param ret The result container
2512         \param c A unicode string of delimiter characters
2513         \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2514         container. If two delimiters occur without a character in between, an
2515         empty substring would be placed in the result. If this flag is set,
2516         only non-empty strings are stored.
2517         \param keepSeparators Flag which allows to add the separator to the
2518         result ustring16. If this flag is true, the concatenation of the
2519         substrings results in the original ustring16. Otherwise, only the
2520         characters between the delimiters are returned.
2521         \return The number of resulting substrings
2522         */
2523         template<class container>
2524         u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2525         {
2526                 core::array<uchar32_t> v = c.toUTF32();
2527                 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2528         }
2529
2530
2531         //! Gets the size of the allocated memory buffer for the string.
2532         //! \return The size of the allocated memory buffer.
2533         u32 capacity() const
2534         {
2535                 return allocated;
2536         }
2537
2538
2539         //! Returns the raw number of UTF-16 code points in the string which includes the individual surrogates.
2540         //! \return The raw number of UTF-16 code points, excluding the trialing NUL.
2541         u32 size_raw() const
2542         {
2543                 return used;
2544         }
2545
2546
2547         //! Inserts a character into the string.
2548         //! \param c The character to insert.
2549         //! \param pos The position to insert the character.
2550         //! \return A reference to our current string.
2551         ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
2552         {
2553                 u8 len = (c > 0xFFFF ? 2 : 1);
2554
2555                 if (used + len >= allocated)
2556                         reallocate(used + len);
2557
2558                 used += len;
2559
2560                 iterator iter(*this, pos);
2561                 for (u32 i = used - 2; i > iter.getPos(); --i)
2562                         array[i] = array[i - len];
2563
2564                 if (c > 0xFFFF)
2565                 {
2566                         // c will be multibyte, so split it up into a surrogate pair.
2567                         uchar16_t x = static_cast<uchar16_t>(c);
2568                         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2569                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2570                         array[iter.getPos()] = vh;
2571                         array[iter.getPos()+1] = vl;
2572                 }
2573                 else
2574                 {
2575                         array[iter.getPos()] = static_cast<uchar16_t>(c);
2576                 }
2577                 array[used] = 0;
2578                 return *this;
2579         }
2580
2581
2582         //! Inserts a string into the string.
2583         //! \param c The string to insert.
2584         //! \param pos The position to insert the string.
2585         //! \return A reference to our current string.
2586         ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
2587         {
2588                 u32 len = c.size_raw();
2589                 if (len == 0) return *this;
2590
2591                 if (used + len >= allocated)
2592                         reallocate(used + len);
2593
2594                 used += len;
2595
2596                 iterator iter(*this, pos);
2597                 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2598                         array[i] = array[i - len];
2599
2600                 const uchar16_t* s = c.c_str();
2601                 for (u32 i = 0; i < len; ++i)
2602                 {
2603                         array[pos++] = *s;
2604                         ++s;
2605                 }
2606
2607                 array[used] = 0;
2608                 return *this;
2609         }
2610
2611
2612         //! Inserts a character into the string.
2613         //! \param c The character to insert.
2614         //! \param pos The position to insert the character.
2615         //! \return A reference to our current string.
2616         ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2617         {
2618                 if (used + 1 >= allocated)
2619                         reallocate(used + 1);
2620
2621                 ++used;
2622
2623                 for (u32 i = used - 1; i > pos; --i)
2624                         array[i] = array[i - 1];
2625
2626                 array[pos] = c;
2627                 array[used] = 0;
2628                 return *this;
2629         }
2630
2631
2632         //! Removes a character from string.
2633         //! \param pos Position of the character to remove.
2634         //! \return A reference to our current string.
2635         ustring16<TAlloc>& erase_raw(u32 pos)
2636         {
2637                 for (u32 i=pos; i<=used; ++i)
2638                 {
2639                         array[i] = array[i + 1];
2640                 }
2641                 --used;
2642                 array[used] = 0;
2643                 return *this;
2644         }
2645
2646
2647         //! Replaces a character in the string.
2648         //! \param c The new character.
2649         //! \param pos The position of the character to replace.
2650         //! \return A reference to our current string.
2651         ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2652         {
2653                 array[pos] = c;
2654                 return *this;
2655         }
2656
2657
2658         //! Returns an iterator to the beginning of the string.
2659         //! \return An iterator to the beginning of the string.
2660         iterator begin()
2661         {
2662                 iterator i(*this, 0);
2663                 return i;
2664         }
2665
2666
2667         //! Returns an iterator to the beginning of the string.
2668         //! \return An iterator to the beginning of the string.
2669         const_iterator begin() const
2670         {
2671                 const_iterator i(*this, 0);
2672                 return i;
2673         }
2674
2675
2676         //! Returns an iterator to the beginning of the string.
2677         //! \return An iterator to the beginning of the string.
2678         const_iterator cbegin() const
2679         {
2680                 const_iterator i(*this, 0);
2681                 return i;
2682         }
2683
2684
2685         //! Returns an iterator to the end of the string.
2686         //! \return An iterator to the end of the string.
2687         iterator end()
2688         {
2689                 iterator i(*this, 0);
2690                 i.toEnd();
2691                 return i;
2692         }
2693
2694
2695         //! Returns an iterator to the end of the string.
2696         //! \return An iterator to the end of the string.
2697         const_iterator end() const
2698         {
2699                 const_iterator i(*this, 0);
2700                 i.toEnd();
2701                 return i;
2702         }
2703
2704
2705         //! Returns an iterator to the end of the string.
2706         //! \return An iterator to the end of the string.
2707         const_iterator cend() const
2708         {
2709                 const_iterator i(*this, 0);
2710                 i.toEnd();
2711                 return i;
2712         }
2713
2714
2715         //! Converts the string to a UTF-8 encoded string.
2716         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2717         //! \return A string containing the UTF-8 encoded string.
2718         core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2719         {
2720                 core::string<uchar8_t> ret;
2721                 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2722                 const_iterator iter(*this, 0);
2723
2724                 // Add the byte order mark if the user wants it.
2725                 if (addBOM)
2726                 {
2727                         ret.append(unicode::BOM_ENCODE_UTF8[0]);
2728                         ret.append(unicode::BOM_ENCODE_UTF8[1]);
2729                         ret.append(unicode::BOM_ENCODE_UTF8[2]);
2730                 }
2731
2732                 while (!iter.atEnd())
2733                 {
2734                         uchar32_t c = *iter;
2735                         if (c > 0xFFFF)
2736                         {       // 4 bytes
2737                                 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2738                                 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2739                                 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2740                                 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2741                                 ret.append(b1);
2742                                 ret.append(b2);
2743                                 ret.append(b3);
2744                                 ret.append(b4);
2745                         }
2746                         else if (c > 0x7FF)
2747                         {       // 3 bytes
2748                                 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2749                                 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2750                                 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2751                                 ret.append(b1);
2752                                 ret.append(b2);
2753                                 ret.append(b3);
2754                         }
2755                         else if (c > 0x7F)
2756                         {       // 2 bytes
2757                                 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2758                                 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2759                                 ret.append(b1);
2760                                 ret.append(b2);
2761                         }
2762                         else
2763                         {       // 1 byte
2764                                 ret.append(static_cast<uchar8_t>(c));
2765                         }
2766                         ++iter;
2767                 }
2768                 return ret;
2769         }
2770
2771
2772         //! Converts the string to a UTF-8 encoded string array.
2773         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2774         //! \return An array containing the UTF-8 encoded string.
2775         core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2776         {
2777                 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2778                 const_iterator iter(*this, 0);
2779
2780                 // Add the byte order mark if the user wants it.
2781                 if (addBOM)
2782                 {
2783                         ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2784                         ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2785                         ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2786                 }
2787
2788                 while (!iter.atEnd())
2789                 {
2790                         uchar32_t c = *iter;
2791                         if (c > 0xFFFF)
2792                         {       // 4 bytes
2793                                 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2794                                 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2795                                 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2796                                 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2797                                 ret.push_back(b1);
2798                                 ret.push_back(b2);
2799                                 ret.push_back(b3);
2800                                 ret.push_back(b4);
2801                         }
2802                         else if (c > 0x7FF)
2803                         {       // 3 bytes
2804                                 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2805                                 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2806                                 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2807                                 ret.push_back(b1);
2808                                 ret.push_back(b2);
2809                                 ret.push_back(b3);
2810                         }
2811                         else if (c > 0x7F)
2812                         {       // 2 bytes
2813                                 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2814                                 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2815                                 ret.push_back(b1);
2816                                 ret.push_back(b2);
2817                         }
2818                         else
2819                         {       // 1 byte
2820                                 ret.push_back(static_cast<uchar8_t>(c));
2821                         }
2822                         ++iter;
2823                 }
2824                 ret.push_back(0);
2825                 return ret;
2826         }
2827
2828
2829         //! Converts the string to a UTF-16 encoded string array.
2830         //! Unfortunately, no toUTF16_s() version exists due to limitations with Irrlicht's string class.
2831         //! \param endian The desired endianness of the string.
2832         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2833         //! \return An array containing the UTF-16 encoded string.
2834         core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2835         {
2836                 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2837                 uchar16_t* ptr = ret.pointer();
2838
2839                 // Add the BOM if specified.
2840                 if (addBOM)
2841                 {
2842                         if (endian == unicode::EUTFEE_NATIVE)
2843                                 *ptr = unicode::BOM;
2844                         else if (endian == unicode::EUTFEE_LITTLE)
2845                         {
2846                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2847                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2848                                 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2849                         }
2850                         else
2851                         {
2852                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2853                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2854                                 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2855                         }
2856                         ++ptr;
2857                 }
2858
2859                 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2860                 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2861                 {
2862                         for (u32 i = 0; i <= used; ++i)
2863                                 ptr[i] = unicode::swapEndian16(ptr[i]);
2864                 }
2865                 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2866                 ret.push_back(0);
2867                 return ret;
2868         }
2869
2870
2871         //! Converts the string to a UTF-32 encoded string array.
2872         //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
2873         //! \param endian The desired endianness of the string.
2874         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2875         //! \return An array containing the UTF-32 encoded string.
2876         core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2877         {
2878                 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
2879                 const_iterator iter(*this, 0);
2880
2881                 // Add the BOM if specified.
2882                 if (addBOM)
2883                 {
2884                         if (endian == unicode::EUTFEE_NATIVE)
2885                                 ret.push_back(unicode::BOM);
2886                         else
2887                         {
2888                                 union
2889                                 {
2890                                         uchar32_t full;
2891                                         u8 chunk[4];
2892                                 } t;
2893
2894                                 if (endian == unicode::EUTFEE_LITTLE)
2895                                 {
2896                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
2897                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
2898                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
2899                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
2900                                 }
2901                                 else
2902                                 {
2903                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
2904                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
2905                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
2906                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
2907                                 }
2908                                 ret.push_back(t.full);
2909                         }
2910                 }
2911                 ret.push_back(0);
2912
2913                 while (!iter.atEnd())
2914                 {
2915                         uchar32_t c = *iter;
2916                         if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2917                                 c = unicode::swapEndian32(c);
2918                         ret.push_back(c);
2919                         ++iter;
2920                 }
2921                 return ret;
2922         }
2923
2924
2925         //! Converts the string to a wchar_t encoded string.
2926         /** The size of a wchar_t changes depending on the platform.  This function will store a
2927         correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
2928         //! \param endian The desired endianness of the string.
2929         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2930         //! \return A string containing the wchar_t encoded string.
2931         core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2932         {
2933                 if (sizeof(wchar_t) == 4)
2934                 {
2935                         core::array<uchar32_t> a(toUTF32(endian, addBOM));
2936                         core::stringw ret(a.pointer());
2937                         return ret;
2938                 }
2939                 else if (sizeof(wchar_t) == 2)
2940                 {
2941                         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
2942                         {
2943                                 core::stringw ret(array);
2944                                 return ret;
2945                         }
2946                         else
2947                         {
2948                                 core::array<uchar16_t> a(toUTF16(endian, addBOM));
2949                                 core::stringw ret(a.pointer());
2950                                 return ret;
2951                         }
2952                 }
2953                 else if (sizeof(wchar_t) == 1)
2954                 {
2955                         core::array<uchar8_t> a(toUTF8(addBOM));
2956                         core::stringw ret(a.pointer());
2957                         return ret;
2958                 }
2959
2960                 // Shouldn't happen.
2961                 return core::stringw();
2962         }
2963
2964
2965         //! Converts the string to a wchar_t encoded string array.
2966         /** The size of a wchar_t changes depending on the platform.  This function will store a
2967         correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
2968         //! \param endian The desired endianness of the string.
2969         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2970         //! \return An array containing the wchar_t encoded string.
2971         core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2972         {
2973                 if (sizeof(wchar_t) == 4)
2974                 {
2975                         core::array<uchar32_t> a(toUTF32(endian, addBOM));
2976                         core::array<wchar_t> ret(a.size());
2977                         ret.set_used(a.size());
2978                         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
2979                         return ret;
2980                 }
2981                 if (sizeof(wchar_t) == 2)
2982                 {
2983                         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
2984                         {
2985                                 core::array<wchar_t> ret(used);
2986                                 ret.set_used(used);
2987                                 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
2988                                 return ret;
2989                         }
2990                         else
2991                         {
2992                                 core::array<uchar16_t> a(toUTF16(endian, addBOM));
2993                                 core::array<wchar_t> ret(a.size());
2994                                 ret.set_used(a.size());
2995                                 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
2996                                 return ret;
2997                         }
2998                 }
2999                 if (sizeof(wchar_t) == 1)
3000                 {
3001                         core::array<uchar8_t> a(toUTF8(addBOM));
3002                         core::array<wchar_t> ret(a.size());
3003                         ret.set_used(a.size());
3004                         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3005                         return ret;
3006                 }
3007
3008                 // Shouldn't happen.
3009                 return core::array<wchar_t>();
3010         }
3011
3012         //! Converts the string to a properly encoded io::path string.
3013         //! \param endian The desired endianness of the string.
3014         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3015         //! \return An io::path string containing the properly encoded string.
3016         io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3017         {
3018                 return toUTF8_s(addBOM);
3019         }
3020
3021         //! Loads an unknown stream of data.
3022         //! Will attempt to determine if the stream is unicode data.  Useful for loading from files.
3023         //! \param data The data stream to load from.
3024         //! \param data_size The length of the data string.
3025         //! \return A reference to our current string.
3026         ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3027         {
3028                 // Clear our string.
3029                 *this = "";
3030                 if (!data)
3031                         return *this;
3032
3033                 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3034                 switch (e)
3035                 {
3036                         default:
3037                         case unicode::EUTFE_UTF8:
3038                                 append((uchar8_t*)data, data_size);
3039                                 break;
3040
3041                         case unicode::EUTFE_UTF16:
3042                         case unicode::EUTFE_UTF16_BE:
3043                         case unicode::EUTFE_UTF16_LE:
3044                                 append((uchar16_t*)data, data_size / 2);
3045                                 break;
3046
3047                         case unicode::EUTFE_UTF32:
3048                         case unicode::EUTFE_UTF32_BE:
3049                         case unicode::EUTFE_UTF32_LE:
3050                                 append((uchar32_t*)data, data_size / 4);
3051                                 break;
3052                 }
3053
3054                 return *this;
3055         }
3056
3057         //! Gets the encoding of the Unicode string this class contains.
3058         //! \return An enum describing the current encoding of this string.
3059         const unicode::EUTF_ENCODE getEncoding() const
3060         {
3061                 return encoding;
3062         }
3063
3064         //! Gets the endianness of the Unicode string this class contains.
3065         //! \return An enum describing the endianness of this string.
3066         const unicode::EUTF_ENDIAN getEndianness() const
3067         {
3068                 if (encoding == unicode::EUTFE_UTF16_LE ||
3069                         encoding == unicode::EUTFE_UTF32_LE)
3070                         return unicode::EUTFEE_LITTLE;
3071                 else return unicode::EUTFEE_BIG;
3072         }
3073
3074 private:
3075
3076         //! Reallocate the string, making it bigger or smaller.
3077         //! \param new_size The new size of the string.
3078         void reallocate(u32 new_size)
3079         {
3080                 uchar16_t* old_array = array;
3081
3082                 array = allocator.allocate(new_size + 1); //new u16[new_size];
3083                 allocated = new_size + 1;
3084                 if (old_array == 0) return;
3085
3086                 u32 amount = used < new_size ? used : new_size;
3087                 for (u32 i=0; i<=amount; ++i)
3088                         array[i] = old_array[i];
3089
3090                 if (allocated <= used)
3091                         used = allocated - 1;
3092
3093                 array[used] = 0;
3094
3095                 allocator.deallocate(old_array); // delete [] old_array;
3096         }
3097
3098         //--- member variables
3099
3100         uchar16_t* array;
3101         unicode::EUTF_ENCODE encoding;
3102         u32 allocated;
3103         u32 used;
3104         TAlloc allocator;
3105         //irrAllocator<uchar16_t> allocator;
3106 };
3107
3108 typedef ustring16<irrAllocator<uchar16_t> > ustring;
3109
3110
3111 //! Appends two ustring16s.
3112 template <typename TAlloc>
3113 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3114 {
3115         ustring16<TAlloc> ret(left);
3116         ret += right;
3117         return ret;
3118 }
3119
3120
3121 //! Appends a ustring16 and a null-terminated unicode string.
3122 template <typename TAlloc, class B>
3123 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3124 {
3125         ustring16<TAlloc> ret(left);
3126         ret += right;
3127         return ret;
3128 }
3129
3130
3131 //! Appends a ustring16 and a null-terminated unicode string.
3132 template <class B, typename TAlloc>
3133 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3134 {
3135         ustring16<TAlloc> ret(left);
3136         ret += right;
3137         return ret;
3138 }
3139
3140
3141 //! Appends a ustring16 and an Irrlicht string.
3142 template <typename TAlloc, typename B>
3143 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B>& right)
3144 {
3145         ustring16<TAlloc> ret(left);
3146         ret += right;
3147         return ret;
3148 }
3149
3150
3151 //! Appends a ustring16 and an Irrlicht string.
3152 template <typename TAlloc, typename B>
3153 inline ustring16<TAlloc> operator+(const string<B>& left, const ustring16<TAlloc>& right)
3154 {
3155         ustring16<TAlloc> ret(left);
3156         ret += right;
3157         return ret;
3158 }
3159
3160
3161 //! Appends a ustring16 and a std::basic_string.
3162 template <typename TAlloc, typename B, typename A, typename BAlloc>
3163 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3164 {
3165         ustring16<TAlloc> ret(left);
3166         ret += right;
3167         return ret;
3168 }
3169
3170
3171 //! Appends a ustring16 and a std::basic_string.
3172 template <typename TAlloc, typename B, typename A, typename BAlloc>
3173 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3174 {
3175         ustring16<TAlloc> ret(left);
3176         ret += right;
3177         return ret;
3178 }
3179
3180
3181 //! Appends a ustring16 and a char.
3182 template <typename TAlloc>
3183 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3184 {
3185         ustring16<TAlloc> ret(left);
3186         ret += right;
3187         return ret;
3188 }
3189
3190
3191 //! Appends a ustring16 and a char.
3192 template <typename TAlloc>
3193 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3194 {
3195         ustring16<TAlloc> ret(left);
3196         ret += right;
3197         return ret;
3198 }
3199
3200
3201 //! Appends a ustring16 and a uchar32_t.
3202 template <typename TAlloc>
3203 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
3204 {
3205         ustring16<TAlloc> ret(left);
3206         ret += right;
3207         return ret;
3208 }
3209
3210
3211 //! Appends a ustring16 and a uchar32_t.
3212 template <typename TAlloc>
3213 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
3214 {
3215         ustring16<TAlloc> ret(left);
3216         ret += right;
3217         return ret;
3218 }
3219
3220
3221 //! Appends a ustring16 and a short.
3222 template <typename TAlloc>
3223 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3224 {
3225         ustring16<TAlloc> ret(left);
3226         ret += core::stringc(right);
3227         return ret;
3228 }
3229
3230
3231 //! Appends a ustring16 and a short.
3232 template <typename TAlloc>
3233 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3234 {
3235         ustring16<TAlloc> ret((core::stringc(left)));
3236         ret += right;
3237         return ret;
3238 }
3239
3240
3241 //! Appends a ustring16 and an unsigned short.
3242 template <typename TAlloc>
3243 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3244 {
3245         ustring16<TAlloc> ret(left);
3246         ret += core::stringc(right);
3247         return ret;
3248 }
3249
3250
3251 //! Appends a ustring16 and an unsigned short.
3252 template <typename TAlloc>
3253 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3254 {
3255         ustring16<TAlloc> ret((core::stringc(left)));
3256         ret += right;
3257         return ret;
3258 }
3259
3260
3261 //! Appends a ustring16 and an int.
3262 template <typename TAlloc>
3263 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3264 {
3265         ustring16<TAlloc> ret(left);
3266         ret += core::stringc(right);
3267         return ret;
3268 }
3269
3270
3271 //! Appends a ustring16 and an int.
3272 template <typename TAlloc>
3273 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3274 {
3275         ustring16<TAlloc> ret((core::stringc(left)));
3276         ret += right;
3277         return ret;
3278 }
3279
3280
3281 //! Appends a ustring16 and an unsigned int.
3282 template <typename TAlloc>
3283 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3284 {
3285         ustring16<TAlloc> ret(left);
3286         ret += core::stringc(right);
3287         return ret;
3288 }
3289
3290
3291 //! Appends a ustring16 and an unsigned int.
3292 template <typename TAlloc>
3293 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3294 {
3295         ustring16<TAlloc> ret((core::stringc(left)));
3296         ret += right;
3297         return ret;
3298 }
3299
3300
3301 //! Appends a ustring16 and a long.
3302 template <typename TAlloc>
3303 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3304 {
3305         ustring16<TAlloc> ret(left);
3306         ret += core::stringc(right);
3307         return ret;
3308 }
3309
3310
3311 //! Appends a ustring16 and a long.
3312 template <typename TAlloc>
3313 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3314 {
3315         ustring16<TAlloc> ret((core::stringc(left)));
3316         ret += right;
3317         return ret;
3318 }
3319
3320
3321 //! Appends a ustring16 and an unsigned long.
3322 template <typename TAlloc>
3323 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3324 {
3325         ustring16<TAlloc> ret(left);
3326         ret += core::stringc(right);
3327         return ret;
3328 }
3329
3330
3331 //! Appends a ustring16 and an unsigned long.
3332 template <typename TAlloc>
3333 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3334 {
3335         ustring16<TAlloc> ret((core::stringc(left)));
3336         ret += right;
3337         return ret;
3338 }
3339
3340
3341 //! Appends a ustring16 and a float.
3342 template <typename TAlloc>
3343 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3344 {
3345         ustring16<TAlloc> ret(left);
3346         ret += core::stringc(right);
3347         return ret;
3348 }
3349
3350
3351 //! Appends a ustring16 and a float.
3352 template <typename TAlloc>
3353 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3354 {
3355         ustring16<TAlloc> ret((core::stringc(left)));
3356         ret += right;
3357         return ret;
3358 }
3359
3360
3361 //! Appends a ustring16 and a double.
3362 template <typename TAlloc>
3363 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3364 {
3365         ustring16<TAlloc> ret(left);
3366         ret += core::stringc(right);
3367         return ret;
3368 }
3369
3370
3371 //! Appends a ustring16 and a double.
3372 template <typename TAlloc>
3373 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3374 {
3375         ustring16<TAlloc> ret((core::stringc(left)));
3376         ret += right;
3377         return ret;
3378 }
3379
3380
3381 //! Appends two ustring16s.
3382 template <typename TAlloc>
3383 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3384 {
3385         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3386         right.insert(left, 0);
3387         return std::move(right);
3388 }
3389
3390
3391 //! Appends two ustring16s.
3392 template <typename TAlloc>
3393 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3394 {
3395         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3396         left.append(right);
3397         return std::move(left);
3398 }
3399
3400
3401 //! Appends two ustring16s.
3402 template <typename TAlloc>
3403 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3404 {
3405         //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3406         if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3407                 (right.capacity() - right.size_raw() < left.size_raw()))
3408         {
3409                 left.append(right);
3410                 return std::move(left);
3411         }
3412         else
3413         {
3414                 right.insert(left, 0);
3415                 return std::move(right);
3416         }
3417 }
3418
3419
3420 //! Appends a ustring16 and a null-terminated unicode string.
3421 template <typename TAlloc, class B>
3422 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3423 {
3424         //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3425         left.append(right);
3426         return std::move(left);
3427 }
3428
3429
3430 //! Appends a ustring16 and a null-terminated unicode string.
3431 template <class B, typename TAlloc>
3432 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3433 {
3434         //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3435         right.insert(left, 0);
3436         return std::move(right);
3437 }
3438
3439
3440 //! Appends a ustring16 and an Irrlicht string.
3441 template <typename TAlloc, typename B>
3442 inline ustring16<TAlloc>&& operator+(const string<B>& left, ustring16<TAlloc>&& right)
3443 {
3444         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3445         right.insert(left, 0);
3446         return std::move(right);
3447 }
3448
3449
3450 //! Appends a ustring16 and an Irrlicht string.
3451 template <typename TAlloc, typename B>
3452 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B>& right)
3453 {
3454         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3455         left.append(right);
3456         return std::move(left);
3457 }
3458
3459
3460 //! Appends a ustring16 and a std::basic_string.
3461 template <typename TAlloc, typename B, typename A, typename BAlloc>
3462 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3463 {
3464         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3465         right.insert(core::ustring16<TAlloc>(left), 0);
3466         return std::move(right);
3467 }
3468
3469
3470 //! Appends a ustring16 and a std::basic_string.
3471 template <typename TAlloc, typename B, typename A, typename BAlloc>
3472 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3473 {
3474         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3475         left.append(right);
3476         return std::move(left);
3477 }
3478
3479
3480 //! Appends a ustring16 and a char.
3481 template <typename TAlloc>
3482 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3483 {
3484         left.append((uchar32_t)right);
3485         return std::move(left);
3486 }
3487
3488
3489 //! Appends a ustring16 and a char.
3490 template <typename TAlloc>
3491 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3492 {
3493         right.insert((uchar32_t)left, 0);
3494         return std::move(right);
3495 }
3496
3497
3498 //! Appends a ustring16 and a uchar32_t.
3499 template <typename TAlloc>
3500 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3501 {
3502         left.append(right);
3503         return std::move(left);
3504 }
3505
3506
3507 //! Appends a ustring16 and a uchar32_t.
3508 template <typename TAlloc>
3509 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
3510 {
3511         right.insert(left, 0);
3512         return std::move(right);
3513 }
3514
3515
3516 //! Appends a ustring16 and a short.
3517 template <typename TAlloc>
3518 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3519 {
3520         left.append(core::stringc(right));
3521         return std::move(left);
3522 }
3523
3524
3525 //! Appends a ustring16 and a short.
3526 template <typename TAlloc>
3527 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3528 {
3529         right.insert(core::stringc(left), 0);
3530         return std::move(right);
3531 }
3532
3533
3534 //! Appends a ustring16 and an unsigned short.
3535 template <typename TAlloc>
3536 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3537 {
3538         left.append(core::stringc(right));
3539         return std::move(left);
3540 }
3541
3542
3543 //! Appends a ustring16 and an unsigned short.
3544 template <typename TAlloc>
3545 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3546 {
3547         right.insert(core::stringc(left), 0);
3548         return std::move(right);
3549 }
3550
3551
3552 //! Appends a ustring16 and an int.
3553 template <typename TAlloc>
3554 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3555 {
3556         left.append(core::stringc(right));
3557         return std::move(left);
3558 }
3559
3560
3561 //! Appends a ustring16 and an int.
3562 template <typename TAlloc>
3563 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3564 {
3565         right.insert(core::stringc(left), 0);
3566         return std::move(right);
3567 }
3568
3569
3570 //! Appends a ustring16 and an unsigned int.
3571 template <typename TAlloc>
3572 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3573 {
3574         left.append(core::stringc(right));
3575         return std::move(left);
3576 }
3577
3578
3579 //! Appends a ustring16 and an unsigned int.
3580 template <typename TAlloc>
3581 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3582 {
3583         right.insert(core::stringc(left), 0);
3584         return std::move(right);
3585 }
3586
3587
3588 //! Appends a ustring16 and a long.
3589 template <typename TAlloc>
3590 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3591 {
3592         left.append(core::stringc(right));
3593         return std::move(left);
3594 }
3595
3596
3597 //! Appends a ustring16 and a long.
3598 template <typename TAlloc>
3599 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3600 {
3601         right.insert(core::stringc(left), 0);
3602         return std::move(right);
3603 }
3604
3605
3606 //! Appends a ustring16 and an unsigned long.
3607 template <typename TAlloc>
3608 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3609 {
3610         left.append(core::stringc(right));
3611         return std::move(left);
3612 }
3613
3614
3615 //! Appends a ustring16 and an unsigned long.
3616 template <typename TAlloc>
3617 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3618 {
3619         right.insert(core::stringc(left), 0);
3620         return std::move(right);
3621 }
3622
3623
3624 //! Appends a ustring16 and a float.
3625 template <typename TAlloc>
3626 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3627 {
3628         left.append(core::stringc(right));
3629         return std::move(left);
3630 }
3631
3632
3633 //! Appends a ustring16 and a float.
3634 template <typename TAlloc>
3635 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3636 {
3637         right.insert(core::stringc(left), 0);
3638         return std::move(right);
3639 }
3640
3641
3642 //! Appends a ustring16 and a double.
3643 template <typename TAlloc>
3644 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3645 {
3646         left.append(core::stringc(right));
3647         return std::move(left);
3648 }
3649
3650
3651 //! Appends a ustring16 and a double.
3652 template <typename TAlloc>
3653 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3654 {
3655         right.insert(core::stringc(left), 0);
3656         return std::move(right);
3657 }
3658
3659
3660 //! Writes a ustring16 to an ostream.
3661 template <typename TAlloc>
3662 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3663 {
3664         out << in.toUTF8_s().c_str();
3665         return out;
3666 }
3667
3668 //! Writes a ustring16 to a wostream.
3669 template <typename TAlloc>
3670 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3671 {
3672         out << in.toWCHAR_s().c_str();
3673         return out;
3674 }
3675
3676 namespace unicode
3677 {
3678
3679 //! Hashing algorithm for hashing a ustring.  Used for things like unordered_maps.
3680 //! Algorithm taken from std::hash<std::string>.
3681 class hash : public std::unary_function<core::ustring, size_t>
3682 {
3683         public:
3684                 size_t operator()(const core::ustring& s) const
3685                 {
3686                         size_t ret = 2166136261U;
3687                         size_t index = 0;
3688                         size_t stride = 1 + s.size_raw() / 10;
3689
3690                         core::ustring::const_iterator i = s.begin();
3691                         while (i != s.end())
3692                         {
3693                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
3694                                 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3695                                 index += stride;
3696                                 i += stride;
3697                         }
3698                         return (ret);
3699                 }
3700 };
3701
3702 } // end namespace unicode
3703
3704 } // end namespace core
3705 } // end namespace irr