src/irrlicht_changes/irrUString.h

   1 /*
   2    Basic Unicode string class for Irrlicht.
   3    Copyright (c) 2009-2011 John Norman
   4
   5    This software is provided 'as-is', without any express or implied
   6    warranty. In no event will the authors be held liable for any
   7    damages arising from the use of this software.
   8
   9    Permission is granted to anyone to use this software for any
  10    purpose, including commercial applications, and to alter it and
  11    redistribute it freely, subject to the following restrictions:
  12
  13    1. The origin of this software must not be misrepresented; you
  14       must not claim that you wrote the original software. If you use
  15       this software in a product, an acknowledgment in the product
  16       documentation would be appreciated but is not required.
  17
  18    2. Altered source versions must be plainly marked as such, and
  19       must not be misrepresented as being the original software.
  20
  21    3. This notice may not be removed or altered from any source
  22       distribution.
  23
  24    The original version of this class can be located at:
  25    http://irrlicht.suckerfreegames.com/
  26
  27    John Norman
  28    john@suckerfreegames.com
  29 */
  30
  31 #pragma once
  32
  33 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
  34 #       define USTRING_CPP0X
  35 #       if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
  36 #               define USTRING_CPP0X_NEWLITERALS
  37 #       endif
  38 #endif
  39
  40 #include <stdio.h>
  41 #include <string.h>
  42 #include <stdlib.h>
  43 #include <cstddef>
  44
  45 #ifdef _WIN32
  46 #define __BYTE_ORDER 0
  47 #define __LITTLE_ENDIAN 0
  48 #define __BIG_ENDIAN 1
  49 #elif defined(__MACH__) && defined(__APPLE__)
  50 #include <machine/endian.h>
  51 #elif defined(__FreeBSD__)
  52 #include <sys/endian.h>
  53 #else
  54 #include <endian.h>
  55 #endif
  56
  57 #ifdef USTRING_CPP0X
  58 #       include <utility>
  59 #endif
  60
  61 #ifndef USTRING_NO_STL
  62 #       include <string>
  63 #       include <iterator>
  64 #       include <ostream>
  65 #endif
  66
  67 #include "irrTypes.h"
  68 #include "irrAllocator.h"
  69 #include "irrArray.h"
  70 #include "irrMath.h"
  71 #include "irrString.h"
  72 #include "path.h"
  73
//! UTF-16 surrogate start values.
//! UTF16_HI_SURROGATE is the first high (leading) surrogate code unit and
//! UTF16_LO_SURROGATE is the first low (trailing) surrogate code unit.
static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
  77
  78 //! Is a UTF-16 code point a surrogate?
  79 #define UTF16_IS_SURROGATE(c)           (((c) & 0xF800) == 0xD800)
  80 #define UTF16_IS_SURROGATE_HI(c)        (((c) & 0xFC00) == 0xD800)
  81 #define UTF16_IS_SURROGATE_LO(c)        (((c) & 0xFC00) == 0xDC00)
  82
  83
  84 namespace irr
  85 {
  86
        // Define our character types.
        // When USTRING_CPP0X_NEWLITERALS is set the native char32_t/char16_t
        // types are used; otherwise the fixed-width u32/u16/u8 types stand in.
        // Note that uchar8_t is plain char in the first case and u8 in the second.
#ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
        typedef char32_t uchar32_t;
        typedef char16_t uchar16_t;
        typedef char uchar8_t;
#else
        typedef u32 uchar32_t;
        typedef u16 uchar16_t;
        typedef u8 uchar8_t;
#endif
  97
  98 namespace core
  99 {
 100
 101 namespace unicode
 102 {
 103
//! The unicode replacement character (U+FFFD).  Used to replace invalid characters.
const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
 106
 107 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
 108 //! \param high The high value of the pair.
 109 //! \param low The low value of the pair.
 110 //! \return The UTF-32 character expressed by the surrogate pair.
 111 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
 112 {
 113         // Convert the surrogate pair into a single UTF-32 character.
 114         uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
 115         uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
 116         return (wu << 16) | x;
 117 }
 118
 119 //! Swaps the endianness of a 16-bit value.
 120 //! \return The new value.
 121 inline uchar16_t swapEndian16(const uchar16_t& c)
 122 {
 123         return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
 124 }
 125
 126 //! Swaps the endianness of a 32-bit value.
 127 //! \return The new value.
 128 inline uchar32_t swapEndian32(const uchar32_t& c)
 129 {
 130         return  ((c >> 24) & 0x000000FF) |
 131                         ((c >> 8)  & 0x0000FF00) |
 132                         ((c << 8)  & 0x00FF0000) |
 133                         ((c << 24) & 0xFF000000);
 134 }
 135
//! The Unicode byte order mark (U+FEFF) as a single code unit.
const u16 BOM = 0xFEFF;

//! The size of the Unicode byte order mark in terms of the Unicode character size.
//! That is, the number of code units the mark takes up in each encoding form.
const u8 BOM_UTF8_LEN = 3;
const u8 BOM_UTF16_LEN = 1;
const u8 BOM_UTF32_LEN = 1;

//! Unicode byte order marks for file operations.
//! These are the raw byte sequences of the mark for each encoding and byte order.
//! Note that the UTF-32 LE sequence starts with the UTF-16 LE sequence.
const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };

//! The size in bytes of the Unicode byte marks for file operations.
//! These match the lengths of the BOM_ENCODE_* arrays above.
const u8 BOM_ENCODE_UTF8_LEN = 3;
const u8 BOM_ENCODE_UTF16_LEN = 2;
const u8 BOM_ENCODE_UTF32_LEN = 4;
 155
 156 //! Unicode encoding type.
 157 enum EUTF_ENCODE
 158 {
 159         EUTFE_NONE              = 0,
 160         EUTFE_UTF8,
 161         EUTFE_UTF16,
 162         EUTFE_UTF16_LE,
 163         EUTFE_UTF16_BE,
 164         EUTFE_UTF32,
 165         EUTFE_UTF32_LE,
 166         EUTFE_UTF32_BE
 167 };
 168
 169 //! Unicode endianness.
 170 enum EUTF_ENDIAN
 171 {
 172         EUTFEE_NATIVE   = 0,
 173         EUTFEE_LITTLE,
 174         EUTFEE_BIG
 175 };
 176
 177 //! Returns the specified unicode byte order mark in a byte array.
 178 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
 179 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
 180                 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
 181 //! \return An array that contains a byte order mark.
 182 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
 183 {
 184 #define COPY_ARRAY(source, size) \
 185         memcpy(ret.pointer(), source, size); \
 186         ret.set_used(size)
 187
 188         core::array<u8> ret(4);
 189         switch (mode)
 190         {
 191                 case EUTFE_UTF8:
 192                         COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
 193                         break;
 194                 case EUTFE_UTF16:
 195                         #ifdef __BIG_ENDIAN__
 196                                 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
 197                         #else
 198                                 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
 199                         #endif
 200                         break;
 201                 case EUTFE_UTF16_BE:
 202                         COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
 203                         break;
 204                 case EUTFE_UTF16_LE:
 205                         COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
 206                         break;
 207                 case EUTFE_UTF32:
 208                         #ifdef __BIG_ENDIAN__
 209                                 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
 210                         #else
 211                                 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
 212                         #endif
 213                         break;
 214                 case EUTFE_UTF32_BE:
 215                         COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
 216                         break;
 217                 case EUTFE_UTF32_LE:
 218                         COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
 219                         break;
 220                 case EUTFE_NONE:
 221                         // TODO sapier: fixed warning only,
 222                         // don't know if something needs to be done here
 223                         break;
 224         }
 225         return ret;
 226
 227 #undef COPY_ARRAY
 228 }
 229
 230 //! Detects if the given data stream starts with a unicode BOM.
 231 //! \param data The data stream to check.
 232 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
 233 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
 234 {
 235         if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
 236         if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
 237         if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
 238         if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
 239         if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
 240         return EUTFE_NONE;
 241 }
 242
 243 } // end namespace unicode
 244
 245
 246 //! UTF-16 string class.
 247 template <typename TAlloc = irrAllocator<uchar16_t> >
 248 class ustring16
 249 {
 250 public:
 251
 252         ///------------------///
 253         /// iterator classes ///
 254         ///------------------///
 255
 256         //! Access an element in a unicode string, allowing one to change it.
 257         class _ustring16_iterator_access
 258         {
 259                 public:
 260                         _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
 261
 262                         //! Allow the class to be interpreted as a single UTF-32 character.
 263                         operator uchar32_t() const
 264                         {
 265                                 return _get();
 266                         }
 267
 268                         //! Allow one to change the character in the unicode string.
 269                         //! \param c The new character to use.
 270                         //! \return Myself.
 271                         _ustring16_iterator_access& operator=(const uchar32_t c)
 272                         {
 273                                 _set(c);
 274                                 return *this;
 275                         }
 276
 277                         //! Increments the value by 1.
 278                         //! \return Myself.
 279                         _ustring16_iterator_access& operator++()
 280                         {
 281                                 _set(_get() + 1);
 282                                 return *this;
 283                         }
 284
 285                         //! Increments the value by 1, returning the old value.
 286                         //! \return A unicode character.
 287                         uchar32_t operator++(int)
 288                         {
 289                                 uchar32_t old = _get();
 290                                 _set(old + 1);
 291                                 return old;
 292                         }
 293
 294                         //! Decrements the value by 1.
 295                         //! \return Myself.
 296                         _ustring16_iterator_access& operator--()
 297                         {
 298                                 _set(_get() - 1);
 299                                 return *this;
 300                         }
 301
 302                         //! Decrements the value by 1, returning the old value.
 303                         //! \return A unicode character.
 304                         uchar32_t operator--(int)
 305                         {
 306                                 uchar32_t old = _get();
 307                                 _set(old - 1);
 308                                 return old;
 309                         }
 310
 311                         //! Adds to the value by a specified amount.
 312                         //! \param val The amount to add to this character.
 313                         //! \return Myself.
 314                         _ustring16_iterator_access& operator+=(int val)
 315                         {
 316                                 _set(_get() + val);
 317                                 return *this;
 318                         }
 319
 320                         //! Subtracts from the value by a specified amount.
 321                         //! \param val The amount to subtract from this character.
 322                         //! \return Myself.
 323                         _ustring16_iterator_access& operator-=(int val)
 324                         {
 325                                 _set(_get() - val);
 326                                 return *this;
 327                         }
 328
 329                         //! Multiples the value by a specified amount.
 330                         //! \param val The amount to multiply this character by.
 331                         //! \return Myself.
 332                         _ustring16_iterator_access& operator*=(int val)
 333                         {
 334                                 _set(_get() * val);
 335                                 return *this;
 336                         }
 337
 338                         //! Divides the value by a specified amount.
 339                         //! \param val The amount to divide this character by.
 340                         //! \return Myself.
 341                         _ustring16_iterator_access& operator/=(int val)
 342                         {
 343                                 _set(_get() / val);
 344                                 return *this;
 345                         }
 346
 347                         //! Modulos the value by a specified amount.
 348                         //! \param val The amount to modulo this character by.
 349                         //! \return Myself.
 350                         _ustring16_iterator_access& operator%=(int val)
 351                         {
 352                                 _set(_get() % val);
 353                                 return *this;
 354                         }
 355
 356                         //! Adds to the value by a specified amount.
 357                         //! \param val The amount to add to this character.
 358                         //! \return A unicode character.
 359                         uchar32_t operator+(int val) const
 360                         {
 361                                 return _get() + val;
 362                         }
 363
 364                         //! Subtracts from the value by a specified amount.
 365                         //! \param val The amount to subtract from this character.
 366                         //! \return A unicode character.
 367                         uchar32_t operator-(int val) const
 368                         {
 369                                 return _get() - val;
 370                         }
 371
 372                         //! Multiplies the value by a specified amount.
 373                         //! \param val The amount to multiply this character by.
 374                         //! \return A unicode character.
 375                         uchar32_t operator*(int val) const
 376                         {
 377                                 return _get() * val;
 378                         }
 379
 380                         //! Divides the value by a specified amount.
 381                         //! \param val The amount to divide this character by.
 382                         //! \return A unicode character.
 383                         uchar32_t operator/(int val) const
 384                         {
 385                                 return _get() / val;
 386                         }
 387
 388                         //! Modulos the value by a specified amount.
 389                         //! \param val The amount to modulo this character by.
 390                         //! \return A unicode character.
 391                         uchar32_t operator%(int val) const
 392                         {
 393                                 return _get() % val;
 394                         }
 395
 396                 private:
 397                         //! Gets a uchar32_t from our current position.
 398                         uchar32_t _get() const
 399                         {
 400                                 const uchar16_t* a = ref->c_str();
 401                                 if (!UTF16_IS_SURROGATE(a[pos]))
 402                                         return static_cast<uchar32_t>(a[pos]);
 403                                 else
 404                                 {
 405                                         if (pos + 1 >= ref->size_raw())
 406                                                 return 0;
 407
 408                                         return unicode::toUTF32(a[pos], a[pos + 1]);
 409                                 }
 410                         }
 411
 412                         //! Sets a uchar32_t at our current position.
 413                         void _set(uchar32_t c)
 414                         {
 415                                 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
 416                                 const uchar16_t* a = ref2->c_str();
 417                                 if (c > 0xFFFF)
 418                                 {
 419                                         // c will be multibyte, so split it up into the high and low surrogate pairs.
 420                                         uchar16_t x = static_cast<uchar16_t>(c);
 421                                         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
 422                                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
 423
 424                                         // If the previous position was a surrogate pair, just replace them.  Else, insert the low pair.
 425                                         if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
 426                                                 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
 427                                         else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
 428
 429                                         ref2->replace_raw(vh, static_cast<u32>(pos));
 430                                 }
 431                                 else
 432                                 {
 433                                         // c will be a single byte.
 434                                         uchar16_t vh = static_cast<uchar16_t>(c);
 435
 436                                         // If the previous position was a surrogate pair, remove the extra byte.
 437                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 438                                                 ref2->erase_raw(static_cast<u32>(pos) + 1);
 439
 440                                         ref2->replace_raw(vh, static_cast<u32>(pos));
 441                                 }
 442                         }
 443
 444                         const ustring16<TAlloc>* ref;
 445                         u32 pos;
 446         };
 447         typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
 448
 449
 450         //! Iterator to iterate through a UTF-16 string.
 451 #ifndef USTRING_NO_STL
 452         class _ustring16_const_iterator : public std::iterator<
 453                 std::bidirectional_iterator_tag,        // iterator_category
 454                 access,                                                         // value_type
 455                 ptrdiff_t,                                                      // difference_type
 456                 const access,                                           // pointer
 457                 const access                                            // reference
 458         >
 459 #else
 460         class _ustring16_const_iterator
 461 #endif
 462         {
 463                 public:
                        // Convenience aliases for this iterator and its access proxy.
                        // Dereferencing yields an access object by value, so the pointer
                        // and reference types are proxy objects rather than real
                        // pointers or references.
                        typedef _ustring16_const_iterator _Iter;
                        // NOTE(review): _Base names std::iterator even when USTRING_NO_STL is
                        // defined, although this header only includes <iterator> when
                        // USTRING_NO_STL is not defined - confirm that configuration builds.
                        typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
                        typedef const access const_pointer;
                        typedef const access const_reference;

#ifndef USTRING_NO_STL
                        // STL build: take the iterator traits from the std::iterator base.
                        typedef typename _Base::value_type value_type;
                        typedef typename _Base::difference_type difference_type;
                        typedef typename _Base::difference_type distance_type;
                        typedef typename _Base::pointer pointer;
                        typedef const_reference reference;
#else
                        // Non-STL build: spell the traits out by hand.  Note that the
                        // difference type is the unsigned u32 here.
                        typedef access value_type;
                        typedef u32 difference_type;
                        typedef u32 distance_type;
                        typedef const_pointer pointer;
                        typedef const_reference reference;
#endif
 482
 483                         //! Constructors.
 484                         _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
 485                         _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
 486                         _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
 487                         {
 488                                 if (ref->size_raw() == 0 || p == 0)
 489                                         return;
 490
 491                                 // Go to the appropriate position.
 492                                 u32 i = p;
 493                                 u32 sr = ref->size_raw();
 494                                 const uchar16_t* a = ref->c_str();
 495                                 while (i != 0 && pos < sr)
 496                                 {
 497                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 498                                                 pos += 2;
 499                                         else ++pos;
 500                                         --i;
 501                                 }
 502                         }
 503
 504                         //! Test for equalness.
 505                         bool operator==(const _Iter& iter) const
 506                         {
 507                                 if (ref == iter.ref && pos == iter.pos)
 508                                         return true;
 509                                 return false;
 510                         }
 511
 512                         //! Test for unequalness.
 513                         bool operator!=(const _Iter& iter) const
 514                         {
 515                                 if (ref != iter.ref || pos != iter.pos)
 516                                         return true;
 517                                 return false;
 518                         }
 519
 520                         //! Switch to the next full character in the string.
 521                         _Iter& operator++()
 522                         {       // ++iterator
 523                                 if (pos == ref->size_raw()) return *this;
 524                                 const uchar16_t* a = ref->c_str();
 525                                 if (UTF16_IS_SURROGATE_HI(a[pos]))
 526                                         pos += 2;                       // TODO: check for valid low surrogate?
 527                                 else ++pos;
 528                                 if (pos > ref->size_raw()) pos = ref->size_raw();
 529                                 return *this;
 530                         }
 531
 532                         //! Switch to the next full character in the string, returning the previous position.
 533                         _Iter operator++(int)
 534                         {       // iterator++
 535                                 _Iter _tmp(*this);
 536                                 ++*this;
 537                                 return _tmp;
 538                         }
 539
 540                         //! Switch to the previous full character in the string.
 541                         _Iter& operator--()
 542                         {       // --iterator
 543                                 if (pos == 0) return *this;
 544                                 const uchar16_t* a = ref->c_str();
 545                                 --pos;
 546                                 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0)  // low surrogate, go back one more.
 547                                         --pos;
 548                                 return *this;
 549                         }
 550
 551                         //! Switch to the previous full character in the string, returning the previous position.
 552                         _Iter operator--(int)
 553                         {       // iterator--
 554                                 _Iter _tmp(*this);
 555                                 --*this;
 556                                 return _tmp;
 557                         }
 558
 559                         //! Advance a specified number of full characters in the string.
 560                         //! \return Myself.
 561                         _Iter& operator+=(const difference_type v)
 562                         {
 563                                 if (v == 0) return *this;
 564                                 if (v < 0) return operator-=(v * -1);
 565
 566                                 if (pos >= ref->size_raw())
 567                                         return *this;
 568
 569                                 // Go to the appropriate position.
 570                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
 571                                 u32 i = (u32)v;
 572                                 u32 sr = ref->size_raw();
 573                                 const uchar16_t* a = ref->c_str();
 574                                 while (i != 0 && pos < sr)
 575                                 {
 576                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 577                                                 pos += 2;
 578                                         else ++pos;
 579                                         --i;
 580                                 }
 581                                 if (pos > sr)
 582                                         pos = sr;
 583
 584                                 return *this;
 585                         }
 586
 587                         //! Go back a specified number of full characters in the string.
 588                         //! \return Myself.
 589                         _Iter& operator-=(const difference_type v)
 590                         {
 591                                 if (v == 0) return *this;
 592                                 if (v > 0) return operator+=(v * -1);
 593
 594                                 if (pos == 0)
 595                                         return *this;
 596
 597                                 // Go to the appropriate position.
 598                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
 599                                 u32 i = (u32)v;
 600                                 const uchar16_t* a = ref->c_str();
 601                                 while (i != 0 && pos != 0)
 602                                 {
 603                                         --pos;
 604                                         if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
 605                                                 --pos;
 606                                         --i;
 607                                 }
 608
 609                                 return *this;
 610                         }
 611
 612                         //! Return a new iterator that is a variable number of full characters forward from the current position.
 613                         _Iter operator+(const difference_type v) const
 614                         {
 615                                 _Iter ret(*this);
 616                                 ret += v;
 617                                 return ret;
 618                         }
 619
 620                         //! Return a new iterator that is a variable number of full characters backward from the current position.
 621                         _Iter operator-(const difference_type v) const
 622                         {
 623                                 _Iter ret(*this);
 624                                 ret -= v;
 625                                 return ret;
 626                         }
 627
 628                         //! Returns the distance between two iterators.
 629                         difference_type operator-(const _Iter& iter) const
 630                         {
 631                                 // Make sure we reference the same object!
 632                                 if (ref != iter.ref)
 633                                         return difference_type();
 634
 635                                 _Iter i = iter;
 636                                 difference_type ret;
 637
 638                                 // Walk up.
 639                                 if (pos > i.pos)
 640                                 {
 641                                         while (pos > i.pos)
 642                                         {
 643                                                 ++i;
 644                                                 ++ret;
 645                                         }
 646                                         return ret;
 647                                 }
 648
 649                                 // Walk down.
 650                                 while (pos < i.pos)
 651                                 {
 652                                         --i;
 653                                         --ret;
 654                                 }
 655                                 return ret;
 656                         }
 657
 658                         //! Accesses the full character at the iterator's position.
 659                         const_reference operator*() const
 660                         {
 661                                 if (pos >= ref->size_raw())
 662                                 {
 663                                         const uchar16_t* a = ref->c_str();
 664                                         u32 p = ref->size_raw();
 665                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 666                                                 --p;
 667                                         reference ret(ref, p);
 668                                         return ret;
 669                                 }
 670                                 const_reference ret(ref, pos);
 671                                 return ret;
 672                         }
 673
 674                         //! Accesses the full character at the iterator's position.
 675                         reference operator*()
 676                         {
 677                                 if (pos >= ref->size_raw())
 678                                 {
 679                                         const uchar16_t* a = ref->c_str();
 680                                         u32 p = ref->size_raw();
 681                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 682                                                 --p;
 683                                         reference ret(ref, p);
 684                                         return ret;
 685                                 }
 686                                 reference ret(ref, pos);
 687                                 return ret;
 688                         }
 689
 690                         //! Accesses the full character at the iterator's position.
 691                         const_pointer operator->() const
 692                         {
 693                                 return operator*();
 694                         }
 695
 696                         //! Accesses the full character at the iterator's position.
 697                         pointer operator->()
 698                         {
 699                                 return operator*();
 700                         }
 701
 702                         //! Is the iterator at the start of the string?
 703                         bool atStart() const
 704                         {
 705                                 return pos == 0;
 706                         }
 707
 708                         //! Is the iterator at the end of the string?
 709                         bool atEnd() const
 710                         {
 711                                 const uchar16_t* a = ref->c_str();
 712                                 if (UTF16_IS_SURROGATE(a[pos]))
 713                                         return (pos + 1) >= ref->size_raw();
 714                                 else return pos >= ref->size_raw();
 715                         }
 716
 717                         //! Moves the iterator to the start of the string.
 718                         void toStart()
 719                         {
 720                                 pos = 0;
 721                         }
 722
 723                         //! Moves the iterator to the end of the string.
 724                         void toEnd()
 725                         {
 726                                 pos = ref->size_raw();
 727                         }
 728
 729                         //! Returns the iterator's position.
 730                         //! \return The iterator's position.
 731                         u32 getPos() const
 732                         {
 733                                 return pos;
 734                         }
 735
 736                 protected:
 737                         const ustring16<TAlloc>* ref;
 738                         u32 pos;
 739         };
 740
 741         //! Iterator to iterate through a UTF-16 string.
 742         class _ustring16_iterator : public _ustring16_const_iterator
 743         {
 744                 public:
 745                         typedef _ustring16_iterator _Iter;
 746                         typedef _ustring16_const_iterator _Base;
 747                         typedef typename _Base::const_pointer const_pointer;
 748                         typedef typename _Base::const_reference const_reference;
 749
 750
 751                         typedef typename _Base::value_type value_type;
 752                         typedef typename _Base::difference_type difference_type;
 753                         typedef typename _Base::distance_type distance_type;
 754                         typedef access pointer;
 755                         typedef access reference;
 756
 757                         using _Base::pos;
 758                         using _Base::ref;
 759
 760                         //! Constructors.
 761                         _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
 762                         _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
 763                         _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
 764
 765                         //! Accesses the full character at the iterator's position.
 766                         reference operator*() const
 767                         {
 768                                 if (pos >= ref->size_raw())
 769                                 {
 770                                         const uchar16_t* a = ref->c_str();
 771                                         u32 p = ref->size_raw();
 772                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 773                                                 --p;
 774                                         reference ret(ref, p);
 775                                         return ret;
 776                                 }
 777                                 reference ret(ref, pos);
 778                                 return ret;
 779                         }
 780
 781                         //! Accesses the full character at the iterator's position.
 782                         reference operator*()
 783                         {
 784                                 if (pos >= ref->size_raw())
 785                                 {
 786                                         const uchar16_t* a = ref->c_str();
 787                                         u32 p = ref->size_raw();
 788                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 789                                                 --p;
 790                                         reference ret(ref, p);
 791                                         return ret;
 792                                 }
 793                                 reference ret(ref, pos);
 794                                 return ret;
 795                         }
 796
 797                         //! Accesses the full character at the iterator's position.
 798                         pointer operator->() const
 799                         {
 800                                 return operator*();
 801                         }
 802
 803                         //! Accesses the full character at the iterator's position.
 804                         pointer operator->()
 805                         {
 806                                 return operator*();
 807                         }
 808         };
 809
 810         typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
 811         typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
 812
 813         ///----------------------///
 814         /// end iterator classes ///
 815         ///----------------------///
 816
 817         //! Default constructor
 818         ustring16()
 819         : array(0), allocated(1), used(0)
 820         {
 821 #if __BYTE_ORDER == __BIG_ENDIAN
 822                 encoding = unicode::EUTFE_UTF16_BE;
 823 #else
 824                 encoding = unicode::EUTFE_UTF16_LE;
 825 #endif
 826                 array = allocator.allocate(1); // new u16[1];
 827                 array[0] = 0x0;
 828         }
 829
 830
 831         //! Constructor
 832         ustring16(const ustring16<TAlloc>& other)
 833         : array(0), allocated(0), used(0)
 834         {
 835 #if __BYTE_ORDER == __BIG_ENDIAN
 836                 encoding = unicode::EUTFE_UTF16_BE;
 837 #else
 838                 encoding = unicode::EUTFE_UTF16_LE;
 839 #endif
 840                 *this = other;
 841         }
 842
 843
 844         //! Constructor from other string types
 845         template <class B, class A>
 846         ustring16(const string<B, A>& other)
 847         : array(0), allocated(0), used(0)
 848         {
 849 #if __BYTE_ORDER == __BIG_ENDIAN
 850                 encoding = unicode::EUTFE_UTF16_BE;
 851 #else
 852                 encoding = unicode::EUTFE_UTF16_LE;
 853 #endif
 854                 *this = other;
 855         }
 856
 857
 858 #ifndef USTRING_NO_STL
 859         //! Constructor from std::string
 860         template <class B, class A, typename Alloc>
 861         ustring16(const std::basic_string<B, A, Alloc>& other)
 862         : array(0), allocated(0), used(0)
 863         {
 864 #if __BYTE_ORDER == __BIG_ENDIAN
 865                 encoding = unicode::EUTFE_UTF16_BE;
 866 #else
 867                 encoding = unicode::EUTFE_UTF16_LE;
 868 #endif
 869                 *this = other.c_str();
 870         }
 871
 872
 873         //! Constructor from iterator.
 874         template <typename Itr>
 875         ustring16(Itr first, Itr last)
 876         : array(0), allocated(0), used(0)
 877         {
 878 #if __BYTE_ORDER == __BIG_ENDIAN
 879                 encoding = unicode::EUTFE_UTF16_BE;
 880 #else
 881                 encoding = unicode::EUTFE_UTF16_LE;
 882 #endif
 883                 reserve(std::distance(first, last));
 884                 array[used] = 0;
 885
 886                 for (; first != last; ++first)
 887                         append((uchar32_t)*first);
 888         }
 889 #endif
 890
 891
 892 #ifndef USTRING_CPP0X_NEWLITERALS
 893         //! Constructor for copying a character string from a pointer.
 894         ustring16(const char* const c)
 895         : array(0), allocated(0), used(0)
 896         {
 897 #if __BYTE_ORDER == __BIG_ENDIAN
 898                 encoding = unicode::EUTFE_UTF16_BE;
 899 #else
 900                 encoding = unicode::EUTFE_UTF16_LE;
 901 #endif
 902
 903                 loadDataStream(c, strlen(c));
 904                 //append((uchar8_t*)c);
 905         }
 906
 907
 908         //! Constructor for copying a character string from a pointer with a given length.
 909         ustring16(const char* const c, u32 length)
 910         : array(0), allocated(0), used(0)
 911         {
 912 #if __BYTE_ORDER == __BIG_ENDIAN
 913                 encoding = unicode::EUTFE_UTF16_BE;
 914 #else
 915                 encoding = unicode::EUTFE_UTF16_LE;
 916 #endif
 917
 918                 loadDataStream(c, length);
 919         }
 920 #endif
 921
 922
 923         //! Constructor for copying a UTF-8 string from a pointer.
 924         ustring16(const uchar8_t* const c)
 925         : array(0), allocated(0), used(0)
 926         {
 927 #if __BYTE_ORDER == __BIG_ENDIAN
 928                 encoding = unicode::EUTFE_UTF16_BE;
 929 #else
 930                 encoding = unicode::EUTFE_UTF16_LE;
 931 #endif
 932
 933                 append(c);
 934         }
 935
 936
 937         //! Constructor for copying a UTF-8 string from a single char.
 938         ustring16(const char c)
 939         : array(0), allocated(0), used(0)
 940         {
 941 #if __BYTE_ORDER == __BIG_ENDIAN
 942                 encoding = unicode::EUTFE_UTF16_BE;
 943 #else
 944                 encoding = unicode::EUTFE_UTF16_LE;
 945 #endif
 946
 947                 append((uchar32_t)c);
 948         }
 949
 950
 951         //! Constructor for copying a UTF-8 string from a pointer with a given length.
 952         ustring16(const uchar8_t* const c, u32 length)
 953         : array(0), allocated(0), used(0)
 954         {
 955 #if __BYTE_ORDER == __BIG_ENDIAN
 956                 encoding = unicode::EUTFE_UTF16_BE;
 957 #else
 958                 encoding = unicode::EUTFE_UTF16_LE;
 959 #endif
 960
 961                 append(c, length);
 962         }
 963
 964
 965         //! Constructor for copying a UTF-16 string from a pointer.
 966         ustring16(const uchar16_t* const c)
 967         : array(0), allocated(0), used(0)
 968         {
 969 #if __BYTE_ORDER == __BIG_ENDIAN
 970                 encoding = unicode::EUTFE_UTF16_BE;
 971 #else
 972                 encoding = unicode::EUTFE_UTF16_LE;
 973 #endif
 974
 975                 append(c);
 976         }
 977
 978
 979         //! Constructor for copying a UTF-16 string from a pointer with a given length
 980         ustring16(const uchar16_t* const c, u32 length)
 981         : array(0), allocated(0), used(0)
 982         {
 983 #if __BYTE_ORDER == __BIG_ENDIAN
 984                 encoding = unicode::EUTFE_UTF16_BE;
 985 #else
 986                 encoding = unicode::EUTFE_UTF16_LE;
 987 #endif
 988
 989                 append(c, length);
 990         }
 991
 992
 993         //! Constructor for copying a UTF-32 string from a pointer.
 994         ustring16(const uchar32_t* const c)
 995         : array(0), allocated(0), used(0)
 996         {
 997 #if __BYTE_ORDER == __BIG_ENDIAN
 998                 encoding = unicode::EUTFE_UTF16_BE;
 999 #else
1000                 encoding = unicode::EUTFE_UTF16_LE;
1001 #endif
1002
1003                 append(c);
1004         }
1005
1006
1007         //! Constructor for copying a UTF-32 from a pointer with a given length.
1008         ustring16(const uchar32_t* const c, u32 length)
1009         : array(0), allocated(0), used(0)
1010         {
1011 #if __BYTE_ORDER == __BIG_ENDIAN
1012                 encoding = unicode::EUTFE_UTF16_BE;
1013 #else
1014                 encoding = unicode::EUTFE_UTF16_LE;
1015 #endif
1016
1017                 append(c, length);
1018         }
1019
1020
1021         //! Constructor for copying a wchar_t string from a pointer.
1022         ustring16(const wchar_t* const c)
1023         : array(0), allocated(0), used(0)
1024         {
1025 #if __BYTE_ORDER == __BIG_ENDIAN
1026                 encoding = unicode::EUTFE_UTF16_BE;
1027 #else
1028                 encoding = unicode::EUTFE_UTF16_LE;
1029 #endif
1030
1031                 if (sizeof(wchar_t) == 4)
1032                         append(reinterpret_cast<const uchar32_t* const>(c));
1033                 else if (sizeof(wchar_t) == 2)
1034                         append(reinterpret_cast<const uchar16_t* const>(c));
1035                 else if (sizeof(wchar_t) == 1)
1036                         append(reinterpret_cast<const uchar8_t* const>(c));
1037         }
1038
1039
1040         //! Constructor for copying a wchar_t string from a pointer with a given length.
1041         ustring16(const wchar_t* const c, u32 length)
1042         : array(0), allocated(0), used(0)
1043         {
1044 #if __BYTE_ORDER == __BIG_ENDIAN
1045                 encoding = unicode::EUTFE_UTF16_BE;
1046 #else
1047                 encoding = unicode::EUTFE_UTF16_LE;
1048 #endif
1049
1050                 if (sizeof(wchar_t) == 4)
1051                         append(reinterpret_cast<const uchar32_t* const>(c), length);
1052                 else if (sizeof(wchar_t) == 2)
1053                         append(reinterpret_cast<const uchar16_t* const>(c), length);
1054                 else if (sizeof(wchar_t) == 1)
1055                         append(reinterpret_cast<const uchar8_t* const>(c), length);
1056         }
1057
1058
1059 #ifdef USTRING_CPP0X
1060         //! Constructor for moving a ustring16
1061         ustring16(ustring16<TAlloc>&& other)
1062         : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1063         {
1064                 //std::cout << "MOVE constructor" << std::endl;
1065                 other.array = 0;
1066                 other.allocated = 0;
1067                 other.used = 0;
1068         }
1069 #endif
1070
1071
1072         //! Destructor
1073         ~ustring16()
1074         {
1075                 allocator.deallocate(array); // delete [] array;
1076         }
1077
1078
1079         //! Assignment operator
1080         ustring16& operator=(const ustring16<TAlloc>& other)
1081         {
1082                 if (this == &other)
1083                         return *this;
1084
1085                 used = other.size_raw();
1086                 if (used >= allocated)
1087                 {
1088                         allocator.deallocate(array); // delete [] array;
1089                         allocated = used + 1;
1090                         array = allocator.allocate(used + 1); //new u16[used];
1091                 }
1092
1093                 const uchar16_t* p = other.c_str();
1094                 for (u32 i=0; i<=used; ++i, ++p)
1095                         array[i] = *p;
1096
1097                 array[used] = 0;
1098
1099                 // Validate our new UTF-16 string.
1100                 validate();
1101
1102                 return *this;
1103         }
1104
1105
1106 #ifdef USTRING_CPP0X
1107         //! Move assignment operator
1108         ustring16& operator=(ustring16<TAlloc>&& other)
1109         {
1110                 if (this != &other)
1111                 {
1112                         //std::cout << "MOVE operator=" << std::endl;
1113                         allocator.deallocate(array);
1114
1115                         array = other.array;
1116                         allocated = other.allocated;
1117                         encoding = other.encoding;
1118                         used = other.used;
1119                         other.array = 0;
1120                         other.used = 0;
1121                 }
1122                 return *this;
1123         }
1124 #endif
1125
1126
1127         //! Assignment operator for other string types
1128         template <class B, class A>
1129         ustring16<TAlloc>& operator=(const string<B, A>& other)
1130         {
1131                 *this = other.c_str();
1132                 return *this;
1133         }
1134
1135
1136         //! Assignment operator for UTF-8 strings
1137         ustring16<TAlloc>& operator=(const uchar8_t* const c)
1138         {
1139                 if (!array)
1140                 {
1141                         array = allocator.allocate(1); //new u16[1];
1142                         allocated = 1;
1143                 }
1144                 used = 0;
1145                 array[used] = 0x0;
1146                 if (!c) return *this;
1147
1148                 //! Append our string now.
1149                 append(c);
1150                 return *this;
1151         }
1152
1153
1154         //! Assignment operator for UTF-16 strings
1155         ustring16<TAlloc>& operator=(const uchar16_t* const c)
1156         {
1157                 if (!array)
1158                 {
1159                         array = allocator.allocate(1); //new u16[1];
1160                         allocated = 1;
1161                 }
1162                 used = 0;
1163                 array[used] = 0x0;
1164                 if (!c) return *this;
1165
1166                 //! Append our string now.
1167                 append(c);
1168                 return *this;
1169         }
1170
1171
1172         //! Assignment operator for UTF-32 strings
1173         ustring16<TAlloc>& operator=(const uchar32_t* const c)
1174         {
1175                 if (!array)
1176                 {
1177                         array = allocator.allocate(1); //new u16[1];
1178                         allocated = 1;
1179                 }
1180                 used = 0;
1181                 array[used] = 0x0;
1182                 if (!c) return *this;
1183
1184                 //! Append our string now.
1185                 append(c);
1186                 return *this;
1187         }
1188
1189
1190         //! Assignment operator for wchar_t strings.
1191         /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1192                 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1193                 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1194         ustring16<TAlloc>& operator=(const wchar_t* const c)
1195         {
1196                 if (sizeof(wchar_t) == 4)
1197                         *this = reinterpret_cast<const uchar32_t* const>(c);
1198                 else if (sizeof(wchar_t) == 2)
1199                         *this = reinterpret_cast<const uchar16_t* const>(c);
1200                 else if (sizeof(wchar_t) == 1)
1201                         *this = reinterpret_cast<const uchar8_t* const>(c);
1202
1203                 return *this;
1204         }
1205
1206
1207         //! Assignment operator for other strings.
1208         /** Note that this assumes that a correct unicode string is stored in the string. **/
1209         template <class B>
1210         ustring16<TAlloc>& operator=(const B* const c)
1211         {
1212                 if (sizeof(B) == 4)
1213                         *this = reinterpret_cast<const uchar32_t* const>(c);
1214                 else if (sizeof(B) == 2)
1215                         *this = reinterpret_cast<const uchar16_t* const>(c);
1216                 else if (sizeof(B) == 1)
1217                         *this = reinterpret_cast<const uchar8_t* const>(c);
1218
1219                 return *this;
1220         }
1221
1222
1223         //! Direct access operator
1224         access operator [](const u32 index)
1225         {
1226                 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1227                 iterator iter(*this, index);
1228                 return iter.operator*();
1229         }
1230
1231
1232         //! Direct access operator
1233         const access operator [](const u32 index) const
1234         {
1235                 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1236                 const_iterator iter(*this, index);
1237                 return iter.operator*();
1238         }
1239
1240
1241         //! Equality operator
1242         bool operator ==(const uchar16_t* const str) const
1243         {
1244                 if (!str)
1245                         return false;
1246
1247                 u32 i;
1248                 for(i=0; array[i] && str[i]; ++i)
1249                         if (array[i] != str[i])
1250                                 return false;
1251
1252                 return !array[i] && !str[i];
1253         }
1254
1255
1256         //! Equality operator
1257         bool operator ==(const ustring16<TAlloc>& other) const
1258         {
1259                 for(u32 i=0; array[i] && other.array[i]; ++i)
1260                         if (array[i] != other.array[i])
1261                                 return false;
1262
1263                 return used == other.used;
1264         }
1265
1266
1267         //! Is smaller comparator
1268         bool operator <(const ustring16<TAlloc>& other) const
1269         {
1270                 for(u32 i=0; array[i] && other.array[i]; ++i)
1271                 {
1272                         s32 diff = array[i] - other.array[i];
1273                         if ( diff )
1274                                 return diff < 0;
1275                 }
1276
1277                 return used < other.used;
1278         }
1279
1280
1281         //! Inequality operator
1282         bool operator !=(const uchar16_t* const str) const
1283         {
1284                 return !(*this == str);
1285         }
1286
1287
1288         //! Inequality operator
1289         bool operator !=(const ustring16<TAlloc>& other) const
1290         {
1291                 return !(*this == other);
1292         }
1293
1294
1295         //! Returns the length of a ustring16 in full characters.
1296         //! \return Length of a ustring16 in full characters.
1297         u32 size() const
1298         {
1299                 const_iterator i(*this, 0);
1300                 u32 pos = 0;
1301                 while (!i.atEnd())
1302                 {
1303                         ++i;
1304                         ++pos;
1305                 }
1306                 return pos;
1307         }
1308
1309
1310         //! Informs if the ustring is empty or not.
1311         //! \return True if the ustring is empty, false if not.
1312         bool empty() const
1313         {
1314                 return (size_raw() == 0);
1315         }
1316
1317
1318         //! Returns a pointer to the raw UTF-16 string data.
1319         //! \return pointer to C-style NUL terminated array of UTF-16 code points.
1320         const uchar16_t* c_str() const
1321         {
1322                 return array;
1323         }
1324
1325
1326         //! Compares the first n characters of this string with another.
1327         //! \param other Other string to compare to.
1328         //! \param n Number of characters to compare.
1329         //! \return True if the n first characters of both strings are equal.
1330         bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1331         {
1332                 u32 i;
1333                 const uchar16_t* oa = other.c_str();
1334                 for(i=0; array[i] && oa[i] && i < n; ++i)
1335                         if (array[i] != oa[i])
1336                                 return false;
1337
1338                 // if one (or both) of the strings was smaller then they
1339                 // are only equal if they have the same length
1340                 return (i == n) || (used == other.used);
1341         }
1342
1343
1344         //! Compares the first n characters of this string with another.
1345         //! \param str Other string to compare to.
1346         //! \param n Number of characters to compare.
1347         //! \return True if the n first characters of both strings are equal.
1348         bool equalsn(const uchar16_t* const str, u32 n) const
1349         {
1350                 if (!str)
1351                         return false;
1352                 u32 i;
1353                 for(i=0; array[i] && str[i] && i < n; ++i)
1354                         if (array[i] != str[i])
1355                                 return false;
1356
1357                 // if one (or both) of the strings was smaller then they
1358                 // are only equal if they have the same length
1359                 return (i == n) || (array[i] == 0 && str[i] == 0);
1360         }
1361
1362
1363         //! Appends a character to this ustring16
1364         //! \param character The character to append.
1365         //! \return A reference to our current string.
1366         ustring16<TAlloc>& append(uchar32_t character)
1367         {
1368                 if (used + 2 >= allocated)
1369                         reallocate(used + 2);
1370
1371                 if (character > 0xFFFF)
1372                 {
1373                         used += 2;
1374
1375                         // character will be multibyte, so split it up into a surrogate pair.
1376                         uchar16_t x = static_cast<uchar16_t>(character);
1377                         uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1378                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1379                         array[used-2] = vh;
1380                         array[used-1] = vl;
1381                 }
1382                 else
1383                 {
1384                         ++used;
1385                         array[used-1] = character;
1386                 }
1387                 array[used] = 0;
1388
1389                 return *this;
1390         }
1391
1392
1393         //! Appends a UTF-8 string to this ustring16
1394         //! \param other The UTF-8 string to append.
1395         //! \param length The length of the string to append.
1396         //! \return A reference to our current string.
1397         ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1398         {
1399                 if (!other)
1400                         return *this;
1401
1402                 // Determine if the string is long enough for a BOM.
1403                 u32 len = 0;
1404                 const uchar8_t* p = other;
1405                 do
1406                 {
1407                         ++len;
1408                 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1409
1410                 // Check for BOM.
1411                 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1412                 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1413                 {
1414                         if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1415                                 c_bom = unicode::EUTFE_UTF8;
1416                 }
1417
1418                 // If a BOM was found, don't include it in the string.
1419                 const uchar8_t* c2 = other;
1420                 if (c_bom != unicode::EUTFE_NONE)
1421                 {
1422                         c2 = other + unicode::BOM_UTF8_LEN;
1423                         length -= unicode::BOM_UTF8_LEN;
1424                 }
1425
1426                 // Calculate the size of the string to read in.
1427                 len = 0;
1428                 p = c2;
1429                 do
1430                 {
1431                         ++len;
1432                 } while(*p++ && len < length);
1433                 if (len > length)
1434                         len = length;
1435
1436                 // If we need to grow the array, do it now.
1437                 if (used + len >= allocated)
1438                         reallocate(used + (len * 2));
1439                 u32 start = used;
1440
1441                 // Convert UTF-8 to UTF-16.
1442                 u32 pos = start;
1443                 for (u32 l = 0; l<len;)
1444                 {
1445                         ++used;
1446                         if (((c2[l] >> 6) & 0x03) == 0x02)
1447                         {       // Invalid continuation byte.
1448                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1449                                 ++l;
1450                         }
1451                         else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1452                         {       // Invalid byte - overlong encoding.
1453                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1454                                 ++l;
1455                         }
1456                         else if ((c2[l] & 0xF8) == 0xF0)
1457                         {       // 4 bytes UTF-8, 2 bytes UTF-16.
1458                                 // Check for a full string.
1459                                 if ((l + 3) >= len)
1460                                 {
1461                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1462                                         l += 3;
1463                                         break;
1464                                 }
1465
1466                                 // Validate.
1467                                 bool valid = true;
1468                                 u8 l2 = 0;
1469                                 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1470                                 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1471                                 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1472                                 if (!valid)
1473                                 {
1474                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1475                                         l += l2;
1476                                         continue;
1477                                 }
1478
1479                                 // Decode.
1480                                 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1481                                 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1482                                 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1483                                 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1484
1485                                 // Split v up into a surrogate pair.
1486                                 uchar16_t x = static_cast<uchar16_t>(v);
1487                                 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1488                                 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1489
1490                                 array[pos++] = vh;
1491                                 array[pos++] = vl;
1492                                 l += 4;
1493                                 ++used;         // Using two shorts this time, so increase used by 1.
1494                         }
1495                         else if ((c2[l] & 0xF0) == 0xE0)
1496                         {       // 3 bytes UTF-8, 1 byte UTF-16.
1497                                 // Check for a full string.
1498                                 if ((l + 2) >= len)
1499                                 {
1500                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1501                                         l += 2;
1502                                         break;
1503                                 }
1504
1505                                 // Validate.
1506                                 bool valid = true;
1507                                 u8 l2 = 0;
1508                                 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1509                                 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1510                                 if (!valid)
1511                                 {
1512                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1513                                         l += l2;
1514                                         continue;
1515                                 }
1516
1517                                 // Decode.
1518                                 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1519                                 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1520                                 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1521                                 array[pos++] = ch;
1522                                 l += 3;
1523                         }
1524                         else if ((c2[l] & 0xE0) == 0xC0)
1525                         {       // 2 bytes UTF-8, 1 byte UTF-16.
1526                                 // Check for a full string.
1527                                 if ((l + 1) >= len)
1528                                 {
1529                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1530                                         l += 1;
1531                                         break;
1532                                 }
1533
1534                                 // Validate.
1535                                 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1536                                 {
1537                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1538                                         ++l;
1539                                         continue;
1540                                 }
1541
1542                                 // Decode.
1543                                 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1544                                 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1545                                 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1546                                 array[pos++] = ch;
1547                                 l += 2;
1548                         }
1549                         else
1550                         {       // 1 byte UTF-8, 1 byte UTF-16.
1551                                 // Validate.
1552                                 if (c2[l] > 0x7F)
1553                                 {       // Values above 0xF4 are restricted and aren't used.  By now, anything above 0x7F is invalid.
1554                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1555                                 }
1556                                 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1557                                 ++l;
1558                         }
1559                 }
1560                 array[used] = 0;
1561
1562                 // Validate our new UTF-16 string.
1563                 validate();
1564
1565                 return *this;
1566         }
1567
1568
1569         //! Appends a UTF-16 string to this ustring16
1570         //! \param other The UTF-16 string to append.
1571         //! \param length The length of the string to append.
1572         //! \return A reference to our current string.
1573         ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1574         {
1575                 if (!other)
1576                         return *this;
1577
1578                 // Determine if the string is long enough for a BOM.
1579                 u32 len = 0;
1580                 const uchar16_t* p = other;
1581                 do
1582                 {
1583                         ++len;
1584                 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1585
1586                 // Check for the BOM to determine the string's endianness.
1587                 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1588                 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1589                         c_end = unicode::EUTFEE_LITTLE;
1590                 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1591                         c_end = unicode::EUTFEE_BIG;
1592
1593                 // If a BOM was found, don't include it in the string.
1594                 const uchar16_t* c2 = other;
1595                 if (c_end != unicode::EUTFEE_NATIVE)
1596                 {
1597                         c2 = other + unicode::BOM_UTF16_LEN;
1598                         length -= unicode::BOM_UTF16_LEN;
1599                 }
1600
1601                 // Calculate the size of the string to read in.
1602                 len = 0;
1603                 p = c2;
1604                 do
1605                 {
1606                         ++len;
1607                 } while(*p++ && len < length);
1608                 if (len > length)
1609                         len = length;
1610
1611                 // If we need to grow the size of the array, do it now.
1612                 if (used + len >= allocated)
1613                         reallocate(used + (len * 2));
1614                 u32 start = used;
1615                 used += len;
1616
1617                 // Copy the string now.
1618                 unicode::EUTF_ENDIAN m_end = getEndianness();
1619                 for (u32 l = start; l < start + len; ++l)
1620                 {
1621                         array[l] = (uchar16_t)c2[l];
1622                         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1623                                 array[l] = unicode::swapEndian16(array[l]);
1624                 }
1625
1626                 array[used] = 0;
1627
1628                 // Validate our new UTF-16 string.
1629                 validate();
1630                 return *this;
1631         }
1632
1633
1634         //! Appends a UTF-32 string to this ustring16
1635         //! \param other The UTF-32 string to append.
1636         //! \param length The length of the string to append.
1637         //! \return A reference to our current string.
1638         ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1639         {
1640                 if (!other)
1641                         return *this;
1642
1643                 // Check for the BOM to determine the string's endianness.
1644                 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1645                 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1646                         c_end = unicode::EUTFEE_LITTLE;
1647                 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1648                         c_end = unicode::EUTFEE_BIG;
1649
1650                 // If a BOM was found, don't include it in the string.
1651                 const uchar32_t* c2 = other;
1652                 if (c_end != unicode::EUTFEE_NATIVE)
1653                 {
1654                         c2 = other + unicode::BOM_UTF32_LEN;
1655                         length -= unicode::BOM_UTF32_LEN;
1656                 }
1657
1658                 // Calculate the size of the string to read in.
1659                 u32 len = 0;
1660                 const uchar32_t* p = c2;
1661                 do
1662                 {
1663                         ++len;
1664                 } while(*p++ && len < length);
1665                 if (len > length)
1666                         len = length;
1667
1668                 // If we need to grow the size of the array, do it now.
1669                 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1670                 if (used + (len * 2) >= allocated)
1671                         reallocate(used + ((len * 2) * 2));
1672                 u32 start = used;
1673
1674                 // Convert UTF-32 to UTF-16.
1675                 unicode::EUTF_ENDIAN m_end = getEndianness();
1676                 u32 pos = start;
1677                 for (u32 l = 0; l<len; ++l)
1678                 {
1679                         ++used;
1680
1681                         uchar32_t ch = c2[l];
1682                         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1683                                 ch = unicode::swapEndian32(ch);
1684
1685                         if (ch > 0xFFFF)
1686                         {
1687                                 // Split ch up into a surrogate pair as it is over 16 bits long.
1688                                 uchar16_t x = static_cast<uchar16_t>(ch);
1689                                 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1690                                 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1691                                 array[pos++] = vh;
1692                                 array[pos++] = vl;
1693                                 ++used;         // Using two shorts, so increased used again.
1694                         }
1695                         else if (ch >= 0xD800 && ch <= 0xDFFF)
1696                         {
1697                                 // Between possible UTF-16 surrogates (invalid!)
1698                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1699                         }
1700                         else array[pos++] = static_cast<uchar16_t>(ch);
1701                 }
1702                 array[used] = 0;
1703
1704                 // Validate our new UTF-16 string.
1705                 validate();
1706
1707                 return *this;
1708         }
1709
1710
1711         //! Appends a ustring16 to this ustring16
1712         //! \param other The string to append to this one.
1713         //! \return A reference to our current string.
1714         ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1715         {
1716                 const uchar16_t* oa = other.c_str();
1717
1718                 u32 len = other.size_raw();
1719
1720                 if (used + len >= allocated)
1721                         reallocate(used + len);
1722
1723                 for (u32 l=0; l<len; ++l)
1724                         array[used+l] = oa[l];
1725
1726                 used += len;
1727                 array[used] = 0;
1728
1729                 return *this;
1730         }
1731
1732
1733         //! Appends a certain amount of characters of a ustring16 to this ustring16.
1734         //! \param other The string to append to this one.
1735         //! \param length How many characters of the other string to add to this one.
1736         //! \return A reference to our current string.
1737         ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1738         {
1739                 if (other.size() == 0)
1740                         return *this;
1741
1742                 if (other.size() < length)
1743                 {
1744                         append(other);
1745                         return *this;
1746                 }
1747
1748                 if (used + length * 2 >= allocated)
1749                         reallocate(used + length * 2);
1750
1751                 const_iterator iter(other, 0);
1752                 u32 l = length;
1753                 while (!iter.atEnd() && l)
1754                 {
1755                         uchar32_t c = *iter;
1756                         append(c);
1757                         ++iter;
1758                         --l;
1759                 }
1760
1761                 return *this;
1762         }
1763
1764
1765         //! Reserves some memory.
1766         //! \param count The amount of characters to reserve.
1767         void reserve(u32 count)
1768         {
1769                 if (count < allocated)
1770                         return;
1771
1772                 reallocate(count);
1773         }
1774
1775
1776         //! Finds first occurrence of character.
1777         //! \param c The character to search for.
1778         //! \return Position where the character has been found, or -1 if not found.
1779         s32 findFirst(uchar32_t c) const
1780         {
1781                 const_iterator i(*this, 0);
1782
1783                 s32 pos = 0;
1784                 while (!i.atEnd())
1785                 {
1786                         uchar32_t t = *i;
1787                         if (c == t)
1788                                 return pos;
1789                         ++pos;
1790                         ++i;
1791                 }
1792
1793                 return -1;
1794         }
1795
1796         //! Finds first occurrence of a character of a list.
1797         //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1798         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1799         //! \return Position where one of the characters has been found, or -1 if not found.
1800         s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1801         {
1802                 if (!c || !count)
1803                         return -1;
1804
1805                 const_iterator i(*this, 0);
1806
1807                 s32 pos = 0;
1808                 while (!i.atEnd())
1809                 {
1810                         uchar32_t t = *i;
1811                         for (u32 j=0; j<count; ++j)
1812                                 if (t == c[j])
1813                                         return pos;
1814                         ++pos;
1815                         ++i;
1816                 }
1817
1818                 return -1;
1819         }
1820
1821
1822         //! Finds first position of a character not in a given list.
1823         //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1824         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1825         //! \return Position where the character has been found, or -1 if not found.
1826         s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1827         {
1828                 if (!c || !count)
1829                         return -1;
1830
1831                 const_iterator i(*this, 0);
1832
1833                 s32 pos = 0;
1834                 while (!i.atEnd())
1835                 {
1836                         uchar32_t t = *i;
1837                         u32 j;
1838                         for (j=0; j<count; ++j)
1839                                 if (t == c[j])
1840                                         break;
1841
1842                         if (j==count)
1843                                 return pos;
1844                         ++pos;
1845                         ++i;
1846                 }
1847
1848                 return -1;
1849         }
1850
1851         //! Finds last position of a character not in a given list.
        //! \param c A list of characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should be "ab".
1853         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1854         //! \return Position where the character has been found, or -1 if not found.
1855         s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
1856         {
1857                 if (!c || !count)
1858                         return -1;
1859
1860                 const_iterator i(end());
1861                 --i;
1862
1863                 s32 pos = size() - 1;
1864                 while (!i.atStart())
1865                 {
1866                         uchar32_t t = *i;
1867                         u32 j;
1868                         for (j=0; j<count; ++j)
1869                                 if (t == c[j])
1870                                         break;
1871
1872                         if (j==count)
1873                                 return pos;
1874                         --pos;
1875                         --i;
1876                 }
1877
1878                 return -1;
1879         }
1880
1881         //! Finds next occurrence of character.
1882         //! \param c The character to search for.
1883         //! \param startPos The position in the string to start searching.
1884         //! \return Position where the character has been found, or -1 if not found.
1885         s32 findNext(uchar32_t c, u32 startPos) const
1886         {
1887                 const_iterator i(*this, startPos);
1888
1889                 s32 pos = startPos;
1890                 while (!i.atEnd())
1891                 {
1892                         uchar32_t t = *i;
1893                         if (t == c)
1894                                 return pos;
1895                         ++pos;
1896                         ++i;
1897                 }
1898
1899                 return -1;
1900         }
1901
1902
1903         //! Finds last occurrence of character.
1904         //! \param c The character to search for.
1905         //! \param start The start position of the reverse search ( default = -1, on end ).
1906         //! \return Position where the character has been found, or -1 if not found.
1907         s32 findLast(uchar32_t c, s32 start = -1) const
1908         {
1909                 u32 s = size();
1910                 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1911
1912                 const_iterator i(*this, start);
1913                 u32 pos = start;
1914                 while (!i.atStart())
1915                 {
1916                         uchar32_t t = *i;
1917                         if (t == c)
1918                                 return pos;
1919                         --pos;
1920                         --i;
1921                 }
1922
1923                 return -1;
1924         }
1925
1926         //! Finds last occurrence of a character in a list.
        //! \param c A list of characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1928         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1929         //! \return Position where one of the characters has been found, or -1 if not found.
1930         s32 findLastChar(const uchar32_t* const c, u32 count=1) const
1931         {
1932                 if (!c || !count)
1933                         return -1;
1934
1935                 const_iterator i(end());
1936                 --i;
1937
1938                 s32 pos = size();
1939                 while (!i.atStart())
1940                 {
1941                         uchar32_t t = *i;
1942                         for (u32 j=0; j<count; ++j)
1943                                 if (t == c[j])
1944                                         return pos;
1945                         --pos;
1946                         --i;
1947                 }
1948
1949                 return -1;
1950         }
1951
1952
1953         //! Finds another ustring16 in this ustring16.
1954         //! \param str The string to find.
1955         //! \param start The start position of the search.
        //! \return Position where the ustring16 has been found, or -1 if not found.
1957         s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1958         {
1959                 u32 my_size = size();
1960                 u32 their_size = str.size();
1961
1962                 if (their_size == 0 || my_size - start < their_size)
1963                         return -1;
1964
1965                 const_iterator i(*this, start);
1966
1967                 s32 pos = start;
1968                 while (!i.atEnd())
1969                 {
1970                         const_iterator i2(i);
1971                         const_iterator j(str, 0);
1972                         uchar32_t t1 = (uchar32_t)*i2;
1973                         uchar32_t t2 = (uchar32_t)*j;
1974                         while (t1 == t2)
1975                         {
1976                                 ++i2;
1977                                 ++j;
1978                                 if (j.atEnd())
1979                                         return pos;
1980                                 t1 = (uchar32_t)*i2;
1981                                 t2 = (uchar32_t)*j;
1982                         }
1983                         ++i;
1984                         ++pos;
1985                 }
1986
1987                 return -1;
1988         }
1989
1990
        //! Finds another ustring16 in this ustring16, comparing raw UTF-16 code units.
        //! \param str The string to find.
        //! \param start The start position of the search, as an index into the raw UTF-16 array.
        //! \return Raw array index where the string has been found, or -1 if not found.
1995         s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1996         {
1997                 const uchar16_t* data = str.c_str();
1998                 if (data && *data)
1999                 {
2000                         u32 len = 0;
2001
2002                         while (data[len])
2003                                 ++len;
2004
2005                         if (len > used)
2006                                 return -1;
2007
2008                         for (u32 i=start; i<=used-len; ++i)
2009                         {
2010                                 u32 j=0;
2011
2012                                 while(data[j] && array[i+j] == data[j])
2013                                         ++j;
2014
2015                                 if (!data[j])
2016                                         return i;
2017                         }
2018                 }
2019
2020                 return -1;
2021         }
2022
2023
2024         //! Returns a substring.
2025         //! \param begin: Start of substring.
2026         //! \param length: Length of substring.
        //! \return A new ustring16 containing the requested substring.
2028         ustring16<TAlloc> subString(u32 begin, s32 length) const
2029         {
2030                 u32 len = size();
2031                 // if start after ustring16
2032                 // or no proper substring length
2033                 if ((length <= 0) || (begin>=len))
2034                         return ustring16<TAlloc>("");
2035                 // clamp length to maximal value
2036                 if ((length+begin) > len)
2037                         length = len-begin;
2038
2039                 ustring16<TAlloc> o;
2040                 o.reserve((length+1) * 2);
2041
2042                 const_iterator i(*this, begin);
2043                 while (!i.atEnd() && length)
2044                 {
2045                         o.append(*i);
2046                         ++i;
2047                         --length;
2048                 }
2049
2050                 return o;
2051         }
2052
2053
2054         //! Appends a character to this ustring16.
2055         //! \param c Character to append.
2056         //! \return A reference to our current string.
2057         ustring16<TAlloc>& operator += (char c)
2058         {
2059                 append((uchar32_t)c);
2060                 return *this;
2061         }
2062
2063
2064         //! Appends a character to this ustring16.
2065         //! \param c Character to append.
2066         //! \return A reference to our current string.
2067         ustring16<TAlloc>& operator += (uchar32_t c)
2068         {
2069                 append(c);
2070                 return *this;
2071         }
2072
2073
2074         //! Appends a number to this ustring16.
2075         //! \param c Number to append.
2076         //! \return A reference to our current string.
2077         ustring16<TAlloc>& operator += (short c)
2078         {
2079                 append(core::stringc(c));
2080                 return *this;
2081         }
2082
2083
2084         //! Appends a number to this ustring16.
2085         //! \param c Number to append.
2086         //! \return A reference to our current string.
2087         ustring16<TAlloc>& operator += (unsigned short c)
2088         {
2089                 append(core::stringc(c));
2090                 return *this;
2091         }
2092
2093
2094 #ifdef USTRING_CPP0X_NEWLITERALS
2095         //! Appends a number to this ustring16.
2096         //! \param c Number to append.
2097         //! \return A reference to our current string.
2098         ustring16<TAlloc>& operator += (int c)
2099         {
2100                 append(core::stringc(c));
2101                 return *this;
2102         }
2103
2104
2105         //! Appends a number to this ustring16.
2106         //! \param c Number to append.
2107         //! \return A reference to our current string.
2108         ustring16<TAlloc>& operator += (unsigned int c)
2109         {
2110                 append(core::stringc(c));
2111                 return *this;
2112         }
2113 #endif
2114
2115
2116         //! Appends a number to this ustring16.
2117         //! \param c Number to append.
2118         //! \return A reference to our current string.
2119         ustring16<TAlloc>& operator += (long c)
2120         {
2121                 append(core::stringc(c));
2122                 return *this;
2123         }
2124
2125
2126         //! Appends a number to this ustring16.
2127         //! \param c Number to append.
2128         //! \return A reference to our current string.
2129         ustring16<TAlloc>& operator += (unsigned long c)
2130         {
2131                 append(core::stringc(c));
2132                 return *this;
2133         }
2134
2135
2136         //! Appends a number to this ustring16.
2137         //! \param c Number to append.
2138         //! \return A reference to our current string.
2139         ustring16<TAlloc>& operator += (double c)
2140         {
2141                 append(core::stringc(c));
2142                 return *this;
2143         }
2144
2145
        //! Appends a UTF-16 string to this ustring16.
        //! \param c UTF-16 string to append.
2148         //! \return A reference to our current string.
2149         ustring16<TAlloc>& operator += (const uchar16_t* const c)
2150         {
2151                 append(c);
2152                 return *this;
2153         }
2154
2155
2156         //! Appends a ustring16 to this ustring16.
2157         //! \param other ustring16 to append.
2158         //! \return A reference to our current string.
2159         ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2160         {
2161                 append(other);
2162                 return *this;
2163         }
2164
2165
2166         //! Replaces all characters of a given type with another one.
2167         //! \param toReplace Character to replace.
2168         //! \param replaceWith Character replacing the old one.
2169         //! \return A reference to our current string.
2170         ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
2171         {
2172                 iterator i(*this, 0);
2173                 while (!i.atEnd())
2174                 {
2175                         typename ustring16<TAlloc>::access a = *i;
2176                         if ((uchar32_t)a == toReplace)
2177                                 a = replaceWith;
2178                         ++i;
2179                 }
2180                 return *this;
2181         }
2182
2183
2184         //! Replaces all instances of a string with another one.
2185         //! \param toReplace The string to replace.
2186         //! \param replaceWith The string replacing the old one.
2187         //! \return A reference to our current string.
2188         ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2189         {
2190                 if (toReplace.size() == 0)
2191                         return *this;
2192
2193                 const uchar16_t* other = toReplace.c_str();
2194                 const uchar16_t* replace = replaceWith.c_str();
2195                 const u32 other_size = toReplace.size_raw();
2196                 const u32 replace_size = replaceWith.size_raw();
2197
2198                 // Determine the delta.  The algorithm will change depending on the delta.
2199                 s32 delta = replace_size - other_size;
2200
2201                 // A character for character replace.  The string will not shrink or grow.
2202                 if (delta == 0)
2203                 {
2204                         s32 pos = 0;
2205                         while ((pos = find_raw(other, pos)) != -1)
2206                         {
2207                                 for (u32 i = 0; i < replace_size; ++i)
2208                                         array[pos + i] = replace[i];
2209                                 ++pos;
2210                         }
2211                         return *this;
2212                 }
2213
2214                 // We are going to be removing some characters.  The string will shrink.
2215                 if (delta < 0)
2216                 {
2217                         u32 i = 0;
2218                         for (u32 pos = 0; pos <= used; ++i, ++pos)
2219                         {
2220                                 // Is this potentially a match?
2221                                 if (array[pos] == *other)
2222                                 {
2223                                         // Check to see if we have a match.
2224                                         u32 j;
2225                                         for (j = 0; j < other_size; ++j)
2226                                         {
2227                                                 if (array[pos + j] != other[j])
2228                                                         break;
2229                                         }
2230
2231                                         // If we have a match, replace characters.
2232                                         if (j == other_size)
2233                                         {
2234                                                 for (j = 0; j < replace_size; ++j)
2235                                                         array[i + j] = replace[j];
2236                                                 i += replace_size - 1;
2237                                                 pos += other_size - 1;
2238                                                 continue;
2239                                         }
2240                                 }
2241
2242                                 // No match found, just copy characters.
2243                                 array[i - 1] = array[pos];
2244                         }
2245                         array[i] = 0;
2246                         used = i;
2247
2248                         return *this;
2249                 }
2250
2251                 // We are going to be adding characters, so the string size will increase.
2252                 // Count the number of times toReplace exists in the string so we can allocate the new size.
2253                 u32 find_count = 0;
2254                 s32 pos = 0;
2255                 while ((pos = find_raw(other, pos)) != -1)
2256                 {
2257                         ++find_count;
2258                         ++pos;
2259                 }
2260
2261                 // Re-allocate the string now, if needed.
2262                 u32 len = delta * find_count;
2263                 if (used + len >= allocated)
2264                         reallocate(used + len);
2265
2266                 // Start replacing.
2267                 pos = 0;
2268                 while ((pos = find_raw(other, pos)) != -1)
2269                 {
2270                         uchar16_t* start = array + pos + other_size - 1;
2271                         uchar16_t* ptr   = array + used;
2272                         uchar16_t* end   = array + used + delta;
2273
2274                         // Shift characters to make room for the string.
2275                         while (ptr != start)
2276                         {
2277                                 *end = *ptr;
2278                                 --ptr;
2279                                 --end;
2280                         }
2281
2282                         // Add the new string now.
2283                         for (u32 i = 0; i < replace_size; ++i)
2284                                 array[pos + i] = replace[i];
2285
2286                         pos += replace_size;
2287                         used += delta;
2288                 }
2289
2290                 // Terminate the string and return ourself.
2291                 array[used] = 0;
2292                 return *this;
2293         }
2294
2295
2296         //! Removes characters from a ustring16..
2297         //! \param c The character to remove.
2298         //! \return A reference to our current string.
2299         ustring16<TAlloc>& remove(uchar32_t c)
2300         {
2301                 u32 pos = 0;
2302                 u32 found = 0;
2303                 u32 len = (c > 0xFFFF ? 2 : 1);         // Remove characters equal to the size of c as a UTF-16 character.
2304                 for (u32 i=0; i<=used; ++i)
2305                 {
2306                         uchar32_t uc32 = 0;
2307                         if (!UTF16_IS_SURROGATE_HI(array[i]))
2308                                 uc32 |= array[i];
2309                         else if (i + 1 <= used)
2310                         {
2311                                 // Convert the surrogate pair into a single UTF-32 character.
2312                                 uc32 = unicode::toUTF32(array[i], array[i + 1]);
2313                         }
2314                         u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2315
2316                         if (uc32 == c)
2317                         {
2318                                 found += len;
2319                                 continue;
2320                         }
2321
2322                         array[pos++] = array[i];
2323                         if (len2 == 2)
2324                                 array[pos++] = array[++i];
2325                 }
2326                 used -= found;
2327                 array[used] = 0;
2328                 return *this;
2329         }
2330
2331
2332         //! Removes a ustring16 from the ustring16.
2333         //! \param toRemove The string to remove.
2334         //! \return A reference to our current string.
2335         ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
2336         {
2337                 u32 size = toRemove.size_raw();
2338                 if (size == 0) return *this;
2339
2340                 const uchar16_t* tra = toRemove.c_str();
2341                 u32 pos = 0;
2342                 u32 found = 0;
2343                 for (u32 i=0; i<=used; ++i)
2344                 {
2345                         u32 j = 0;
2346                         while (j < size)
2347                         {
2348                                 if (array[i + j] != tra[j])
2349                                         break;
2350                                 ++j;
2351                         }
2352                         if (j == size)
2353                         {
2354                                 found += size;
2355                                 i += size - 1;
2356                                 continue;
2357                         }
2358
2359                         array[pos++] = array[i];
2360                 }
2361                 used -= found;
2362                 array[used] = 0;
2363                 return *this;
2364         }
2365
2366
2367         //! Removes characters from the ustring16.
2368         //! \param characters The characters to remove.
2369         //! \return A reference to our current string.
2370         ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2371         {
2372                 if (characters.size_raw() == 0)
2373                         return *this;
2374
2375                 u32 pos = 0;
2376                 u32 found = 0;
2377                 const_iterator iter(characters);
2378                 for (u32 i=0; i<=used; ++i)
2379                 {
2380                         uchar32_t uc32 = 0;
2381                         if (!UTF16_IS_SURROGATE_HI(array[i]))
2382                                 uc32 |= array[i];
2383                         else if (i + 1 <= used)
2384                         {
2385                                 // Convert the surrogate pair into a single UTF-32 character.
2386                                 uc32 = unicode::toUTF32(array[i], array[i+1]);
2387                         }
2388                         u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2389
2390                         bool cont = false;
2391                         iter.toStart();
2392                         while (!iter.atEnd())
2393                         {
2394                                 uchar32_t c = *iter;
2395                                 if (uc32 == c)
2396                                 {
2397                                         found += (c > 0xFFFF ? 2 : 1);          // Remove characters equal to the size of c as a UTF-16 character.
2398                                         ++i;
2399                                         cont = true;
2400                                         break;
2401                                 }
2402                                 ++iter;
2403                         }
2404                         if (cont) continue;
2405
2406                         array[pos++] = array[i];
2407                         if (len2 == 2)
2408                                 array[pos++] = array[++i];
2409                 }
2410                 used -= found;
2411                 array[used] = 0;
2412                 return *this;
2413         }
2414
2415
2416         //! Trims the ustring16.
2417         //! Removes the specified characters (by default, Latin-1 whitespace) from the begining and the end of the ustring16.
2418         //! \param whitespace The characters that are to be considered as whitespace.
2419         //! \return A reference to our current string.
2420         ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
2421         {
2422                 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2423
2424                 // find start and end of the substring without the specified characters
2425                 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2426                 if (begin == -1)
2427                         return (*this="");
2428
2429                 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2430
2431                 return (*this = subString(begin, (end +1) - begin));
2432         }
2433
2434
2435         //! Erases a character from the ustring16.
2436         //! May be slow, because all elements following after the erased element have to be copied.
2437         //! \param index Index of element to be erased.
2438         //! \return A reference to our current string.
2439         ustring16<TAlloc>& erase(u32 index)
2440         {
2441                 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2442
2443                 iterator i(*this, index);
2444
2445                 uchar32_t t = *i;
2446                 u32 len = (t > 0xFFFF ? 2 : 1);
2447
2448                 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2449                         array[j - len] = array[j];
2450
2451                 used -= len;
2452                 array[used] = 0;
2453
2454                 return *this;
2455         }
2456
2457
2458         //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2459         //! \return A reference to our current string.
2460         ustring16<TAlloc>& validate()
2461         {
2462                 // Validate all unicode characters.
2463                 for (u32 i=0; i<allocated; ++i)
2464                 {
2465                         // Terminate on existing null.
2466                         if (array[i] == 0)
2467                         {
2468                                 used = i;
2469                                 return *this;
2470                         }
2471                         if (UTF16_IS_SURROGATE(array[i]))
2472                         {
2473                                 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2474                                         array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2475                                 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2476                                         array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2477                                 ++i;
2478                         }
2479                         if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2480                                 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2481                 }
2482
2483                 // terminate
2484                 used = 0;
2485                 if (allocated > 0)
2486                 {
2487                         used = allocated - 1;
2488                         array[used] = 0;
2489                 }
2490                 return *this;
2491         }
2492
2493
2494         //! Gets the last char of the ustring16, or 0.
2495         //! \return The last char of the ustring16, or 0.
2496         uchar32_t lastChar() const
2497         {
2498                 if (used < 1)
2499                         return 0;
2500
2501                 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2502                 {
2503                         // Make sure we have a paired surrogate.
2504                         if (used < 2)
2505                                 return 0;
2506
2507                         // Check for an invalid surrogate.
2508                         if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2509                                 return 0;
2510
2511                         // Convert the surrogate pair into a single UTF-32 character.
2512                         return unicode::toUTF32(array[used-2], array[used-1]);
2513                 }
2514                 else
2515                 {
2516                         return array[used-1];
2517                 }
2518         }
2519
2520
2521         //! Split the ustring16 into parts.
2522         /** This method will split a ustring16 at certain delimiter characters
2523         into the container passed in as reference. The type of the container
2524         has to be given as template parameter. It must provide a push_back and
2525         a size method.
2526         \param ret The result container
2527         \param c C-style ustring16 of delimiter characters
2528         \param count Number of delimiter characters
2529         \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2530         container. If two delimiters occur without a character in between, an
2531         empty substring would be placed in the result. If this flag is set,
2532         only non-empty strings are stored.
2533         \param keepSeparators Flag which allows to add the separator to the
2534         result ustring16. If this flag is true, the concatenation of the
2535         substrings results in the original ustring16. Otherwise, only the
2536         characters between the delimiters are returned.
2537         \return The number of resulting substrings
2538         */
2539         template<class container>
2540         u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2541         {
2542                 if (!c)
2543                         return 0;
2544
2545                 const_iterator i(*this);
2546                 const u32 oldSize=ret.size();
2547                 u32 pos = 0;
2548                 u32 lastpos = 0;
2549                 u32 lastpospos = 0;
2550                 bool lastWasSeparator = false;
2551                 while (!i.atEnd())
2552                 {
2553                         uchar32_t ch = *i;
2554                         bool foundSeparator = false;
2555                         for (u32 j=0; j<count; ++j)
2556                         {
2557                                 if (ch == c[j])
2558                                 {
2559                                         if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2560                                                         !lastWasSeparator)
2561                                         ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2562                                         foundSeparator = true;
2563                                         lastpos = (keepSeparators ? pos : pos + 1);
2564                                         lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2565                                         break;
2566                                 }
2567                         }
2568                         lastWasSeparator = foundSeparator;
2569                         ++pos;
2570                         ++i;
2571                 }
2572                 u32 s = size() + 1;
2573                 if (s > lastpos)
2574                         ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2575                 return ret.size()-oldSize;
2576         }
2577
2578
2579         //! Split the ustring16 into parts.
2580         /** This method will split a ustring16 at certain delimiter characters
2581         into the container passed in as reference. The type of the container
2582         has to be given as template parameter. It must provide a push_back and
2583         a size method.
2584         \param ret The result container
2585         \param c A unicode string of delimiter characters
2586         \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2587         container. If two delimiters occur without a character in between, an
2588         empty substring would be placed in the result. If this flag is set,
2589         only non-empty strings are stored.
2590         \param keepSeparators Flag which allows to add the separator to the
2591         result ustring16. If this flag is true, the concatenation of the
2592         substrings results in the original ustring16. Otherwise, only the
2593         characters between the delimiters are returned.
2594         \return The number of resulting substrings
2595         */
2596         template<class container>
2597         u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2598         {
2599                 core::array<uchar32_t> v = c.toUTF32();
2600                 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2601         }
2602
2603
2604         //! Gets the size of the allocated memory buffer for the string.
2605         //! \return The size of the allocated memory buffer.
2606         u32 capacity() const
2607         {
2608                 return allocated;
2609         }
2610
2611
2612         //! Returns the raw number of UTF-16 code points in the string which includes the individual surrogates.
2613         //! \return The raw number of UTF-16 code points, excluding the trialing NUL.
2614         u32 size_raw() const
2615         {
2616                 return used;
2617         }
2618
2619
2620         //! Inserts a character into the string.
2621         //! \param c The character to insert.
2622         //! \param pos The position to insert the character.
2623         //! \return A reference to our current string.
2624         ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
2625         {
2626                 u8 len = (c > 0xFFFF ? 2 : 1);
2627
2628                 if (used + len >= allocated)
2629                         reallocate(used + len);
2630
2631                 used += len;
2632
2633                 iterator iter(*this, pos);
2634                 for (u32 i = used - 2; i > iter.getPos(); --i)
2635                         array[i] = array[i - len];
2636
2637                 if (c > 0xFFFF)
2638                 {
2639                         // c will be multibyte, so split it up into a surrogate pair.
2640                         uchar16_t x = static_cast<uchar16_t>(c);
2641                         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2642                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2643                         array[iter.getPos()] = vh;
2644                         array[iter.getPos()+1] = vl;
2645                 }
2646                 else
2647                 {
2648                         array[iter.getPos()] = static_cast<uchar16_t>(c);
2649                 }
2650                 array[used] = 0;
2651                 return *this;
2652         }
2653
2654
2655         //! Inserts a string into the string.
2656         //! \param c The string to insert.
2657         //! \param pos The position to insert the string.
2658         //! \return A reference to our current string.
2659         ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
2660         {
2661                 u32 len = c.size_raw();
2662                 if (len == 0) return *this;
2663
2664                 if (used + len >= allocated)
2665                         reallocate(used + len);
2666
2667                 used += len;
2668
2669                 iterator iter(*this, pos);
2670                 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2671                         array[i] = array[i - len];
2672
2673                 const uchar16_t* s = c.c_str();
2674                 for (u32 i = 0; i < len; ++i)
2675                 {
2676                         array[pos++] = *s;
2677                         ++s;
2678                 }
2679
2680                 array[used] = 0;
2681                 return *this;
2682         }
2683
2684
2685         //! Inserts a character into the string.
2686         //! \param c The character to insert.
2687         //! \param pos The position to insert the character.
2688         //! \return A reference to our current string.
2689         ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2690         {
2691                 if (used + 1 >= allocated)
2692                         reallocate(used + 1);
2693
2694                 ++used;
2695
2696                 for (u32 i = used - 1; i > pos; --i)
2697                         array[i] = array[i - 1];
2698
2699                 array[pos] = c;
2700                 array[used] = 0;
2701                 return *this;
2702         }
2703
2704
2705         //! Removes a character from string.
2706         //! \param pos Position of the character to remove.
2707         //! \return A reference to our current string.
2708         ustring16<TAlloc>& erase_raw(u32 pos)
2709         {
2710                 for (u32 i=pos; i<=used; ++i)
2711                 {
2712                         array[i] = array[i + 1];
2713                 }
2714                 --used;
2715                 array[used] = 0;
2716                 return *this;
2717         }
2718
2719
2720         //! Replaces a character in the string.
2721         //! \param c The new character.
2722         //! \param pos The position of the character to replace.
2723         //! \return A reference to our current string.
2724         ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2725         {
2726                 array[pos] = c;
2727                 return *this;
2728         }
2729
2730
2731         //! Returns an iterator to the beginning of the string.
2732         //! \return An iterator to the beginning of the string.
2733         iterator begin()
2734         {
2735                 iterator i(*this, 0);
2736                 return i;
2737         }
2738
2739
2740         //! Returns an iterator to the beginning of the string.
2741         //! \return An iterator to the beginning of the string.
2742         const_iterator begin() const
2743         {
2744                 const_iterator i(*this, 0);
2745                 return i;
2746         }
2747
2748
2749         //! Returns an iterator to the beginning of the string.
2750         //! \return An iterator to the beginning of the string.
2751         const_iterator cbegin() const
2752         {
2753                 const_iterator i(*this, 0);
2754                 return i;
2755         }
2756
2757
2758         //! Returns an iterator to the end of the string.
2759         //! \return An iterator to the end of the string.
2760         iterator end()
2761         {
2762                 iterator i(*this, 0);
2763                 i.toEnd();
2764                 return i;
2765         }
2766
2767
2768         //! Returns an iterator to the end of the string.
2769         //! \return An iterator to the end of the string.
2770         const_iterator end() const
2771         {
2772                 const_iterator i(*this, 0);
2773                 i.toEnd();
2774                 return i;
2775         }
2776
2777
2778         //! Returns an iterator to the end of the string.
2779         //! \return An iterator to the end of the string.
2780         const_iterator cend() const
2781         {
2782                 const_iterator i(*this, 0);
2783                 i.toEnd();
2784                 return i;
2785         }
2786
2787
2788         //! Converts the string to a UTF-8 encoded string.
2789         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2790         //! \return A string containing the UTF-8 encoded string.
2791         core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2792         {
2793                 core::string<uchar8_t> ret;
2794                 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2795                 const_iterator iter(*this, 0);
2796
2797                 // Add the byte order mark if the user wants it.
2798                 if (addBOM)
2799                 {
2800                         ret.append(unicode::BOM_ENCODE_UTF8[0]);
2801                         ret.append(unicode::BOM_ENCODE_UTF8[1]);
2802                         ret.append(unicode::BOM_ENCODE_UTF8[2]);
2803                 }
2804
2805                 while (!iter.atEnd())
2806                 {
2807                         uchar32_t c = *iter;
2808                         if (c > 0xFFFF)
2809                         {       // 4 bytes
2810                                 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2811                                 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2812                                 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2813                                 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2814                                 ret.append(b1);
2815                                 ret.append(b2);
2816                                 ret.append(b3);
2817                                 ret.append(b4);
2818                         }
2819                         else if (c > 0x7FF)
2820                         {       // 3 bytes
2821                                 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2822                                 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2823                                 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2824                                 ret.append(b1);
2825                                 ret.append(b2);
2826                                 ret.append(b3);
2827                         }
2828                         else if (c > 0x7F)
2829                         {       // 2 bytes
2830                                 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2831                                 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2832                                 ret.append(b1);
2833                                 ret.append(b2);
2834                         }
2835                         else
2836                         {       // 1 byte
2837                                 ret.append(static_cast<uchar8_t>(c));
2838                         }
2839                         ++iter;
2840                 }
2841                 return ret;
2842         }
2843
2844
2845         //! Converts the string to a UTF-8 encoded string array.
2846         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2847         //! \return An array containing the UTF-8 encoded string.
2848         core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2849         {
2850                 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2851                 const_iterator iter(*this, 0);
2852
2853                 // Add the byte order mark if the user wants it.
2854                 if (addBOM)
2855                 {
2856                         ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2857                         ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2858                         ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2859                 }
2860
2861                 while (!iter.atEnd())
2862                 {
2863                         uchar32_t c = *iter;
2864                         if (c > 0xFFFF)
2865                         {       // 4 bytes
2866                                 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2867                                 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2868                                 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2869                                 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2870                                 ret.push_back(b1);
2871                                 ret.push_back(b2);
2872                                 ret.push_back(b3);
2873                                 ret.push_back(b4);
2874                         }
2875                         else if (c > 0x7FF)
2876                         {       // 3 bytes
2877                                 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2878                                 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2879                                 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2880                                 ret.push_back(b1);
2881                                 ret.push_back(b2);
2882                                 ret.push_back(b3);
2883                         }
2884                         else if (c > 0x7F)
2885                         {       // 2 bytes
2886                                 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2887                                 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2888                                 ret.push_back(b1);
2889                                 ret.push_back(b2);
2890                         }
2891                         else
2892                         {       // 1 byte
2893                                 ret.push_back(static_cast<uchar8_t>(c));
2894                         }
2895                         ++iter;
2896                 }
2897                 ret.push_back(0);
2898                 return ret;
2899         }
2900
2901
#ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
        //! Converts the string to a UTF-16 encoded string.
        //! \param endian The desired endianness of the string.
        //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
        //! \return A string containing the UTF-16 encoded string.
        core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
        {
                core::string<char16_t> ret;
                ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);

                // Add the BOM if specified.  It is appended as a real element so the string
                // grows; writing it through ret[0] / c_str() left the length at zero and the
                // following append simply overwrote it.
                if (addBOM)
                {
                        char16_t bom = static_cast<char16_t>(unicode::BOM);
                        if (endian == unicode::EUTFEE_LITTLE)
                                memcpy(&bom, unicode::BOM_ENCODE_UTF16_LE, sizeof(bom));
                        else if (endian != unicode::EUTFEE_NATIVE)
                                memcpy(&bom, unicode::BOM_ENCODE_UTF16_BE, sizeof(bom));
                        ret.append(bom);
                }

                // Append the content, swapping bytes when the requested order differs from
                // the machine's.  The BOM above is already in the requested byte order and
                // must not be swapped again.
                const bool swapBytes = (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian);
                for (u32 i = 0; i < used; ++i)
                {
                        if (swapBytes)
                                ret.append(static_cast<char16_t>(unicode::swapEndian16(array[i])));
                        else
                                ret.append(static_cast<char16_t>(array[i]));
                }
                return ret;
        }
#endif
2941
2942
2943         //! Converts the string to a UTF-16 encoded string array.
2944         //! Unfortunately, no toUTF16_s() version exists due to limitations with Irrlicht's string class.
2945         //! \param endian The desired endianness of the string.
2946         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2947         //! \return An array containing the UTF-16 encoded string.
2948         core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2949         {
2950                 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2951                 uchar16_t* ptr = ret.pointer();
2952
2953                 // Add the BOM if specified.
2954                 if (addBOM)
2955                 {
2956                         if (endian == unicode::EUTFEE_NATIVE)
2957                                 *ptr = unicode::BOM;
2958                         else if (endian == unicode::EUTFEE_LITTLE)
2959                         {
2960                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2961                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2962                                 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2963                         }
2964                         else
2965                         {
2966                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2967                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2968                                 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2969                         }
2970                         ++ptr;
2971                 }
2972
2973                 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2974                 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2975                 {
2976                         for (u32 i = 0; i <= used; ++i)
2977                                 ptr[i] = unicode::swapEndian16(ptr[i]);
2978                 }
2979                 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2980                 ret.push_back(0);
2981                 return ret;
2982         }
2983
2984
2985 #ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
2986         //! Converts the string to a UTF-32 encoded string.
2987         //! \param endian The desired endianness of the string.
2988         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2989         //! \return A string containing the UTF-32 encoded string.
2990         core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2991         {
2992                 core::string<char32_t> ret;
2993                 ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
2994                 const_iterator iter(*this, 0);
2995
2996                 // Add the BOM if specified.
2997                 if (addBOM)
2998                 {
2999                         if (endian == unicode::EUTFEE_NATIVE)
3000                                 ret.append(unicode::BOM);
3001                         else
3002                         {
3003                                 union
3004                                 {
3005                                         uchar32_t full;
3006                                         u8 chunk[4];
3007                                 } t;
3008
3009                                 if (endian == unicode::EUTFEE_LITTLE)
3010                                 {
3011                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3012                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3013                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3014                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3015                                 }
3016                                 else
3017                                 {
3018                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3019                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3020                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3021                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3022                                 }
3023                                 ret.append(t.full);
3024                         }
3025                 }
3026
3027                 while (!iter.atEnd())
3028                 {
3029                         uchar32_t c = *iter;
3030                         if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3031                                 c = unicode::swapEndian32(c);
3032                         ret.append(c);
3033                         ++iter;
3034                 }
3035                 return ret;
3036         }
3037 #endif
3038
3039
3040         //! Converts the string to a UTF-32 encoded string array.
3041         //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
3042         //! \param endian The desired endianness of the string.
3043         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3044         //! \return An array containing the UTF-32 encoded string.
3045         core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3046         {
3047                 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3048                 const_iterator iter(*this, 0);
3049
3050                 // Add the BOM if specified.
3051                 if (addBOM)
3052                 {
3053                         if (endian == unicode::EUTFEE_NATIVE)
3054                                 ret.push_back(unicode::BOM);
3055                         else
3056                         {
3057                                 union
3058                                 {
3059                                         uchar32_t full;
3060                                         u8 chunk[4];
3061                                 } t;
3062
3063                                 if (endian == unicode::EUTFEE_LITTLE)
3064                                 {
3065                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3066                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3067                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3068                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3069                                 }
3070                                 else
3071                                 {
3072                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3073                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3074                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3075                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3076                                 }
3077                                 ret.push_back(t.full);
3078                         }
3079                 }
3080                 ret.push_back(0);
3081
3082                 while (!iter.atEnd())
3083                 {
3084                         uchar32_t c = *iter;
3085                         if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3086                                 c = unicode::swapEndian32(c);
3087                         ret.push_back(c);
3088                         ++iter;
3089                 }
3090                 return ret;
3091         }
3092
3093
3094         //! Converts the string to a wchar_t encoded string.
3095         /** The size of a wchar_t changes depending on the platform.  This function will store a
3096         correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3097         //! \param endian The desired endianness of the string.
3098         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3099         //! \return A string containing the wchar_t encoded string.
3100         core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3101         {
3102                 if (sizeof(wchar_t) == 4)
3103                 {
3104                         core::array<uchar32_t> a(toUTF32(endian, addBOM));
3105                         core::stringw ret(a.pointer());
3106                         return ret;
3107                 }
3108                 else if (sizeof(wchar_t) == 2)
3109                 {
3110                         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3111                         {
3112                                 core::stringw ret(array);
3113                                 return ret;
3114                         }
3115                         else
3116                         {
3117                                 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3118                                 core::stringw ret(a.pointer());
3119                                 return ret;
3120                         }
3121                 }
3122                 else if (sizeof(wchar_t) == 1)
3123                 {
3124                         core::array<uchar8_t> a(toUTF8(addBOM));
3125                         core::stringw ret(a.pointer());
3126                         return ret;
3127                 }
3128
3129                 // Shouldn't happen.
3130                 return core::stringw();
3131         }
3132
3133
3134         //! Converts the string to a wchar_t encoded string array.
3135         /** The size of a wchar_t changes depending on the platform.  This function will store a
3136         correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3137         //! \param endian The desired endianness of the string.
3138         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3139         //! \return An array containing the wchar_t encoded string.
3140         core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3141         {
3142                 if (sizeof(wchar_t) == 4)
3143                 {
3144                         core::array<uchar32_t> a(toUTF32(endian, addBOM));
3145                         core::array<wchar_t> ret(a.size());
3146                         ret.set_used(a.size());
3147                         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3148                         return ret;
3149                 }
3150                 if (sizeof(wchar_t) == 2)
3151                 {
3152                         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3153                         {
3154                                 core::array<wchar_t> ret(used);
3155                                 ret.set_used(used);
3156                                 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3157                                 return ret;
3158                         }
3159                         else
3160                         {
3161                                 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3162                                 core::array<wchar_t> ret(a.size());
3163                                 ret.set_used(a.size());
3164                                 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3165                                 return ret;
3166                         }
3167                 }
3168                 if (sizeof(wchar_t) == 1)
3169                 {
3170                         core::array<uchar8_t> a(toUTF8(addBOM));
3171                         core::array<wchar_t> ret(a.size());
3172                         ret.set_used(a.size());
3173                         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3174                         return ret;
3175                 }
3176
3177                 // Shouldn't happen.
3178                 return core::array<wchar_t>();
3179         }
3180
3181         //! Converts the string to a properly encoded io::path string.
3182         //! \param endian The desired endianness of the string.
3183         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3184         //! \return An io::path string containing the properly encoded string.
3185         io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3186         {
3187 #if defined(_IRR_WCHAR_FILESYSTEM)
3188                 return toWCHAR_s(endian, addBOM);
3189 #else
3190                 return toUTF8_s(addBOM);
3191 #endif
3192         }
3193
3194         //! Loads an unknown stream of data.
3195         //! Will attempt to determine if the stream is unicode data.  Useful for loading from files.
3196         //! \param data The data stream to load from.
3197         //! \param data_size The length of the data string.
3198         //! \return A reference to our current string.
3199         ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3200         {
3201                 // Clear our string.
3202                 *this = "";
3203                 if (!data)
3204                         return *this;
3205
3206                 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3207                 switch (e)
3208                 {
3209                         default:
3210                         case unicode::EUTFE_UTF8:
3211                                 append((uchar8_t*)data, data_size);
3212                                 break;
3213
3214                         case unicode::EUTFE_UTF16:
3215                         case unicode::EUTFE_UTF16_BE:
3216                         case unicode::EUTFE_UTF16_LE:
3217                                 append((uchar16_t*)data, data_size / 2);
3218                                 break;
3219
3220                         case unicode::EUTFE_UTF32:
3221                         case unicode::EUTFE_UTF32_BE:
3222                         case unicode::EUTFE_UTF32_LE:
3223                                 append((uchar32_t*)data, data_size / 4);
3224                                 break;
3225                 }
3226
3227                 return *this;
3228         }
3229
3230         //! Gets the encoding of the Unicode string this class contains.
3231         //! \return An enum describing the current encoding of this string.
3232         const unicode::EUTF_ENCODE getEncoding() const
3233         {
3234                 return encoding;
3235         }
3236
3237         //! Gets the endianness of the Unicode string this class contains.
3238         //! \return An enum describing the endianness of this string.
3239         const unicode::EUTF_ENDIAN getEndianness() const
3240         {
3241                 if (encoding == unicode::EUTFE_UTF16_LE ||
3242                         encoding == unicode::EUTFE_UTF32_LE)
3243                         return unicode::EUTFEE_LITTLE;
3244                 else return unicode::EUTFEE_BIG;
3245         }
3246
3247 private:
3248
3249         //! Reallocate the string, making it bigger or smaller.
3250         //! \param new_size The new size of the string.
3251         void reallocate(u32 new_size)
3252         {
3253                 uchar16_t* old_array = array;
3254
3255                 array = allocator.allocate(new_size + 1); //new u16[new_size];
3256                 allocated = new_size + 1;
3257                 if (old_array == 0) return;
3258
3259                 u32 amount = used < new_size ? used : new_size;
3260                 for (u32 i=0; i<=amount; ++i)
3261                         array[i] = old_array[i];
3262
3263                 if (allocated <= used)
3264                         used = allocated - 1;
3265
3266                 array[used] = 0;
3267
3268                 allocator.deallocate(old_array); // delete [] old_array;
3269         }
3270
3271         //--- member variables
3272
3273         uchar16_t* array;
3274         unicode::EUTF_ENCODE encoding;
3275         u32 allocated;
3276         u32 used;
3277         TAlloc allocator;
3278         //irrAllocator<uchar16_t> allocator;
3279 };
3280
3281 typedef ustring16<irrAllocator<uchar16_t> > ustring;
3282
3283
3284 //! Appends two ustring16s.
3285 template <typename TAlloc>
3286 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3287 {
3288         ustring16<TAlloc> ret(left);
3289         ret += right;
3290         return ret;
3291 }
3292
3293
3294 //! Appends a ustring16 and a null-terminated unicode string.
3295 template <typename TAlloc, class B>
3296 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3297 {
3298         ustring16<TAlloc> ret(left);
3299         ret += right;
3300         return ret;
3301 }
3302
3303
3304 //! Appends a ustring16 and a null-terminated unicode string.
3305 template <class B, typename TAlloc>
3306 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3307 {
3308         ustring16<TAlloc> ret(left);
3309         ret += right;
3310         return ret;
3311 }
3312
3313
3314 //! Appends a ustring16 and an Irrlicht string.
3315 template <typename TAlloc, typename B, typename BAlloc>
3316 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3317 {
3318         ustring16<TAlloc> ret(left);
3319         ret += right;
3320         return ret;
3321 }
3322
3323
3324 //! Appends a ustring16 and an Irrlicht string.
3325 template <typename TAlloc, typename B, typename BAlloc>
3326 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3327 {
3328         ustring16<TAlloc> ret(left);
3329         ret += right;
3330         return ret;
3331 }
3332
3333
3334 //! Appends a ustring16 and a std::basic_string.
3335 template <typename TAlloc, typename B, typename A, typename BAlloc>
3336 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3337 {
3338         ustring16<TAlloc> ret(left);
3339         ret += right;
3340         return ret;
3341 }
3342
3343
3344 //! Appends a ustring16 and a std::basic_string.
3345 template <typename TAlloc, typename B, typename A, typename BAlloc>
3346 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3347 {
3348         ustring16<TAlloc> ret(left);
3349         ret += right;
3350         return ret;
3351 }
3352
3353
3354 //! Appends a ustring16 and a char.
3355 template <typename TAlloc>
3356 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3357 {
3358         ustring16<TAlloc> ret(left);
3359         ret += right;
3360         return ret;
3361 }
3362
3363
3364 //! Appends a ustring16 and a char.
3365 template <typename TAlloc>
3366 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3367 {
3368         ustring16<TAlloc> ret(left);
3369         ret += right;
3370         return ret;
3371 }
3372
3373
3374 #ifdef USTRING_CPP0X_NEWLITERALS
3375 //! Appends a ustring16 and a uchar32_t.
3376 template <typename TAlloc>
3377 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
3378 {
3379         ustring16<TAlloc> ret(left);
3380         ret += right;
3381         return ret;
3382 }
3383
3384
3385 //! Appends a ustring16 and a uchar32_t.
3386 template <typename TAlloc>
3387 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
3388 {
3389         ustring16<TAlloc> ret(left);
3390         ret += right;
3391         return ret;
3392 }
3393 #endif
3394
3395
3396 //! Appends a ustring16 and a short.
3397 template <typename TAlloc>
3398 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3399 {
3400         ustring16<TAlloc> ret(left);
3401         ret += core::stringc(right);
3402         return ret;
3403 }
3404
3405
3406 //! Appends a ustring16 and a short.
3407 template <typename TAlloc>
3408 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3409 {
3410         ustring16<TAlloc> ret((core::stringc(left)));
3411         ret += right;
3412         return ret;
3413 }
3414
3415
3416 //! Appends a ustring16 and an unsigned short.
3417 template <typename TAlloc>
3418 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3419 {
3420         ustring16<TAlloc> ret(left);
3421         ret += core::stringc(right);
3422         return ret;
3423 }
3424
3425
3426 //! Appends a ustring16 and an unsigned short.
3427 template <typename TAlloc>
3428 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3429 {
3430         ustring16<TAlloc> ret((core::stringc(left)));
3431         ret += right;
3432         return ret;
3433 }
3434
3435
3436 //! Appends a ustring16 and an int.
3437 template <typename TAlloc>
3438 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3439 {
3440         ustring16<TAlloc> ret(left);
3441         ret += core::stringc(right);
3442         return ret;
3443 }
3444
3445
3446 //! Appends a ustring16 and an int.
3447 template <typename TAlloc>
3448 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3449 {
3450         ustring16<TAlloc> ret((core::stringc(left)));
3451         ret += right;
3452         return ret;
3453 }
3454
3455
3456 //! Appends a ustring16 and an unsigned int.
3457 template <typename TAlloc>
3458 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3459 {
3460         ustring16<TAlloc> ret(left);
3461         ret += core::stringc(right);
3462         return ret;
3463 }
3464
3465
3466 //! Appends a ustring16 and an unsigned int.
3467 template <typename TAlloc>
3468 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3469 {
3470         ustring16<TAlloc> ret((core::stringc(left)));
3471         ret += right;
3472         return ret;
3473 }
3474
3475
3476 //! Appends a ustring16 and a long.
3477 template <typename TAlloc>
3478 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3479 {
3480         ustring16<TAlloc> ret(left);
3481         ret += core::stringc(right);
3482         return ret;
3483 }
3484
3485
3486 //! Appends a ustring16 and a long.
3487 template <typename TAlloc>
3488 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3489 {
3490         ustring16<TAlloc> ret((core::stringc(left)));
3491         ret += right;
3492         return ret;
3493 }
3494
3495
3496 //! Appends a ustring16 and an unsigned long.
3497 template <typename TAlloc>
3498 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3499 {
3500         ustring16<TAlloc> ret(left);
3501         ret += core::stringc(right);
3502         return ret;
3503 }
3504
3505
3506 //! Appends a ustring16 and an unsigned long.
3507 template <typename TAlloc>
3508 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3509 {
3510         ustring16<TAlloc> ret((core::stringc(left)));
3511         ret += right;
3512         return ret;
3513 }
3514
3515
3516 //! Appends a ustring16 and a float.
3517 template <typename TAlloc>
3518 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3519 {
3520         ustring16<TAlloc> ret(left);
3521         ret += core::stringc(right);
3522         return ret;
3523 }
3524
3525
3526 //! Appends a ustring16 and a float.
3527 template <typename TAlloc>
3528 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3529 {
3530         ustring16<TAlloc> ret((core::stringc(left)));
3531         ret += right;
3532         return ret;
3533 }
3534
3535
3536 //! Appends a ustring16 and a double.
3537 template <typename TAlloc>
3538 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3539 {
3540         ustring16<TAlloc> ret(left);
3541         ret += core::stringc(right);
3542         return ret;
3543 }
3544
3545
3546 //! Appends a ustring16 and a double.
3547 template <typename TAlloc>
3548 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3549 {
3550         ustring16<TAlloc> ret((core::stringc(left)));
3551         ret += right;
3552         return ret;
3553 }
3554
3555
3556 #ifdef USTRING_CPP0X
3557 //! Appends two ustring16s.
3558 template <typename TAlloc>
3559 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3560 {
3561         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3562         right.insert(left, 0);
3563         return std::move(right);
3564 }
3565
3566
3567 //! Appends two ustring16s.
3568 template <typename TAlloc>
3569 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3570 {
3571         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3572         left.append(right);
3573         return std::move(left);
3574 }
3575
3576
3577 //! Appends two ustring16s.
3578 template <typename TAlloc>
3579 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3580 {
3581         //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3582         if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3583                 (right.capacity() - right.size_raw() < left.size_raw()))
3584         {
3585                 left.append(right);
3586                 return std::move(left);
3587         }
3588         else
3589         {
3590                 right.insert(left, 0);
3591                 return std::move(right);
3592         }
3593 }
3594
3595
3596 //! Appends a ustring16 and a null-terminated unicode string.
3597 template <typename TAlloc, class B>
3598 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3599 {
3600         //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3601         left.append(right);
3602         return std::move(left);
3603 }
3604
3605
3606 //! Appends a ustring16 and a null-terminated unicode string.
3607 template <class B, typename TAlloc>
3608 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3609 {
3610         //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3611         right.insert(left, 0);
3612         return std::move(right);
3613 }
3614
3615
3616 //! Appends a ustring16 and an Irrlicht string.
3617 template <typename TAlloc, typename B, typename BAlloc>
3618 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3619 {
3620         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3621         right.insert(left, 0);
3622         return std::move(right);
3623 }
3624
3625
3626 //! Appends a ustring16 and an Irrlicht string.
3627 template <typename TAlloc, typename B, typename BAlloc>
3628 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3629 {
3630         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3631         left.append(right);
3632         return std::move(left);
3633 }
3634
3635
3636 //! Appends a ustring16 and a std::basic_string.
3637 template <typename TAlloc, typename B, typename A, typename BAlloc>
3638 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3639 {
3640         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3641         right.insert(core::ustring16<TAlloc>(left), 0);
3642         return std::move(right);
3643 }
3644
3645
3646 //! Appends a ustring16 and a std::basic_string.
3647 template <typename TAlloc, typename B, typename A, typename BAlloc>
3648 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3649 {
3650         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3651         left.append(right);
3652         return std::move(left);
3653 }
3654
3655
3656 //! Appends a ustring16 and a char.
3657 template <typename TAlloc>
3658 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3659 {
3660         left.append((uchar32_t)right);
3661         return std::move(left);
3662 }
3663
3664
3665 //! Appends a ustring16 and a char.
3666 template <typename TAlloc>
3667 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3668 {
3669         right.insert((uchar32_t)left, 0);
3670         return std::move(right);
3671 }
3672
3673
3674 #ifdef USTRING_CPP0X_NEWLITERALS
3675 //! Appends a ustring16 and a uchar32_t.
3676 template <typename TAlloc>
3677 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3678 {
3679         left.append(right);
3680         return std::move(left);
3681 }
3682
3683
3684 //! Appends a ustring16 and a uchar32_t.
3685 template <typename TAlloc>
3686 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
3687 {
3688         right.insert(left, 0);
3689         return std::move(right);
3690 }
3691 #endif
3692
3693
3694 //! Appends a ustring16 and a short.
3695 template <typename TAlloc>
3696 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3697 {
3698         left.append(core::stringc(right));
3699         return std::move(left);
3700 }
3701
3702
3703 //! Appends a ustring16 and a short.
3704 template <typename TAlloc>
3705 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3706 {
3707         right.insert(core::stringc(left), 0);
3708         return std::move(right);
3709 }
3710
3711
3712 //! Appends a ustring16 and an unsigned short.
3713 template <typename TAlloc>
3714 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3715 {
3716         left.append(core::stringc(right));
3717         return std::move(left);
3718 }
3719
3720
3721 //! Appends a ustring16 and an unsigned short.
3722 template <typename TAlloc>
3723 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3724 {
3725         right.insert(core::stringc(left), 0);
3726         return std::move(right);
3727 }
3728
3729
3730 //! Appends a ustring16 and an int.
3731 template <typename TAlloc>
3732 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3733 {
3734         left.append(core::stringc(right));
3735         return std::move(left);
3736 }
3737
3738
3739 //! Appends a ustring16 and an int.
3740 template <typename TAlloc>
3741 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3742 {
3743         right.insert(core::stringc(left), 0);
3744         return std::move(right);
3745 }
3746
3747
3748 //! Appends a ustring16 and an unsigned int.
3749 template <typename TAlloc>
3750 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3751 {
3752         left.append(core::stringc(right));
3753         return std::move(left);
3754 }
3755
3756
3757 //! Appends a ustring16 and an unsigned int.
3758 template <typename TAlloc>
3759 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3760 {
3761         right.insert(core::stringc(left), 0);
3762         return std::move(right);
3763 }
3764
3765
3766 //! Appends a ustring16 and a long.
3767 template <typename TAlloc>
3768 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3769 {
3770         left.append(core::stringc(right));
3771         return std::move(left);
3772 }
3773
3774
3775 //! Appends a ustring16 and a long.
3776 template <typename TAlloc>
3777 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3778 {
3779         right.insert(core::stringc(left), 0);
3780         return std::move(right);
3781 }
3782
3783
3784 //! Appends a ustring16 and an unsigned long.
3785 template <typename TAlloc>
3786 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3787 {
3788         left.append(core::stringc(right));
3789         return std::move(left);
3790 }
3791
3792
3793 //! Appends a ustring16 and an unsigned long.
3794 template <typename TAlloc>
3795 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3796 {
3797         right.insert(core::stringc(left), 0);
3798         return std::move(right);
3799 }
3800
3801
3802 //! Appends a ustring16 and a float.
3803 template <typename TAlloc>
3804 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3805 {
3806         left.append(core::stringc(right));
3807         return std::move(left);
3808 }
3809
3810
3811 //! Appends a ustring16 and a float.
3812 template <typename TAlloc>
3813 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3814 {
3815         right.insert(core::stringc(left), 0);
3816         return std::move(right);
3817 }
3818
3819
3820 //! Appends a ustring16 and a double.
3821 template <typename TAlloc>
3822 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3823 {
3824         left.append(core::stringc(right));
3825         return std::move(left);
3826 }
3827
3828
3829 //! Appends a ustring16 and a double.
3830 template <typename TAlloc>
3831 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3832 {
3833         right.insert(core::stringc(left), 0);
3834         return std::move(right);
3835 }
3836 #endif
3837
3838
3839 #ifndef USTRING_NO_STL
3840 //! Writes a ustring16 to an ostream.
3841 template <typename TAlloc>
3842 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3843 {
3844         out << in.toUTF8_s().c_str();
3845         return out;
3846 }
3847
3848 //! Writes a ustring16 to a wostream.
3849 template <typename TAlloc>
3850 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3851 {
3852         out << in.toWCHAR_s().c_str();
3853         return out;
3854 }
3855 #endif
3856
3857
3858 #ifndef USTRING_NO_STL
3859
3860 namespace unicode
3861 {
3862
3863 //! Hashing algorithm for hashing a ustring.  Used for things like unordered_maps.
3864 //! Algorithm taken from std::hash<std::string>.
3865 class hash : public std::unary_function<core::ustring, size_t>
3866 {
3867         public:
3868                 size_t operator()(const core::ustring& s) const
3869                 {
3870                         size_t ret = 2166136261U;
3871                         size_t index = 0;
3872                         size_t stride = 1 + s.size_raw() / 10;
3873
3874                         core::ustring::const_iterator i = s.begin();
3875                         while (i != s.end())
3876                         {
3877                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
3878                                 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3879                                 index += stride;
3880                                 i += stride;
3881                         }
3882                         return (ret);
3883                 }
3884 };
3885
3886 } // end namespace unicode
3887
3888 #endif
3889
3890 } // end namespace core
3891 } // end namespace irr