2 Basic Unicode string class for Irrlicht.
3 Copyright (c) 2009-2011 John Norman
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you
14 must not claim that you wrote the original software. If you use
15 this software in a product, an acknowledgment in the product
16 documentation would be appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source
24 The original version of this class can be located at:
25 http://irrlicht.suckerfreegames.com/
28 john@suckerfreegames.com
39 #define __BYTE_ORDER 0
40 #define __LITTLE_ENDIAN 0
41 #define __BIG_ENDIAN 1
42 #elif defined(__MACH__) && defined(__APPLE__)
43 #include <machine/endian.h>
44 #elif defined(__FreeBSD__) || defined(__DragonFly__)
45 #include <sys/endian.h>
59 #include "irrString.h"
62 //! UTF-16 surrogate start values.
63 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
64 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
//! Is a UTF-16 code unit a surrogate?
/** UTF16_IS_SURROGATE matches the whole surrogate range 0xD800..0xDFFF,
	UTF16_IS_SURROGATE_HI only the high (leading) half 0xD800..0xDBFF and
	UTF16_IS_SURROGATE_LO only the low (trailing) half 0xDC00..0xDFFF.
	Only the low 16 bits of the argument take part in the test, so pass a
	single 16-bit code unit. */
#define UTF16_IS_SURROGATE(c)		(((c) & 0xF800) == 0xD800)
#define UTF16_IS_SURROGATE_HI(c)	(((c) & 0xFC00) == 0xD800)
#define UTF16_IS_SURROGATE_LO(c)	(((c) & 0xFC00) == 0xDC00)
// Define our character types.
// uchar32_t holds one UTF-32 code unit, uchar16_t one UTF-16 code unit and
// uchar8_t one UTF-8 code unit (a plain char).
typedef char32_t uchar32_t;
typedef char16_t uchar16_t;
typedef char uchar8_t;
86 //! The unicode replacement character. Used to replace invalid characters.
87 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
89 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
90 //! \param high The high value of the pair.
91 //! \param low The low value of the pair.
92 //! \return The UTF-32 character expressed by the surrogate pair.
93 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
95 // Convert the surrogate pair into a single UTF-32 character.
96 uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
97 uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
98 return (wu << 16) | x;
101 //! Swaps the endianness of a 16-bit value.
102 //! \return The new value.
103 inline uchar16_t swapEndian16(const uchar16_t& c)
105 return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
108 //! Swaps the endianness of a 32-bit value.
109 //! \return The new value.
110 inline uchar32_t swapEndian32(const uchar32_t& c)
112 return ((c >> 24) & 0x000000FF) |
113 ((c >> 8) & 0x0000FF00) |
114 ((c << 8) & 0x00FF0000) |
115 ((c << 24) & 0xFF000000);
118 //! The Unicode byte order mark.
119 const u16 BOM = 0xFEFF;
121 //! The size of the Unicode byte order mark in terms of the Unicode character size.
122 const u8 BOM_UTF8_LEN = 3;
123 const u8 BOM_UTF16_LEN = 1;
124 const u8 BOM_UTF32_LEN = 1;
126 //! Unicode byte order marks for file operations.
127 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
128 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
129 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
130 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
131 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
133 //! The size in bytes of the Unicode byte marks for file operations.
134 const u8 BOM_ENCODE_UTF8_LEN = 3;
135 const u8 BOM_ENCODE_UTF16_LEN = 2;
136 const u8 BOM_ENCODE_UTF32_LEN = 4;
138 //! Unicode encoding type.
151 //! Unicode endianness.
159 //! Returns the specified unicode byte order mark in a byte array.
160 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
161 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
162 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
163 //! \return An array that contains a byte order mark.
164 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
166 #define COPY_ARRAY(source, size) \
167 memcpy(ret.pointer(), source, size); \
170 core::array<u8> ret(4);
174 COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
177 #if __BYTE_ORDER == __BIG_ENDIAN
178 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
180 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
184 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
187 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
190 #if __BYTE_ORDER == __BIG_ENDIAN
191 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
193 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
197 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
200 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
203 // TODO sapier: fixed warning only,
204 // don't know if something needs to be done here
212 //! Detects if the given data stream starts with a unicode BOM.
213 //! \param data The data stream to check.
214 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
215 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
217 if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
218 if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
219 if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
220 if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
221 if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
225 } // end namespace unicode
228 //! UTF-16 string class.
233 ///------------------///
234 /// iterator classes ///
235 ///------------------///
237 //! Access an element in a unicode string, allowing one to change it.
238 class _ustring16_iterator_access
241 _ustring16_iterator_access(const ustring16* s, u32 p) : ref(s), pos(p) {}
243 //! Allow the class to be interpreted as a single UTF-32 character.
244 operator uchar32_t() const
249 //! Allow one to change the character in the unicode string.
250 //! \param c The new character to use.
252 _ustring16_iterator_access& operator=(const uchar32_t c)
258 //! Increments the value by 1.
260 _ustring16_iterator_access& operator++()
266 //! Increments the value by 1, returning the old value.
267 //! \return A unicode character.
268 uchar32_t operator++(int)
270 uchar32_t old = _get();
275 //! Decrements the value by 1.
277 _ustring16_iterator_access& operator--()
283 //! Decrements the value by 1, returning the old value.
284 //! \return A unicode character.
285 uchar32_t operator--(int)
287 uchar32_t old = _get();
292 //! Adds to the value by a specified amount.
293 //! \param val The amount to add to this character.
295 _ustring16_iterator_access& operator+=(int val)
301 //! Subtracts from the value by a specified amount.
302 //! \param val The amount to subtract from this character.
304 _ustring16_iterator_access& operator-=(int val)
//! Multiplies the value by a specified amount.
311 //! \param val The amount to multiply this character by.
313 _ustring16_iterator_access& operator*=(int val)
319 //! Divides the value by a specified amount.
320 //! \param val The amount to divide this character by.
322 _ustring16_iterator_access& operator/=(int val)
328 //! Modulos the value by a specified amount.
329 //! \param val The amount to modulo this character by.
331 _ustring16_iterator_access& operator%=(int val)
337 //! Adds to the value by a specified amount.
338 //! \param val The amount to add to this character.
339 //! \return A unicode character.
340 uchar32_t operator+(int val) const
345 //! Subtracts from the value by a specified amount.
346 //! \param val The amount to subtract from this character.
347 //! \return A unicode character.
348 uchar32_t operator-(int val) const
353 //! Multiplies the value by a specified amount.
354 //! \param val The amount to multiply this character by.
355 //! \return A unicode character.
356 uchar32_t operator*(int val) const
361 //! Divides the value by a specified amount.
362 //! \param val The amount to divide this character by.
363 //! \return A unicode character.
364 uchar32_t operator/(int val) const
369 //! Modulos the value by a specified amount.
370 //! \param val The amount to modulo this character by.
371 //! \return A unicode character.
372 uchar32_t operator%(int val) const
378 //! Gets a uchar32_t from our current position.
379 uchar32_t _get() const
381 const uchar16_t* a = ref->c_str();
382 if (!UTF16_IS_SURROGATE(a[pos]))
383 return static_cast<uchar32_t>(a[pos]);
386 if (pos + 1 >= ref->size_raw())
389 return unicode::toUTF32(a[pos], a[pos + 1]);
393 //! Sets a uchar32_t at our current position.
394 void _set(uchar32_t c)
396 ustring16* ref2 = const_cast<ustring16*>(ref);
397 const uchar16_t* a = ref2->c_str();
400 // c will be multibyte, so split it up into the high and low surrogate pairs.
401 uchar16_t x = static_cast<uchar16_t>(c);
402 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
403 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
405 // If the previous position was a surrogate pair, just replace them. Else, insert the low pair.
406 if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
407 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
408 else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
410 ref2->replace_raw(vh, static_cast<u32>(pos));
414 // c will be a single byte.
415 uchar16_t vh = static_cast<uchar16_t>(c);
417 // If the previous position was a surrogate pair, remove the extra byte.
418 if (UTF16_IS_SURROGATE_HI(a[pos]))
419 ref2->erase_raw(static_cast<u32>(pos) + 1);
421 ref2->replace_raw(vh, static_cast<u32>(pos));
425 const ustring16* ref;
428 typedef typename ustring16::_ustring16_iterator_access access;
431 //! Iterator to iterate through a UTF-16 string.
432 class _ustring16_const_iterator : public std::iterator<
433 std::bidirectional_iterator_tag, // iterator_category
434 access, // value_type
435 ptrdiff_t, // difference_type
436 const access, // pointer
437 const access // reference
441 typedef _ustring16_const_iterator _Iter;
442 typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
443 typedef const access const_pointer;
444 typedef const access const_reference;
446 typedef typename _Base::value_type value_type;
447 typedef typename _Base::difference_type difference_type;
448 typedef typename _Base::difference_type distance_type;
449 typedef typename _Base::pointer pointer;
450 typedef const_reference reference;
453 _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
454 _ustring16_const_iterator(const ustring16& s) : ref(&s), pos(0) {}
455 _ustring16_const_iterator(const ustring16& s, const u32 p) : ref(&s), pos(0)
457 if (ref->size_raw() == 0 || p == 0)
460 // Go to the appropriate position.
462 u32 sr = ref->size_raw();
463 const uchar16_t* a = ref->c_str();
464 while (i != 0 && pos < sr)
466 if (UTF16_IS_SURROGATE_HI(a[pos]))
//! Test for equality.
474 bool operator==(const _Iter& iter) const
476 if (ref == iter.ref && pos == iter.pos)
//! Test for inequality.
482 bool operator!=(const _Iter& iter) const
484 if (ref != iter.ref || pos != iter.pos)
489 //! Switch to the next full character in the string.
492 if (pos == ref->size_raw()) return *this;
493 const uchar16_t* a = ref->c_str();
494 if (UTF16_IS_SURROGATE_HI(a[pos]))
495 pos += 2; // TODO: check for valid low surrogate?
497 if (pos > ref->size_raw()) pos = ref->size_raw();
501 //! Switch to the next full character in the string, returning the previous position.
502 _Iter operator++(int)
509 //! Switch to the previous full character in the string.
512 if (pos == 0) return *this;
513 const uchar16_t* a = ref->c_str();
515 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
520 //! Switch to the previous full character in the string, returning the previous position.
521 _Iter operator--(int)
528 //! Advance a specified number of full characters in the string.
530 _Iter& operator+=(const difference_type v)
532 if (v == 0) return *this;
533 if (v < 0) return operator-=(v * -1);
535 if (pos >= ref->size_raw())
538 // Go to the appropriate position.
539 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
541 u32 sr = ref->size_raw();
542 const uchar16_t* a = ref->c_str();
543 while (i != 0 && pos < sr)
545 if (UTF16_IS_SURROGATE_HI(a[pos]))
556 //! Go back a specified number of full characters in the string.
558 _Iter& operator-=(const difference_type v)
560 if (v == 0) return *this;
561 if (v > 0) return operator+=(v * -1);
566 // Go to the appropriate position.
567 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
569 const uchar16_t* a = ref->c_str();
570 while (i != 0 && pos != 0)
573 if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
581 //! Return a new iterator that is a variable number of full characters forward from the current position.
582 _Iter operator+(const difference_type v) const
589 //! Return a new iterator that is a variable number of full characters backward from the current position.
590 _Iter operator-(const difference_type v) const
597 //! Returns the distance between two iterators.
598 difference_type operator-(const _Iter& iter) const
600 // Make sure we reference the same object!
602 return difference_type();
627 //! Accesses the full character at the iterator's position.
628 const_reference operator*() const
630 if (pos >= ref->size_raw())
632 const uchar16_t* a = ref->c_str();
633 u32 p = ref->size_raw();
634 if (UTF16_IS_SURROGATE_LO(a[p]))
636 reference ret(ref, p);
639 const_reference ret(ref, pos);
643 //! Accesses the full character at the iterator's position.
644 reference operator*()
646 if (pos >= ref->size_raw())
648 const uchar16_t* a = ref->c_str();
649 u32 p = ref->size_raw();
650 if (UTF16_IS_SURROGATE_LO(a[p]))
652 reference ret(ref, p);
655 reference ret(ref, pos);
659 //! Accesses the full character at the iterator's position.
660 const_pointer operator->() const
665 //! Accesses the full character at the iterator's position.
671 //! Is the iterator at the start of the string?
677 //! Is the iterator at the end of the string?
680 const uchar16_t* a = ref->c_str();
681 if (UTF16_IS_SURROGATE(a[pos]))
682 return (pos + 1) >= ref->size_raw();
683 else return pos >= ref->size_raw();
686 //! Moves the iterator to the start of the string.
692 //! Moves the iterator to the end of the string.
695 pos = ref->size_raw();
698 //! Returns the iterator's position.
699 //! \return The iterator's position.
706 const ustring16* ref;
710 //! Iterator to iterate through a UTF-16 string.
711 class _ustring16_iterator : public _ustring16_const_iterator
714 typedef _ustring16_iterator _Iter;
715 typedef _ustring16_const_iterator _Base;
716 typedef typename _Base::const_pointer const_pointer;
717 typedef typename _Base::const_reference const_reference;
720 typedef typename _Base::value_type value_type;
721 typedef typename _Base::difference_type difference_type;
722 typedef typename _Base::distance_type distance_type;
723 typedef access pointer;
724 typedef access reference;
730 _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
731 _ustring16_iterator(const ustring16& s) : _ustring16_const_iterator(s) {}
732 _ustring16_iterator(const ustring16& s, const u32 p) : _ustring16_const_iterator(s, p) {}
734 //! Accesses the full character at the iterator's position.
735 reference operator*() const
737 if (pos >= ref->size_raw())
739 const uchar16_t* a = ref->c_str();
740 u32 p = ref->size_raw();
741 if (UTF16_IS_SURROGATE_LO(a[p]))
743 reference ret(ref, p);
746 reference ret(ref, pos);
750 //! Accesses the full character at the iterator's position.
751 reference operator*()
753 if (pos >= ref->size_raw())
755 const uchar16_t* a = ref->c_str();
756 u32 p = ref->size_raw();
757 if (UTF16_IS_SURROGATE_LO(a[p]))
759 reference ret(ref, p);
762 reference ret(ref, pos);
766 //! Accesses the full character at the iterator's position.
767 pointer operator->() const
772 //! Accesses the full character at the iterator's position.
779 typedef typename ustring16::_ustring16_iterator iterator;
780 typedef typename ustring16::_ustring16_const_iterator const_iterator;
782 ///----------------------///
783 /// end iterator classes ///
784 ///----------------------///
786 //! Default constructor
788 : array(0), allocated(1), used(0)
790 #if __BYTE_ORDER == __BIG_ENDIAN
791 encoding = unicode::EUTFE_UTF16_BE;
793 encoding = unicode::EUTFE_UTF16_LE;
795 array = new uchar16_t[1];
801 ustring16(const ustring16& other)
802 : array(0), allocated(0), used(0)
804 #if __BYTE_ORDER == __BIG_ENDIAN
805 encoding = unicode::EUTFE_UTF16_BE;
807 encoding = unicode::EUTFE_UTF16_LE;
813 //! Constructor from other string types
815 ustring16(const string<B>& other)
816 : array(0), allocated(0), used(0)
818 #if __BYTE_ORDER == __BIG_ENDIAN
819 encoding = unicode::EUTFE_UTF16_BE;
821 encoding = unicode::EUTFE_UTF16_LE;
826 //! Constructor from std::string
827 template <class B, class A, typename Alloc>
828 ustring16(const std::basic_string<B, A, Alloc>& other)
829 : array(0), allocated(0), used(0)
831 #if __BYTE_ORDER == __BIG_ENDIAN
832 encoding = unicode::EUTFE_UTF16_BE;
834 encoding = unicode::EUTFE_UTF16_LE;
836 *this = other.c_str();
840 //! Constructor from iterator.
841 template <typename Itr>
842 ustring16(Itr first, Itr last)
843 : array(0), allocated(0), used(0)
845 #if __BYTE_ORDER == __BIG_ENDIAN
846 encoding = unicode::EUTFE_UTF16_BE;
848 encoding = unicode::EUTFE_UTF16_LE;
850 reserve(std::distance(first, last));
853 for (; first != last; ++first)
854 append((uchar32_t)*first);
857 //! Constructor for copying a UTF-8 string from a pointer.
858 ustring16(const uchar8_t* const c)
859 : array(0), allocated(0), used(0)
861 #if __BYTE_ORDER == __BIG_ENDIAN
862 encoding = unicode::EUTFE_UTF16_BE;
864 encoding = unicode::EUTFE_UTF16_LE;
871 //! Constructor for copying a UTF-8 string from a single char.
872 ustring16(const char c)
873 : array(0), allocated(0), used(0)
875 #if __BYTE_ORDER == __BIG_ENDIAN
876 encoding = unicode::EUTFE_UTF16_BE;
878 encoding = unicode::EUTFE_UTF16_LE;
881 append((uchar32_t)c);
885 //! Constructor for copying a UTF-8 string from a pointer with a given length.
886 ustring16(const uchar8_t* const c, u32 length)
887 : array(0), allocated(0), used(0)
889 #if __BYTE_ORDER == __BIG_ENDIAN
890 encoding = unicode::EUTFE_UTF16_BE;
892 encoding = unicode::EUTFE_UTF16_LE;
899 //! Constructor for copying a UTF-16 string from a pointer.
900 ustring16(const uchar16_t* const c)
901 : array(0), allocated(0), used(0)
903 #if __BYTE_ORDER == __BIG_ENDIAN
904 encoding = unicode::EUTFE_UTF16_BE;
906 encoding = unicode::EUTFE_UTF16_LE;
913 //! Constructor for copying a UTF-16 string from a pointer with a given length
914 ustring16(const uchar16_t* const c, u32 length)
915 : array(0), allocated(0), used(0)
917 #if __BYTE_ORDER == __BIG_ENDIAN
918 encoding = unicode::EUTFE_UTF16_BE;
920 encoding = unicode::EUTFE_UTF16_LE;
927 //! Constructor for copying a UTF-32 string from a pointer.
928 ustring16(const uchar32_t* const c)
929 : array(0), allocated(0), used(0)
931 #if __BYTE_ORDER == __BIG_ENDIAN
932 encoding = unicode::EUTFE_UTF16_BE;
934 encoding = unicode::EUTFE_UTF16_LE;
//! Constructor for copying a UTF-32 string from a pointer with a given length.
942 ustring16(const uchar32_t* const c, u32 length)
943 : array(0), allocated(0), used(0)
945 #if __BYTE_ORDER == __BIG_ENDIAN
946 encoding = unicode::EUTFE_UTF16_BE;
948 encoding = unicode::EUTFE_UTF16_LE;
955 //! Constructor for copying a wchar_t string from a pointer.
956 ustring16(const wchar_t* const c)
957 : array(0), allocated(0), used(0)
959 #if __BYTE_ORDER == __BIG_ENDIAN
960 encoding = unicode::EUTFE_UTF16_BE;
962 encoding = unicode::EUTFE_UTF16_LE;
965 if (sizeof(wchar_t) == 4)
966 append(reinterpret_cast<const uchar32_t*>(c));
967 else if (sizeof(wchar_t) == 2)
968 append(reinterpret_cast<const uchar16_t*>(c));
969 else if (sizeof(wchar_t) == 1)
970 append(reinterpret_cast<const uchar8_t*>(c));
974 //! Constructor for copying a wchar_t string from a pointer with a given length.
975 ustring16(const wchar_t* const c, u32 length)
976 : array(0), allocated(0), used(0)
978 #if __BYTE_ORDER == __BIG_ENDIAN
979 encoding = unicode::EUTFE_UTF16_BE;
981 encoding = unicode::EUTFE_UTF16_LE;
984 if (sizeof(wchar_t) == 4)
985 append(reinterpret_cast<const uchar32_t*>(c), length);
986 else if (sizeof(wchar_t) == 2)
987 append(reinterpret_cast<const uchar16_t*>(c), length);
988 else if (sizeof(wchar_t) == 1)
989 append(reinterpret_cast<const uchar8_t*>(c), length);
993 //! Constructor for moving a ustring16
994 ustring16(ustring16&& other)
995 : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1009 //! Assignment operator
1010 ustring16& operator=(const ustring16& other)
1015 used = other.size_raw();
1016 if (used >= allocated)
1019 allocated = used + 1;
1020 array = new uchar16_t[used + 1];
1023 const uchar16_t* p = other.c_str();
1024 for (u32 i=0; i<=used; ++i, ++p)
1029 // Validate our new UTF-16 string.
1035 //! Move assignment operator
1036 ustring16& operator=(ustring16&& other)
1042 array = other.array;
1043 allocated = other.allocated;
1044 encoding = other.encoding;
1052 //! Assignment operator for other string types
1054 ustring16& operator=(const string<B>& other)
1056 *this = other.c_str();
1061 //! Assignment operator for UTF-8 strings
1062 ustring16& operator=(const uchar8_t* const c)
1066 array = new uchar16_t[1];
1071 if (!c) return *this;
1078 //! Assignment operator for UTF-16 strings
1079 ustring16& operator=(const uchar16_t* const c)
1083 array = new uchar16_t[1];
1088 if (!c) return *this;
1095 //! Assignment operator for UTF-32 strings
1096 ustring16& operator=(const uchar32_t* const c)
1100 array = new uchar16_t[1];
1105 if (!c) return *this;
1112 //! Assignment operator for wchar_t strings.
1113 /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1114 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1115 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1116 ustring16& operator=(const wchar_t* const c)
1118 if (sizeof(wchar_t) == 4)
1119 *this = reinterpret_cast<const uchar32_t*>(c);
1120 else if (sizeof(wchar_t) == 2)
1121 *this = reinterpret_cast<const uchar16_t*>(c);
1122 else if (sizeof(wchar_t) == 1)
1123 *this = reinterpret_cast<const uchar8_t*>(c);
1129 //! Assignment operator for other strings.
1130 /** Note that this assumes that a correct unicode string is stored in the string. **/
1132 ustring16& operator=(const B* const c)
1135 *this = reinterpret_cast<const uchar32_t* const>(c);
1136 else if (sizeof(B) == 2)
1137 *this = reinterpret_cast<const uchar16_t* const>(c);
1138 else if (sizeof(B) == 1)
1139 *this = reinterpret_cast<const uchar8_t* const>(c);
1145 //! Direct access operator
1146 access operator [](const u32 index)
1148 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1149 iterator iter(*this, index);
1150 return iter.operator*();
1154 //! Direct access operator
1155 const access operator [](const u32 index) const
1157 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1158 const_iterator iter(*this, index);
1159 return iter.operator*();
1163 //! Equality operator
1164 bool operator ==(const uchar16_t* const str) const
1170 for(i=0; array[i] && str[i]; ++i)
1171 if (array[i] != str[i])
1174 return !array[i] && !str[i];
1178 //! Equality operator
1179 bool operator ==(const ustring16& other) const
1181 for(u32 i=0; array[i] && other.array[i]; ++i)
1182 if (array[i] != other.array[i])
1185 return used == other.used;
1189 //! Is smaller comparator
1190 bool operator <(const ustring16& other) const
1192 for(u32 i=0; array[i] && other.array[i]; ++i)
1194 s32 diff = array[i] - other.array[i];
1199 return used < other.used;
1203 //! Inequality operator
1204 bool operator !=(const uchar16_t* const str) const
1206 return !(*this == str);
1210 //! Inequality operator
1211 bool operator !=(const ustring16& other) const
1213 return !(*this == other);
1217 //! Returns the length of a ustring16 in full characters.
1218 //! \return Length of a ustring16 in full characters.
1221 const_iterator i(*this, 0);
1232 //! Informs if the ustring is empty or not.
1233 //! \return True if the ustring is empty, false if not.
1236 return (size_raw() == 0);
1240 //! Returns a pointer to the raw UTF-16 string data.
1241 //! \return pointer to C-style NUL terminated array of UTF-16 code points.
1242 const uchar16_t* c_str() const
1248 //! Compares the first n characters of this string with another.
1249 //! \param other Other string to compare to.
1250 //! \param n Number of characters to compare.
1251 //! \return True if the n first characters of both strings are equal.
1252 bool equalsn(const ustring16& other, u32 n) const
1255 const uchar16_t* oa = other.c_str();
1256 for(i=0; i < n && array[i] && oa[i]; ++i)
1257 if (array[i] != oa[i])
1260 // if one (or both) of the strings was smaller then they
1261 // are only equal if they have the same length
1262 return (i == n) || (used == other.used);
1266 //! Compares the first n characters of this string with another.
1267 //! \param str Other string to compare to.
1268 //! \param n Number of characters to compare.
1269 //! \return True if the n first characters of both strings are equal.
1270 bool equalsn(const uchar16_t* const str, u32 n) const
1275 for(i=0; i < n && array[i] && str[i]; ++i)
1276 if (array[i] != str[i])
1279 // if one (or both) of the strings was smaller then they
1280 // are only equal if they have the same length
1281 return (i == n) || (array[i] == 0 && str[i] == 0);
1285 //! Appends a character to this ustring16
1286 //! \param character The character to append.
1287 //! \return A reference to our current string.
1288 ustring16& append(uchar32_t character)
1290 if (used + 2 >= allocated)
1291 reallocate(used + 2);
1293 if (character > 0xFFFF)
1297 // character will be multibyte, so split it up into a surrogate pair.
1298 uchar16_t x = static_cast<uchar16_t>(character);
1299 uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1300 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1307 array[used-1] = character;
1315 //! Appends a UTF-8 string to this ustring16
1316 //! \param other The UTF-8 string to append.
1317 //! \param length The length of the string to append.
1318 //! \return A reference to our current string.
1319 ustring16& append(const uchar8_t* const other, u32 length=0xffffffff)
1324 // Determine if the string is long enough for a BOM.
1326 const uchar8_t* p = other;
1330 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1333 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1334 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1336 if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1337 c_bom = unicode::EUTFE_UTF8;
1340 // If a BOM was found, don't include it in the string.
1341 const uchar8_t* c2 = other;
1342 if (c_bom != unicode::EUTFE_NONE)
1344 c2 = other + unicode::BOM_UTF8_LEN;
1345 length -= unicode::BOM_UTF8_LEN;
1348 // Calculate the size of the string to read in.
1354 } while(*p++ && len < length);
1358 // If we need to grow the array, do it now.
1359 if (used + len >= allocated)
1360 reallocate(used + (len * 2));
1363 // Convert UTF-8 to UTF-16.
1365 for (u32 l = 0; l<len;)
1368 if (((c2[l] >> 6) & 0x03) == 0x02)
1369 { // Invalid continuation byte.
1370 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1373 else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1374 { // Invalid byte - overlong encoding.
1375 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1378 else if ((c2[l] & 0xF8) == 0xF0)
1379 { // 4 bytes UTF-8, 2 bytes UTF-16.
1380 // Check for a full string.
1383 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1391 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1392 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1393 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1396 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1402 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1403 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1404 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1405 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1407 // Split v up into a surrogate pair.
1408 uchar16_t x = static_cast<uchar16_t>(v);
1409 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1410 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1415 ++used; // Using two shorts this time, so increase used by 1.
1417 else if ((c2[l] & 0xF0) == 0xE0)
1418 { // 3 bytes UTF-8, 1 byte UTF-16.
1419 // Check for a full string.
1422 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1430 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1431 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1434 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1440 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1441 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1442 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1446 else if ((c2[l] & 0xE0) == 0xC0)
1447 { // 2 bytes UTF-8, 1 byte UTF-16.
1448 // Check for a full string.
1451 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1457 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1459 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1465 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1466 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1467 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1472 { // 1 byte UTF-8, 1 byte UTF-16.
1475 { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1476 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1478 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1484 // Validate our new UTF-16 string.
1491 //! Appends a UTF-16 string to this ustring16
1492 //! \param other The UTF-16 string to append.
1493 //! \param length The length of the string to append.
1494 //! \return A reference to our current string.
1495 ustring16& append(const uchar16_t* const other, u32 length=0xffffffff)
1500 // Determine if the string is long enough for a BOM.
1502 const uchar16_t* p = other;
1506 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1508 // Check for the BOM to determine the string's endianness.
1509 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1510 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1511 c_end = unicode::EUTFEE_LITTLE;
1512 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1513 c_end = unicode::EUTFEE_BIG;
1515 // If a BOM was found, don't include it in the string.
1516 const uchar16_t* c2 = other;
1517 if (c_end != unicode::EUTFEE_NATIVE)
1519 c2 = other + unicode::BOM_UTF16_LEN;
1520 length -= unicode::BOM_UTF16_LEN;
1523 // Calculate the size of the string to read in.
1529 } while(*p++ && len < length);
1533 // If we need to grow the size of the array, do it now.
1534 if (used + len >= allocated)
1535 reallocate(used + (len * 2));
1539 // Copy the string now.
1540 unicode::EUTF_ENDIAN m_end = getEndianness();
1541 for (u32 l = start; l < start + len; ++l)
1543 array[l] = (uchar16_t)c2[l];
1544 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1545 array[l] = unicode::swapEndian16(array[l]);
1550 // Validate our new UTF-16 string.
1556 //! Appends a UTF-32 string to this ustring16
1557 //! \param other The UTF-32 string to append.
1558 //! \param length The length of the string to append.
1559 //! \return A reference to our current string.
1560 ustring16& append(const uchar32_t* const other, u32 length=0xffffffff)
1565 // Check for the BOM to determine the string's endianness.
1566 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1567 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1568 c_end = unicode::EUTFEE_LITTLE;
1569 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1570 c_end = unicode::EUTFEE_BIG;
1572 // If a BOM was found, don't include it in the string.
1573 const uchar32_t* c2 = other;
1574 if (c_end != unicode::EUTFEE_NATIVE)
1576 c2 = other + unicode::BOM_UTF32_LEN;
1577 length -= unicode::BOM_UTF32_LEN;
1580 // Calculate the size of the string to read in.
1582 const uchar32_t* p = c2;
1586 } while(*p++ && len < length);
1590 // If we need to grow the size of the array, do it now.
1591 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1592 if (used + (len * 2) >= allocated)
1593 reallocate(used + ((len * 2) * 2));
1596 // Convert UTF-32 to UTF-16.
1597 unicode::EUTF_ENDIAN m_end = getEndianness();
1599 for (u32 l = 0; l<len; ++l)
1603 uchar32_t ch = c2[l];
1604 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1605 ch = unicode::swapEndian32(ch);
1609 // Split ch up into a surrogate pair as it is over 16 bits long.
1610 uchar16_t x = static_cast<uchar16_t>(ch);
1611 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1612 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1615 ++used; // Using two shorts, so increased used again.
1617 else if (ch >= 0xD800 && ch <= 0xDFFF)
1619 // Between possible UTF-16 surrogates (invalid!)
1620 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1622 else array[pos++] = static_cast<uchar16_t>(ch);
1626 // Validate our new UTF-16 string.
1633 //! Appends a ustring16 to this ustring16
1634 //! \param other The string to append to this one.
1635 //! \return A reference to our current string.
1636 ustring16& append(const ustring16& other)
1638 const uchar16_t* oa = other.c_str();
1640 u32 len = other.size_raw();
1642 if (used + len >= allocated)
1643 reallocate(used + len);
1645 for (u32 l=0; l<len; ++l)
1646 array[used+l] = oa[l];
1655 //! Appends a certain amount of characters of a ustring16 to this ustring16.
1656 //! \param other The string to append to this one.
1657 //! \param length How many characters of the other string to add to this one.
1658 //! \return A reference to our current string.
1659 ustring16& append(const ustring16& other, u32 length)
1661 if (other.size() == 0)
1664 if (other.size() < length)
1670 if (used + length * 2 >= allocated)
1671 reallocate(used + length * 2);
1673 const_iterator iter(other, 0);
1675 while (!iter.atEnd() && l)
1677 uchar32_t c = *iter;
1687 //! Reserves some memory.
1688 //! \param count The amount of characters to reserve.
1689 void reserve(u32 count)
1691 if (count < allocated)
1698 //! Finds first occurrence of character.
1699 //! \param c The character to search for.
1700 //! \return Position where the character has been found, or -1 if not found.
1701 s32 findFirst(uchar32_t c) const
1703 const_iterator i(*this, 0);
1718 //! Finds first occurrence of a character of a list.
1719 //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1720 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1721 //! \return Position where one of the characters has been found, or -1 if not found.
1722 s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1727 const_iterator i(*this, 0);
1733 for (u32 j=0; j<count; ++j)
1744 //! Finds first position of a character not in a given list.
1745 //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1746 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1747 //! \return Position where the character has been found, or -1 if not found.
1748 s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1753 const_iterator i(*this, 0);
1760 for (j=0; j<count; ++j)
1773 //! Finds last position of a character not in a given list.
1774 //! \param c A list of characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should be "ab".
1775 //! \param count The number of characters in the list \p c.
1776 //! \return Position where the character has been found, or -1 if not found.
1777 s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
1782 const_iterator i(end()); // the scan starts from the end iterator
1785 s32 pos = size() - 1;
1786 while (!i.atStart())
1790 for (j=0; j<count; ++j)
1803 //! Finds next occurrence of character.
1804 //! \param c The character to search for.
1805 //! \param startPos The position in the string to start searching.
1806 //! \return Position where the character has been found, or -1 if not found.
1807 s32 findNext(uchar32_t c, u32 startPos) const
1809 const_iterator i(*this, startPos);
1825 //! Finds last occurrence of character.
1826 //! \param c The character to search for.
1827 //! \param start The start position of the reverse search ( default = -1, on end ).
1828 //! \return Position where the character has been found, or -1 if not found.
1829 s32 findLast(uchar32_t c, s32 start = -1) const
1832 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1834 const_iterator i(*this, start);
1836 while (!i.atStart())
1848 //! Finds last occurrence of a character in a list.
1849 //! \param c A list of characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1850 //! \param count The number of characters in the list \p c.
1851 //! \return Position where one of the characters has been found, or -1 if not found.
1852 s32 findLastChar(const uchar32_t* const c, u32 count=1) const
1857 const_iterator i(end()); // the scan starts from the end iterator
1861 while (!i.atStart())
1864 for (u32 j=0; j<count; ++j)
1875 //! Finds another ustring16 in this ustring16.
1876 //! \param str The string to find.
1877 //! \param start The start position of the search.
1878 //! \return Position where the ustring16 has been found, or -1 if not found.
1879 s32 find(const ustring16& str, const u32 start = 0) const
1881 u32 my_size = size();
1882 u32 their_size = str.size();
1884 if (their_size == 0 || my_size - start < their_size) // NOTE(review): my_size - start is unsigned; if start > my_size it wraps to a large value and this guard does not reject the call - confirm
1887 const_iterator i(*this, start);
1892 const_iterator i2(i); // i2 walks forward from the candidate position held by i
1893 const_iterator j(str, 0); // j walks the needle from its beginning
1894 uchar32_t t1 = (uchar32_t)*i2;
1895 uchar32_t t2 = (uchar32_t)*j;
1902 t1 = (uchar32_t)*i2;
1913 //! Finds another ustring16 in this ustring16 by comparing raw UTF-16 code units.
1914 //! \param str The string to find.
1915 //! \param start The raw array index at which the search starts.
1916 //! \return Position where the string has been found, or -1 if not found.
1917 s32 find_raw(const ustring16& str, const u32 start = 0) const
1919 const uchar16_t* data = str.c_str();
1930 for (u32 i=start; i<=used-len; ++i) // i indexes the raw array, not decoded characters
1934 while(data[j] && array[i+j] == data[j]) // stops at the needle's NUL or at the first mismatching code unit
1946 //! Returns a substring.
1947 //! \param begin: Start of substring.
1948 //! \param length: Length of substring.
1949 //! \return A new ustring16 holding the substring; an empty string if \p length is not positive or \p begin is at or past the end.
1950 ustring16 subString(u32 begin, s32 length) const
1953 // if start after ustring16
1954 // or no proper substring length
1955 if ((length <= 0) || (begin>=len))
1956 return ustring16("");
1957 // clamp length to maximal value
1958 if ((length+begin) > len)
1962 o.reserve((length+1) * 2); // reserves two units per requested character plus slack
1964 const_iterator i(*this, begin);
1965 while (!i.atEnd() && length)
1976 //! Appends a character to this ustring16.
1977 //! \param c Character to append; it is converted to a 32-bit character before appending.
1978 //! \return A reference to our current string.
1979 ustring16& operator += (char c)
1981 append((uchar32_t)c); // NOTE(review): where plain char is signed, values above 0x7F convert to 0xFFFFFFxx rather than 0x80-0xFF - confirm callers only pass ASCII
1986 //! Appends a character to this ustring16.
1987 //! \param c Character to append.
1988 //! \return A reference to our current string.
1989 ustring16& operator += (uchar32_t c)
1996 //! Appends a number to this ustring16.
1997 //! \param c Number to append.
1998 //! \return A reference to our current string.
1999 ustring16& operator += (short c)
2001 append(core::stringc(c));
2006 //! Appends a number to this ustring16.
2007 //! \param c Number to append.
2008 //! \return A reference to our current string.
2009 ustring16& operator += (unsigned short c)
2011 append(core::stringc(c));
2016 //! Appends a number to this ustring16.
2017 //! \param c Number to append.
2018 //! \return A reference to our current string.
2019 ustring16& operator += (int c)
2021 append(core::stringc(c));
2026 //! Appends a number to this ustring16.
2027 //! \param c Number to append.
2028 //! \return A reference to our current string.
2029 ustring16& operator += (unsigned int c)
2031 append(core::stringc(c));
2036 //! Appends a number to this ustring16.
2037 //! \param c Number to append.
2038 //! \return A reference to our current string.
2039 ustring16& operator += (long c)
2041 append(core::stringc(c));
2046 //! Appends a number to this ustring16.
2047 //! \param c Number to append.
2048 //! \return A reference to our current string.
2049 ustring16& operator += (unsigned long c)
2051 append(core::stringc(c));
2056 //! Appends a number to this ustring16.
2057 //! \param c Number to append.
2058 //! \return A reference to our current string.
2059 ustring16& operator += (double c)
2061 append(core::stringc(c));
2066 //! Appends a UTF-16 encoded string to this ustring16.
2067 //! \param c Pointer to the UTF-16 string to append.
2068 //! \return A reference to our current string.
2069 ustring16& operator += (const uchar16_t* const c)
2076 //! Appends a ustring16 to this ustring16.
2077 //! \param other ustring16 to append.
2078 //! \return A reference to our current string.
2079 ustring16& operator += (const ustring16& other)
2086 //! Replaces all characters of a given type with another one.
2087 //! \param toReplace Character to replace.
2088 //! \param replaceWith Character replacing the old one.
2089 //! \return A reference to our current string.
2090 ustring16& replace(uchar32_t toReplace, uchar32_t replaceWith)
2092 iterator i(*this, 0);
2095 typename ustring16::access a = *i;
2096 if ((uchar32_t)a == toReplace)
2104 //! Replaces all instances of a string with another one.
2105 //! Matching and copying operate on raw UTF-16 code units; three code paths are used depending on whether the replacement is the same length, shorter, or longer.
2106 //! \param toReplace The string to replace.
2106 //! \param replaceWith The string replacing the old one.
2107 //! \return A reference to our current string.
2108 ustring16& replace(const ustring16& toReplace, const ustring16& replaceWith)
2110 if (toReplace.size() == 0)
2113 const uchar16_t* other = toReplace.c_str();
2114 const uchar16_t* replace = replaceWith.c_str();
2115 const u32 other_size = toReplace.size_raw();
2116 const u32 replace_size = replaceWith.size_raw();
2118 // Determine the delta. The algorithm will change depending on the delta.
2119 s32 delta = replace_size - other_size; // difference in raw code units; negative when the replacement is shorter
2121 // A character for character replace. The string will not shrink or grow.
2125 while ((pos = find_raw(other, pos)) != -1) // NOTE(review): find_raw takes a const ustring16&, so passing the raw pointer presumably builds a temporary ustring16 on every call - passing toReplace would avoid that; confirm
2127 for (u32 i = 0; i < replace_size; ++i)
2128 array[pos + i] = replace[i];
2134 // We are going to be removing some characters. The string will shrink.
2138 for (u32 pos = 0; pos <= used; ++i, ++pos) // pos reads, i writes; both advance once per iteration
2140 // Is this potentially a match?
2141 if (array[pos] == *other)
2143 // Check to see if we have a match.
2145 for (j = 0; j < other_size; ++j)
2147 if (array[pos + j] != other[j])
2151 // If we have a match, replace characters.
2152 if (j == other_size)
2154 for (j = 0; j < replace_size; ++j)
2155 array[i + j] = replace[j];
2156 i += replace_size - 1; // the loop header adds the final +1
2157 pos += other_size - 1; // the loop header adds the final +1
2162 // No match found, just copy characters.
2163 array[i - 1] = array[pos]; // NOTE(review): writes to i - 1 while the match branch writes from i; if i starts at 0 (initialisation not visible here) the first copy indexes array[-1] - verify
2171 // We are going to be adding characters, so the string size will increase.
2172 // Count the number of times toReplace exists in the string so we can allocate the new size.
2175 while ((pos = find_raw(other, pos)) != -1)
2181 // Re-allocate the string now, if needed.
2182 u32 len = delta * find_count; // total growth in raw code units
2183 if (used + len >= allocated)
2184 reallocate(used + len);
2188 while ((pos = find_raw(other, pos)) != -1)
2190 uchar16_t* start = array + pos + other_size - 1; // last code unit of the matched text
2191 uchar16_t* ptr = array + used; // source cursor for the shift
2192 uchar16_t* end = array + used + delta; // destination cursor for the shift
2194 // Shift characters to make room for the string.
2195 while (ptr != start)
2202 // Add the new string now.
2203 for (u32 i = 0; i < replace_size; ++i)
2204 array[pos + i] = replace[i];
2206 pos += replace_size; // resume searching after the text just inserted
2210 // Terminate the string and return ourself.
2216 //! Removes characters from a ustring16.
2217 //! \param c The character to remove.
2218 //! \return A reference to our current string.
2219 ustring16& remove(uchar32_t c)
2223 u32 len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2224 for (u32 i=0; i<=used; ++i)
2227 if (!UTF16_IS_SURROGATE_HI(array[i]))
2229 else if (i + 1 <= used)
2231 // Convert the surrogate pair into a single UTF-32 character.
2232 uc32 = unicode::toUTF32(array[i], array[i + 1]);
2234 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2242 array[pos++] = array[i];
2244 array[pos++] = array[++i];
2252 //! Removes a ustring16 from the ustring16.
2253 //! \param toRemove The string to remove.
2254 //! \return A reference to our current string.
2255 ustring16& remove(const ustring16& toRemove)
2257 u32 size = toRemove.size_raw();
2258 if (size == 0) return *this;
2260 const uchar16_t* tra = toRemove.c_str();
2263 for (u32 i=0; i<=used; ++i)
2268 if (array[i + j] != tra[j])
2279 array[pos++] = array[i];
2287 //! Removes characters from the ustring16.
2288 //! \param characters The characters to remove.
2289 //! \return A reference to our current string.
2290 ustring16& removeChars(const ustring16& characters)
2292 if (characters.size_raw() == 0)
2297 const_iterator iter(characters);
2298 for (u32 i=0; i<=used; ++i)
2301 if (!UTF16_IS_SURROGATE_HI(array[i]))
2303 else if (i + 1 <= used)
2305 // Convert the surrogate pair into a single UTF-32 character.
2306 uc32 = unicode::toUTF32(array[i], array[i+1]);
2308 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2312 while (!iter.atEnd())
2314 uchar32_t c = *iter;
2317 found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2326 array[pos++] = array[i];
2328 array[pos++] = array[++i];
2336 //! Trims the ustring16.
2337 //! Removes the specified characters (by default, Latin-1 whitespace) from the beginning and the end of the ustring16.
2338 //! \param whitespace The characters that are to be considered as whitespace.
2339 //! \return A reference to our current string.
2340 ustring16& trim(const ustring16& whitespace = " \t\n\r")
2342 core::array<uchar32_t> utf32white = whitespace.toUTF32(); // the find*NotInList helpers take a UTF-32 character list
2344 // find start and end of the substring without the specified characters
2345 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1); // NOTE(review): count comes from whitespace.used (raw UTF-16 units) + 1 rather than utf32white.size(); verify it cannot exceed the number of entries in utf32white
2349 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1); // NOTE(review): same count concern as for begin
2351 return (*this = subString(begin, (end +1) - begin)); // keep the inclusive range [begin, end]
2355 //! Erases a character from the ustring16.
2356 //! May be slow, because all elements following after the erased element have to be copied.
2357 //! \param index Index of element to be erased.
2358 //! \return A reference to our current string.
2359 ustring16& erase(u32 index)
2361 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2363 iterator i(*this, index);
2366 u32 len = (t > 0xFFFF ? 2 : 1);
2368 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2369 array[j - len] = array[j];
2378 //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2379 //! Offending code units are overwritten in place with unicode::UTF_REPLACEMENT_CHARACTER.
2379 //! \return A reference to our current string.
2380 ustring16& validate()
2382 // Validate all unicode characters.
2383 for (u32 i=0; i<allocated; ++i) // walks the whole allocated buffer, not just `used` units
2385 // Terminate on existing null.
2391 if (UTF16_IS_SURROGATE(array[i]))
2393 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i])) // a surrogate in the final slot, or a low surrogate met at this position, is replaced
2394 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2395 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1])) // high surrogate without a following low surrogate
2396 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2399 if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF) // U+FDD0..U+FDEF are Unicode noncharacters
2400 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2407 used = allocated - 1; // presumably the fallback when no terminator was found (guarding condition not visible here) - confirm
2414 //! Gets the last char of the ustring16, or 0.
2415 //! \return The last char of the ustring16, or 0.
2416 uchar32_t lastChar() const
2421 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2423 // Make sure we have a paired surrogate.
2427 // Check for an invalid surrogate.
2428 if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2431 // Convert the surrogate pair into a single UTF-32 character.
2432 return unicode::toUTF32(array[used-2], array[used-1]);
2436 return array[used-1];
2441 //! Split the ustring16 into parts.
2442 /** This method will split a ustring16 at certain delimiter characters
2443 into the container passed in as reference. The type of the container
2444 has to be given as template parameter. It must provide a push_back and
2446 \param ret The result container
2447 \param c C-style ustring16 of delimiter characters
2448 \param count Number of delimiter characters
2449 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2450 container. If two delimiters occur without a character in between, an
2451 empty substring would be placed in the result. If this flag is set,
2452 only non-empty strings are stored.
2453 \param keepSeparators Flag which allows to add the separator to the
2454 result ustring16. If this flag is true, the concatenation of the
2455 substrings results in the original ustring16. Otherwise, only the
2456 characters between the delimiters are returned.
2457 \return The number of resulting substrings
2459 template<class container>
2460 u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2465 const_iterator i(*this);
2466 const u32 oldSize=ret.size();
2470 bool lastWasSeparator = false;
2474 bool foundSeparator = false;
2475 for (u32 j=0; j<count; ++j)
2479 if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2481 ret.push_back(ustring16(&array[lastpospos], pos - lastpos));
2482 foundSeparator = true;
2483 lastpos = (keepSeparators ? pos : pos + 1);
2484 lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2488 lastWasSeparator = foundSeparator;
2494 ret.push_back(ustring16(&array[lastpospos], s - lastpos));
2495 return ret.size()-oldSize;
2499 //! Split the ustring16 into parts.
2500 /** This method will split a ustring16 at certain delimiter characters
2501 into the container passed in as reference. The type of the container
2502 has to be given as template parameter. It must provide a push_back and
2504 \param ret The result container
2505 \param c A unicode string of delimiter characters
2506 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2507 container. If two delimiters occur without a character in between, an
2508 empty substring would be placed in the result. If this flag is set,
2509 only non-empty strings are stored.
2510 \param keepSeparators Flag which allows to add the separator to the
2511 result ustring16. If this flag is true, the concatenation of the
2512 substrings results in the original ustring16. Otherwise, only the
2513 characters between the delimiters are returned.
2514 \return The number of resulting substrings
2516 template<class container>
2517 u32 split(container& ret, const ustring16& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2519 core::array<uchar32_t> v = c.toUTF32();
2520 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2524 //! Gets the size of the allocated memory buffer for the string.
2525 //! \return The size of the allocated memory buffer.
2526 u32 capacity() const
2532 //! Returns the raw number of UTF-16 code units in the string, counting each surrogate individually.
2533 //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
2534 u32 size_raw() const
2540 //! Inserts a character into the string.
2541 //! \param c The character to insert.
2542 //! \param pos The position to insert the character.
2543 //! \return A reference to our current string.
2544 ustring16& insert(uchar32_t c, u32 pos)
2546 u8 len = (c > 0xFFFF ? 2 : 1);
2548 if (used + len >= allocated)
2549 reallocate(used + len);
2553 iterator iter(*this, pos);
2554 for (u32 i = used - 2; i > iter.getPos(); --i)
2555 array[i] = array[i - len];
2559 // c will be multibyte, so split it up into a surrogate pair.
2560 uchar16_t x = static_cast<uchar16_t>(c);
2561 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2562 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2563 array[iter.getPos()] = vh;
2564 array[iter.getPos()+1] = vl;
2568 array[iter.getPos()] = static_cast<uchar16_t>(c);
2575 //! Inserts a string into the string.
2576 //! \param c The string to insert.
2577 //! \param pos The position to insert the string.
2578 //! \return A reference to our current string.
2579 ustring16& insert(const ustring16& c, u32 pos)
2581 u32 len = c.size_raw();
2582 if (len == 0) return *this;
2584 if (used + len >= allocated)
2585 reallocate(used + len);
2589 iterator iter(*this, pos);
2590 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2591 array[i] = array[i - len];
2593 const uchar16_t* s = c.c_str();
2594 for (u32 i = 0; i < len; ++i)
2605 //! Inserts a character into the string.
2606 //! \param c The character to insert.
2607 //! \param pos The position to insert the character.
2608 //! \return A reference to our current string.
2609 ustring16& insert_raw(uchar16_t c, u32 pos)
2611 if (used + 1 >= allocated)
2612 reallocate(used + 1);
2616 for (u32 i = used - 1; i > pos; --i)
2617 array[i] = array[i - 1];
2625 //! Removes a single raw UTF-16 code unit from the string by shifting the following units down by one.
2626 //! \param pos Raw array index of the code unit to remove.
2627 //! \return A reference to our current string.
2628 ustring16& erase_raw(u32 pos)
2630 for (u32 i=pos; i<=used; ++i)
2632 array[i] = array[i + 1]; // NOTE(review): at i == used this reads array[used + 1]; confirm the buffer always has that slot
2640 //! Replaces a character in the string.
2641 //! \param c The new character.
2642 //! \param pos The position of the character to replace.
2643 //! \return A reference to our current string.
2644 ustring16& replace_raw(uchar16_t c, u32 pos)
2651 //! Returns an iterator to the beginning of the string.
2652 //! \return An iterator to the beginning of the string.
2655 iterator i(*this, 0);
2660 //! Returns an iterator to the beginning of the string.
2661 //! \return An iterator to the beginning of the string.
2662 const_iterator begin() const
2664 const_iterator i(*this, 0);
2669 //! Returns an iterator to the beginning of the string.
2670 //! \return An iterator to the beginning of the string.
2671 const_iterator cbegin() const
2673 const_iterator i(*this, 0);
2678 //! Returns an iterator to the end of the string.
2679 //! \return An iterator to the end of the string.
2682 iterator i(*this, 0);
2688 //! Returns an iterator to the end of the string.
2689 //! \return An iterator to the end of the string.
2690 const_iterator end() const
2692 const_iterator i(*this, 0);
2698 //! Returns an iterator to the end of the string.
2699 //! \return An iterator to the end of the string.
2700 const_iterator cend() const
2702 const_iterator i(*this, 0);
2708 //! Converts the string to a UTF-8 encoded string.
2709 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2710 //! \return A string containing the UTF-8 encoded string.
2711 core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2713 core::string<uchar8_t> ret;
2714 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2715 const_iterator iter(*this, 0);
2717 // Add the byte order mark if the user wants it.
2720 ret.append(unicode::BOM_ENCODE_UTF8[0]);
2721 ret.append(unicode::BOM_ENCODE_UTF8[1]);
2722 ret.append(unicode::BOM_ENCODE_UTF8[2]);
2725 while (!iter.atEnd())
2727 uchar32_t c = *iter;
2730 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2731 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2732 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2733 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2741 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2742 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2743 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2750 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2751 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2757 ret.append(static_cast<uchar8_t>(c));
2765 //! Converts the string to a UTF-8 encoded string array.
2766 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2767 //! \return An array containing the UTF-8 encoded string.
2768 core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2770 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2771 const_iterator iter(*this, 0);
2773 // Add the byte order mark if the user wants it.
2776 ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2777 ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2778 ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2781 while (!iter.atEnd())
2783 uchar32_t c = *iter;
2786 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2787 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2788 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2789 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2797 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2798 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2799 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2806 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2807 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2813 ret.push_back(static_cast<uchar8_t>(c));
2822 //! Converts the string to a UTF-16 encoded string array.
2823 //! Unfortunately, no toUTF16_s() version exists due to limitations with Irrlicht's string class.
2824 //! \param endian The desired endianness of the string.
2825 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2826 //! \return An array containing the UTF-16 encoded string.
2827 core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2829 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2830 uchar16_t* ptr = ret.pointer();
2832 // Add the BOM if specified.
2835 if (endian == unicode::EUTFEE_NATIVE)
2836 *ptr = unicode::BOM;
2837 else if (endian == unicode::EUTFEE_LITTLE)
2839 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2840 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2841 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2845 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2846 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2847 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2852 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2853 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2855 for (u32 i = 0; i <= used; ++i)
2856 ptr[i] = unicode::swapEndian16(ptr[i]);
2858 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2864 //! Converts the string to a UTF-32 encoded string array.
2865 //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
2866 //! \param endian The desired endianness of the string.
2867 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2868 //! \return An array containing the UTF-32 encoded string.
2869 core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2871 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
2872 const_iterator iter(*this, 0);
2874 // Add the BOM if specified.
2877 if (endian == unicode::EUTFEE_NATIVE)
2878 ret.push_back(unicode::BOM);
2887 if (endian == unicode::EUTFEE_LITTLE)
2889 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
2890 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
2891 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
2892 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
2896 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
2897 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
2898 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
2899 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
2901 ret.push_back(t.full);
2906 while (!iter.atEnd())
2908 uchar32_t c = *iter;
2909 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2910 c = unicode::swapEndian32(c);
2918 //! Converts the string to a wchar_t encoded string.
2919 /** The size of a wchar_t changes depending on the platform. This function will store a
2920 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
2921 //! \param endian The desired endianness of the string.
2922 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2923 //! \return A string containing the wchar_t encoded string.
2924 core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2926 if (sizeof(wchar_t) == 4)
2928 core::array<uchar32_t> a(toUTF32(endian, addBOM));
2929 core::stringw ret(a.pointer());
2932 else if (sizeof(wchar_t) == 2)
2934 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
2936 core::stringw ret(array);
2941 core::array<uchar16_t> a(toUTF16(endian, addBOM));
2942 core::stringw ret(a.pointer());
2946 else if (sizeof(wchar_t) == 1)
2948 core::array<uchar8_t> a(toUTF8(addBOM));
2949 core::stringw ret(a.pointer());
2953 // Shouldn't happen.
2954 return core::stringw();
2958 //! Converts the string to a wchar_t encoded string array.
2959 /** The size of a wchar_t changes depending on the platform. This function will store a
2960 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
2961 //! \param endian The desired endianness of the string.
2962 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2963 //! \return An array containing the wchar_t encoded string.
2964 core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2966 if (sizeof(wchar_t) == 4)
2968 core::array<uchar32_t> a(toUTF32(endian, addBOM));
2969 core::array<wchar_t> ret(a.size());
2970 ret.set_used(a.size());
2971 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
2974 if (sizeof(wchar_t) == 2)
2976 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
2978 core::array<wchar_t> ret(used);
2980 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
2985 core::array<uchar16_t> a(toUTF16(endian, addBOM));
2986 core::array<wchar_t> ret(a.size());
2987 ret.set_used(a.size());
2988 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
2992 if (sizeof(wchar_t) == 1)
2994 core::array<uchar8_t> a(toUTF8(addBOM));
2995 core::array<wchar_t> ret(a.size());
2996 ret.set_used(a.size());
2997 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3001 // Shouldn't happen.
3002 return core::array<wchar_t>();
3005 //! Converts the string to a properly encoded io::path string.
3006 //! \param endian The desired endianness of the string.
3007 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3008 //! \return An io::path string containing the properly encoded string.
3009 io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3011 return toUTF8_s(addBOM);
3014 //! Loads an unknown stream of data.
3015 //! Will attempt to determine if the stream is unicode data. Useful for loading from files.
3016 //! \param data The data stream to load from.
3017 //! \param data_size The length of the data in bytes (it is divided by 2 or 4 for the UTF-16 and UTF-32 cases).
3018 //! \return A reference to our current string.
3019 ustring16& loadDataStream(const char* data, size_t data_size)
3021 // Clear our string.
3026 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data); // NOTE(review): no length is passed; confirm this cannot read past data_size on very short inputs
3030 case unicode::EUTFE_UTF8:
3031 append((uchar8_t*)data, data_size); // C-style cast drops the const qualifier of data
3034 case unicode::EUTFE_UTF16:
3035 case unicode::EUTFE_UTF16_BE:
3036 case unicode::EUTFE_UTF16_LE:
3037 append((uchar16_t*)data, data_size / 2); // NOTE(review): reinterprets the byte buffer as uchar16_t (assumes suitable alignment) and narrows size_t to the u32 length parameter - confirm
3040 case unicode::EUTFE_UTF32:
3041 case unicode::EUTFE_UTF32_BE:
3042 case unicode::EUTFE_UTF32_LE:
3043 append((uchar32_t*)data, data_size / 4); // NOTE(review): same alignment and narrowing concern as the UTF-16 case
3050 //! Gets the encoding of the Unicode string this class contains.
3051 //! \return An enum describing the current encoding of this string.
//! The result type is unicode::EUTF_ENCODE; the accessor is declared const and takes no arguments.
3052 unicode::EUTF_ENCODE getEncoding() const
3057 //! Gets the endianness of the Unicode string this class contains.
3058 //! \return An enum describing the endianness of this string.
3059 unicode::EUTF_ENDIAN getEndianness() const
3061 if (encoding == unicode::EUTFE_UTF16_LE ||
3062 encoding == unicode::EUTFE_UTF32_LE)
3063 return unicode::EUTFEE_LITTLE;
3064 else return unicode::EUTFEE_BIG;
3069 //! Reallocate the string, making it bigger or smaller.
3070 //! \param new_size The new size of the string.
3071 void reallocate(u32 new_size)
3073 uchar16_t* old_array = array;
3075 array = new uchar16_t[new_size + 1];
3076 allocated = new_size + 1;
3077 if (old_array == 0) return;
3079 u32 amount = used < new_size ? used : new_size;
3080 for (u32 i=0; i<=amount; ++i)
3081 array[i] = old_array[i];
3083 if (allocated <= used)
3084 used = allocated - 1;
3088 delete [] old_array;
3091 //--- member variables
3094 unicode::EUTF_ENCODE encoding;
3099 typedef ustring16 ustring;
3102 /* these cause ambiguous overload errors and don't seem to be actually in use */
3104 //! Appends two ustring16s.
3105 inline ustring16 operator+(const ustring16& left, const ustring16& right)
3107 ustring16 ret(left);
3113 //! Appends a ustring16 and a null-terminated unicode string.
3115 inline ustring16 operator+(const ustring16& left, const B* const right)
3117 ustring16 ret(left);
3123 //! Appends a ustring16 and a null-terminated unicode string.
3125 inline ustring16 operator+(const B* const left, const ustring16& right)
3127 ustring16 ret(left);
3133 //! Appends a ustring16 and an Irrlicht string.
3134 template <typename B>
3135 inline ustring16 operator+(const ustring16& left, const string<B>& right)
3137 ustring16 ret(left);
3143 //! Appends a ustring16 and an Irrlicht string.
3144 template <typename B>
3145 inline ustring16 operator+(const string<B>& left, const ustring16& right)
3147 ustring16 ret(left);
3153 //! Appends a ustring16 and a std::basic_string.
3154 template <typename B, typename A, typename BAlloc>
3155 inline ustring16 operator+(const ustring16& left, const std::basic_string<B, A, BAlloc>& right)
3157 ustring16 ret(left);
3163 //! Appends a ustring16 and a std::basic_string.
3164 template <typename B, typename A, typename BAlloc>
3165 inline ustring16 operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16& right)
3167 ustring16 ret(left);
3173 //! Appends a ustring16 and a char.
3174 inline ustring16 operator+(const ustring16& left, const char right)
3176 ustring16 ret(left);
3182 //! Appends a ustring16 and a char.
3183 inline ustring16 operator+(const char left, const ustring16& right)
3185 ustring16 ret(left);
3191 //! Appends a ustring16 and a uchar32_t.
3192 inline ustring16 operator+(const ustring16& left, const uchar32_t right)
3194 ustring16 ret(left);
3200 //! Appends a ustring16 and a uchar32_t.
3201 inline ustring16 operator+(const uchar32_t left, const ustring16& right)
3203 ustring16 ret(left);
3209 //! Appends a ustring16 and a short.
3210 inline ustring16 operator+(const ustring16& left, const short right)
3212 ustring16 ret(left);
3213 ret += core::stringc(right);
3218 //! Appends a ustring16 and a short.
3219 inline ustring16 operator+(const short left, const ustring16& right)
3221 ustring16 ret((core::stringc(left)));
3227 //! Appends a ustring16 and an unsigned short.
3228 inline ustring16 operator+(const ustring16& left, const unsigned short right)
3230 ustring16 ret(left);
3231 ret += core::stringc(right);
3236 //! Appends a ustring16 and an unsigned short.
3237 inline ustring16 operator+(const unsigned short left, const ustring16& right)
3239 ustring16 ret((core::stringc(left)));
3245 //! Appends a ustring16 and an int.
3246 inline ustring16 operator+(const ustring16& left, const int right)
3248 ustring16 ret(left);
3249 ret += core::stringc(right);
3254 //! Appends a ustring16 and an int.
3255 inline ustring16 operator+(const int left, const ustring16& right)
3257 ustring16 ret((core::stringc(left)));
3263 //! Appends a ustring16 and an unsigned int.
3264 inline ustring16 operator+(const ustring16& left, const unsigned int right)
3266 ustring16 ret(left);
3267 ret += core::stringc(right);
3272 //! Appends a ustring16 and an unsigned int.
3273 inline ustring16 operator+(const unsigned int left, const ustring16& right)
3275 ustring16 ret((core::stringc(left)));
3281 //! Appends a ustring16 and a long.
3282 inline ustring16 operator+(const ustring16& left, const long right)
3284 ustring16 ret(left);
3285 ret += core::stringc(right);
3290 //! Appends a ustring16 and a long.
3291 inline ustring16 operator+(const long left, const ustring16& right)
3293 ustring16 ret((core::stringc(left)));
3299 //! Appends a ustring16 and an unsigned long.
3300 inline ustring16 operator+(const ustring16& left, const unsigned long right)
3302 ustring16 ret(left);
3303 ret += core::stringc(right);
3308 //! Appends a ustring16 and an unsigned long.
3309 inline ustring16 operator+(const unsigned long left, const ustring16& right)
3311 ustring16 ret((core::stringc(left)));
3317 //! Appends a ustring16 and a float.
3318 inline ustring16 operator+(const ustring16& left, const float right)
3320 ustring16 ret(left);
3321 ret += core::stringc(right);
3326 //! Appends a ustring16 and a float.
3327 inline ustring16 operator+(const float left, const ustring16& right)
3329 ustring16 ret((core::stringc(left)));
3335 //! Appends a ustring16 and a double.
3336 inline ustring16 operator+(const ustring16& left, const double right)
3338 ustring16 ret(left);
3339 ret += core::stringc(right);
3344 //! Appends a ustring16 and a double.
3345 inline ustring16 operator+(const double left, const ustring16& right)
3347 ustring16 ret((core::stringc(left)));
3353 //! Appends two ustring16s.
3354 inline ustring16&& operator+(const ustring16& left, ustring16&& right)
3356 right.insert(left, 0);
3357 return std::move(right);
3361 //! Appends two ustring16s.
3362 inline ustring16&& operator+(ustring16&& left, const ustring16& right)
3365 return std::move(left);
3369 //! Appends two ustring16s.
3370 inline ustring16&& operator+(ustring16&& left, ustring16&& right)
3372 if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3373 (right.capacity() - right.size_raw() < left.size_raw()))
3376 return std::move(left);
3380 right.insert(left, 0);
3381 return std::move(right);
3386 //! Appends a ustring16 and a null-terminated unicode string.
3388 inline ustring16&& operator+(ustring16&& left, const B* const right)
3391 return std::move(left);
3395 //! Appends a ustring16 and a null-terminated unicode string.
3397 inline ustring16&& operator+(const B* const left, ustring16&& right)
3399 right.insert(left, 0);
3400 return std::move(right);
3404 //! Appends a ustring16 and an Irrlicht string.
3405 template <typename B>
3406 inline ustring16&& operator+(const string<B>& left, ustring16&& right)
3408 right.insert(left, 0);
3409 return std::move(right);
3413 //! Appends a ustring16 and an Irrlicht string.
3414 template <typename B>
3415 inline ustring16&& operator+(ustring16&& left, const string<B>& right)
3418 return std::move(left);
3422 //! Appends a ustring16 and a std::basic_string.
3423 template <typename B, typename A, typename BAlloc>
3424 inline ustring16&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16&& right)
3426 right.insert(core::ustring16(left), 0);
3427 return std::move(right);
3431 //! Appends a ustring16 and a std::basic_string.
3432 template <typename B, typename A, typename BAlloc>
3433 inline ustring16&& operator+(ustring16&& left, const std::basic_string<B, A, BAlloc>& right)
3436 return std::move(left);
3440 //! Appends a ustring16 and a char.
3441 inline ustring16 operator+(ustring16&& left, const char right)
3443 left.append((uchar32_t)right);
3444 return std::move(left);
3448 //! Appends a ustring16 and a char.
3449 inline ustring16 operator+(const char left, ustring16&& right)
3451 right.insert((uchar32_t)left, 0);
3452 return std::move(right);
3456 //! Appends a ustring16 and a uchar32_t.
3457 inline ustring16 operator+(ustring16&& left, const uchar32_t right)
3460 return std::move(left);
3464 //! Appends a ustring16 and a uchar32_t.
3465 inline ustring16 operator+(const uchar32_t left, ustring16&& right)
3467 right.insert(left, 0);
3468 return std::move(right);
3472 //! Appends a ustring16 and a short.
3473 inline ustring16 operator+(ustring16&& left, const short right)
3475 left.append(core::stringc(right));
3476 return std::move(left);
3480 //! Appends a ustring16 and a short.
3481 inline ustring16 operator+(const short left, ustring16&& right)
3483 right.insert(core::stringc(left), 0);
3484 return std::move(right);
3488 //! Appends a ustring16 and an unsigned short.
3489 inline ustring16 operator+(ustring16&& left, const unsigned short right)
3491 left.append(core::stringc(right));
3492 return std::move(left);
3496 //! Appends a ustring16 and an unsigned short.
3497 inline ustring16 operator+(const unsigned short left, ustring16&& right)
3499 right.insert(core::stringc(left), 0);
3500 return std::move(right);
3504 //! Appends a ustring16 and an int.
3505 inline ustring16 operator+(ustring16&& left, const int right)
3507 left.append(core::stringc(right));
3508 return std::move(left);
3512 //! Appends a ustring16 and an int.
3513 inline ustring16 operator+(const int left, ustring16&& right)
3515 right.insert(core::stringc(left), 0);
3516 return std::move(right);
3520 //! Appends a ustring16 and an unsigned int.
3521 inline ustring16 operator+(ustring16&& left, const unsigned int right)
3523 left.append(core::stringc(right));
3524 return std::move(left);
3528 //! Appends a ustring16 and an unsigned int.
3529 inline ustring16 operator+(const unsigned int left, ustring16&& right)
3531 right.insert(core::stringc(left), 0);
3532 return std::move(right);
3536 //! Appends a ustring16 and a long.
3537 inline ustring16 operator+(ustring16&& left, const long right)
3539 left.append(core::stringc(right));
3540 return std::move(left);
3544 //! Appends a ustring16 and a long.
3545 inline ustring16 operator+(const long left, ustring16&& right)
3547 right.insert(core::stringc(left), 0);
3548 return std::move(right);
3552 //! Appends a ustring16 and an unsigned long.
3553 inline ustring16 operator+(ustring16&& left, const unsigned long right)
3555 left.append(core::stringc(right));
3556 return std::move(left);
3560 //! Appends a ustring16 and an unsigned long.
3561 inline ustring16 operator+(const unsigned long left, ustring16&& right)
3563 right.insert(core::stringc(left), 0);
3564 return std::move(right);
3568 //! Appends a ustring16 and a float.
3569 inline ustring16 operator+(ustring16&& left, const float right)
3571 left.append(core::stringc(right));
3572 return std::move(left);
3576 //! Appends a ustring16 and a float.
3577 inline ustring16 operator+(const float left, ustring16&& right)
3579 right.insert(core::stringc(left), 0);
3580 return std::move(right);
3584 //! Appends a ustring16 and a double.
3585 inline ustring16 operator+(ustring16&& left, const double right)
3587 left.append(core::stringc(right));
3588 return std::move(left);
3592 //! Appends a ustring16 and a double.
3593 inline ustring16 operator+(const double left, ustring16&& right)
3595 right.insert(core::stringc(left), 0);
3596 return std::move(right);
3601 //! Writes a ustring16 to an ostream.
3602 inline std::ostream& operator<<(std::ostream& out, const ustring16& in)
3604 out << in.toUTF8_s().c_str();
3608 //! Writes a ustring16 to a wostream.
3609 inline std::wostream& operator<<(std::wostream& out, const ustring16& in)
3611 out << in.toWCHAR_s().c_str();
3615 } // end namespace core
3616 } // end namespace irr