2 Basic Unicode string class for Irrlicht.
3 Copyright (c) 2009-2011 John Norman
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you
14 must not claim that you wrote the original software. If you use
15 this software in a product, an acknowledgment in the product
16 documentation would be appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source
24 The original version of this class can be located at:
25 http://irrlicht.suckerfreegames.com/
28 john@suckerfreegames.com
31 #ifndef __IRR_USTRING_H_INCLUDED__
32 #define __IRR_USTRING_H_INCLUDED__
34 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
35 # define USTRING_CPP0X
36 # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
37 # define USTRING_CPP0X_NEWLITERALS
50 #ifndef USTRING_NO_STL
57 #include "irrAllocator.h"
60 #include "irrString.h"
63 //! UTF-16 surrogate start values.
64 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
65 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
67 //! Is a UTF-16 code point a surrogate?
68 #define UTF16_IS_SURROGATE(c) (((c) & 0xF800) == 0xD800)
69 #define UTF16_IS_SURROGATE_HI(c) (((c) & 0xFC00) == 0xD800)
70 #define UTF16_IS_SURROGATE_LO(c) (((c) & 0xFC00) == 0xDC00)
76 // Define our character types.
77 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
78 typedef char32_t uchar32_t;
79 typedef char16_t uchar16_t;
80 typedef char uchar8_t;
82 typedef u32 uchar32_t;
83 typedef u16 uchar16_t;
93 //! The unicode replacement character. Used to replace invalid characters.
94 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
96 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
97 //! \param high The high value of the pair.
98 //! \param low The low value of the pair.
99 //! \return The UTF-32 character expressed by the surrogate pair.
100 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
102 // Convert the surrogate pair into a single UTF-32 character.
103 uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
104 uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
105 return (wu << 16) | x;
108 //! Swaps the endianness of a 16-bit value.
109 //! \return The new value.
110 inline uchar16_t swapEndian16(const uchar16_t& c)
112 return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
115 //! Swaps the endianness of a 32-bit value.
116 //! \return The new value.
117 inline uchar32_t swapEndian32(const uchar32_t& c)
119 return ((c >> 24) & 0x000000FF) |
120 ((c >> 8) & 0x0000FF00) |
121 ((c << 8) & 0x00FF0000) |
122 ((c << 24) & 0xFF000000);
125 //! The Unicode byte order mark.
126 const u16 BOM = 0xFEFF;
128 //! The size of the Unicode byte order mark in terms of the Unicode character size.
129 const u8 BOM_UTF8_LEN = 3;
130 const u8 BOM_UTF16_LEN = 1;
131 const u8 BOM_UTF32_LEN = 1;
133 //! Unicode byte order marks for file operations.
134 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
135 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
136 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
137 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
138 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
140 //! The size in bytes of the Unicode byte marks for file operations.
141 const u8 BOM_ENCODE_UTF8_LEN = 3;
142 const u8 BOM_ENCODE_UTF16_LEN = 2;
143 const u8 BOM_ENCODE_UTF32_LEN = 4;
145 //! Unicode encoding type.
158 //! Unicode endianness.
166 //! Returns the specified unicode byte order mark in a byte array.
167 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
168 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
169 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
170 //! \return An array that contains a byte order mark.
171 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
173 #define COPY_ARRAY(source, size) \
174 memcpy(ret.pointer(), source, size); \
177 core::array<u8> ret(4);
181 COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
184 #ifdef __BIG_ENDIAN__
185 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
187 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
191 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
194 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
197 #ifdef __BIG_ENDIAN__
198 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
200 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
204 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
207 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
210 // TODO sapier: fixed warning only,
211 // don't know if something needs to be done here
219 //! Detects if the given data stream starts with a unicode BOM.
220 //! \param data The data stream to check.
221 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
222 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
224 if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
225 if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
226 if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
227 if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
228 if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
232 } // end namespace unicode
235 //! UTF-16 string class.
236 template <typename TAlloc = irrAllocator<uchar16_t> >
241 ///------------------///
242 /// iterator classes ///
243 ///------------------///
245 //! Access an element in a unicode string, allowing one to change it.
246 class _ustring16_iterator_access
249 _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
251 //! Allow the class to be interpreted as a single UTF-32 character.
252 operator uchar32_t() const
257 //! Allow one to change the character in the unicode string.
258 //! \param c The new character to use.
260 _ustring16_iterator_access& operator=(const uchar32_t c)
266 //! Increments the value by 1.
268 _ustring16_iterator_access& operator++()
274 //! Increments the value by 1, returning the old value.
275 //! \return A unicode character.
276 uchar32_t operator++(int)
278 uchar32_t old = _get();
283 //! Decrements the value by 1.
285 _ustring16_iterator_access& operator--()
291 //! Decrements the value by 1, returning the old value.
292 //! \return A unicode character.
293 uchar32_t operator--(int)
295 uchar32_t old = _get();
300 //! Adds to the value by a specified amount.
301 //! \param val The amount to add to this character.
303 _ustring16_iterator_access& operator+=(int val)
309 //! Subtracts from the value by a specified amount.
310 //! \param val The amount to subtract from this character.
312 _ustring16_iterator_access& operator-=(int val)
318 //! Multiplies the value by a specified amount.
319 //! \param val The amount to multiply this character by.
321 _ustring16_iterator_access& operator*=(int val)
327 //! Divides the value by a specified amount.
328 //! \param val The amount to divide this character by.
330 _ustring16_iterator_access& operator/=(int val)
336 //! Modulos the value by a specified amount.
337 //! \param val The amount to modulo this character by.
339 _ustring16_iterator_access& operator%=(int val)
345 //! Adds to the value by a specified amount.
346 //! \param val The amount to add to this character.
347 //! \return A unicode character.
348 uchar32_t operator+(int val) const
353 //! Subtracts from the value by a specified amount.
354 //! \param val The amount to subtract from this character.
355 //! \return A unicode character.
356 uchar32_t operator-(int val) const
361 //! Multiplies the value by a specified amount.
362 //! \param val The amount to multiply this character by.
363 //! \return A unicode character.
364 uchar32_t operator*(int val) const
369 //! Divides the value by a specified amount.
370 //! \param val The amount to divide this character by.
371 //! \return A unicode character.
372 uchar32_t operator/(int val) const
377 //! Modulos the value by a specified amount.
378 //! \param val The amount to modulo this character by.
379 //! \return A unicode character.
380 uchar32_t operator%(int val) const
386 //! Gets a uchar32_t from our current position.
387 uchar32_t _get() const
389 const uchar16_t* a = ref->c_str();
390 if (!UTF16_IS_SURROGATE(a[pos]))
391 return static_cast<uchar32_t>(a[pos]);
394 if (pos + 1 >= ref->size_raw())
397 return unicode::toUTF32(a[pos], a[pos + 1]);
401 //! Sets a uchar32_t at our current position.
402 void _set(uchar32_t c)
404 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
405 const uchar16_t* a = ref2->c_str();
408 // c will be multibyte, so split it up into the high and low surrogate pairs.
409 uchar16_t x = static_cast<uchar16_t>(c);
410 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
411 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
413 // If the previous position was a surrogate pair, just replace them. Else, insert the low pair.
414 if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
415 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
416 else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
418 ref2->replace_raw(vh, static_cast<u32>(pos));
422 // c will be a single byte.
423 uchar16_t vh = static_cast<uchar16_t>(c);
425 // If the previous position was a surrogate pair, remove the extra byte.
426 if (UTF16_IS_SURROGATE_HI(a[pos]))
427 ref2->erase_raw(static_cast<u32>(pos) + 1);
429 ref2->replace_raw(vh, static_cast<u32>(pos));
433 const ustring16<TAlloc>* ref;
436 typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
439 //! Iterator to iterate through a UTF-16 string.
440 #ifndef USTRING_NO_STL
441 class _ustring16_const_iterator : public std::iterator<
442 std::bidirectional_iterator_tag, // iterator_category
443 access, // value_type
444 ptrdiff_t, // difference_type
445 const access, // pointer
446 const access // reference
449 class _ustring16_const_iterator
453 typedef _ustring16_const_iterator _Iter;
454 typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
455 typedef const access const_pointer;
456 typedef const access const_reference;
458 #ifndef USTRING_NO_STL
459 typedef typename _Base::value_type value_type;
460 typedef typename _Base::difference_type difference_type;
461 typedef typename _Base::difference_type distance_type;
462 typedef typename _Base::pointer pointer;
463 typedef const_reference reference;
465 typedef access value_type;
466 typedef u32 difference_type;
467 typedef u32 distance_type;
468 typedef const_pointer pointer;
469 typedef const_reference reference;
473 _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
474 _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
475 _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
477 if (ref->size_raw() == 0 || p == 0)
480 // Go to the appropriate position.
482 u32 sr = ref->size_raw();
483 const uchar16_t* a = ref->c_str();
484 while (i != 0 && pos < sr)
486 if (UTF16_IS_SURROGATE_HI(a[pos]))
493 //! Test for equality.
494 bool operator==(const _Iter& iter) const
496 if (ref == iter.ref && pos == iter.pos)
501 //! Test for inequality.
502 bool operator!=(const _Iter& iter) const
504 if (ref != iter.ref || pos != iter.pos)
509 //! Switch to the next full character in the string.
512 if (pos == ref->size_raw()) return *this;
513 const uchar16_t* a = ref->c_str();
514 if (UTF16_IS_SURROGATE_HI(a[pos]))
515 pos += 2; // TODO: check for valid low surrogate?
517 if (pos > ref->size_raw()) pos = ref->size_raw();
521 //! Switch to the next full character in the string, returning the previous position.
522 _Iter operator++(int)
529 //! Switch to the previous full character in the string.
532 if (pos == 0) return *this;
533 const uchar16_t* a = ref->c_str();
535 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
540 //! Switch to the previous full character in the string, returning the previous position.
541 _Iter operator--(int)
548 //! Advance a specified number of full characters in the string.
550 _Iter& operator+=(const difference_type v)
552 if (v == 0) return *this;
553 if (v < 0) return operator-=(v * -1);
555 if (pos >= ref->size_raw())
558 // Go to the appropriate position.
559 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
561 u32 sr = ref->size_raw();
562 const uchar16_t* a = ref->c_str();
563 while (i != 0 && pos < sr)
565 if (UTF16_IS_SURROGATE_HI(a[pos]))
576 //! Go back a specified number of full characters in the string.
578 _Iter& operator-=(const difference_type v)
580 if (v == 0) return *this;
581 if (v > 0) return operator+=(v * -1);
586 // Go to the appropriate position.
587 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
589 const uchar16_t* a = ref->c_str();
590 while (i != 0 && pos != 0)
593 if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
601 //! Return a new iterator that is a variable number of full characters forward from the current position.
602 _Iter operator+(const difference_type v) const
609 //! Return a new iterator that is a variable number of full characters backward from the current position.
610 _Iter operator-(const difference_type v) const
617 //! Returns the distance between two iterators.
618 difference_type operator-(const _Iter& iter) const
620 // Make sure we reference the same object!
622 return difference_type();
647 //! Accesses the full character at the iterator's position.
648 const_reference operator*() const
650 if (pos >= ref->size_raw())
652 const uchar16_t* a = ref->c_str();
653 u32 p = ref->size_raw();
654 if (UTF16_IS_SURROGATE_LO(a[p]))
656 reference ret(ref, p);
659 const_reference ret(ref, pos);
663 //! Accesses the full character at the iterator's position.
664 reference operator*()
666 if (pos >= ref->size_raw())
668 const uchar16_t* a = ref->c_str();
669 u32 p = ref->size_raw();
670 if (UTF16_IS_SURROGATE_LO(a[p]))
672 reference ret(ref, p);
675 reference ret(ref, pos);
679 //! Accesses the full character at the iterator's position.
680 const_pointer operator->() const
685 //! Accesses the full character at the iterator's position.
691 //! Is the iterator at the start of the string?
697 //! Is the iterator at the end of the string?
700 const uchar16_t* a = ref->c_str();
701 if (UTF16_IS_SURROGATE(a[pos]))
702 return (pos + 1) >= ref->size_raw();
703 else return pos >= ref->size_raw();
706 //! Moves the iterator to the start of the string.
712 //! Moves the iterator to the end of the string.
715 pos = ref->size_raw();
718 //! Returns the iterator's position.
719 //! \return The iterator's position.
726 const ustring16<TAlloc>* ref;
730 //! Iterator to iterate through a UTF-16 string.
731 class _ustring16_iterator : public _ustring16_const_iterator
734 typedef _ustring16_iterator _Iter;
735 typedef _ustring16_const_iterator _Base;
736 typedef typename _Base::const_pointer const_pointer;
737 typedef typename _Base::const_reference const_reference;
740 typedef typename _Base::value_type value_type;
741 typedef typename _Base::difference_type difference_type;
742 typedef typename _Base::distance_type distance_type;
743 typedef access pointer;
744 typedef access reference;
750 _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
751 _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
752 _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
754 //! Accesses the full character at the iterator's position.
755 reference operator*() const
757 if (pos >= ref->size_raw())
759 const uchar16_t* a = ref->c_str();
760 u32 p = ref->size_raw();
761 if (UTF16_IS_SURROGATE_LO(a[p]))
763 reference ret(ref, p);
766 reference ret(ref, pos);
770 //! Accesses the full character at the iterator's position.
771 reference operator*()
773 if (pos >= ref->size_raw())
775 const uchar16_t* a = ref->c_str();
776 u32 p = ref->size_raw();
777 if (UTF16_IS_SURROGATE_LO(a[p]))
779 reference ret(ref, p);
782 reference ret(ref, pos);
786 //! Accesses the full character at the iterator's position.
787 pointer operator->() const
792 //! Accesses the full character at the iterator's position.
799 typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
800 typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
802 ///----------------------///
803 /// end iterator classes ///
804 ///----------------------///
806 //! Default constructor
808 : array(0), allocated(1), used(0)
810 #if __BYTE_ORDER == __BIG_ENDIAN
811 encoding = unicode::EUTFE_UTF16_BE;
813 encoding = unicode::EUTFE_UTF16_LE;
815 array = allocator.allocate(1); // new u16[1];
821 ustring16(const ustring16<TAlloc>& other)
822 : array(0), allocated(0), used(0)
824 #if __BYTE_ORDER == __BIG_ENDIAN
825 encoding = unicode::EUTFE_UTF16_BE;
827 encoding = unicode::EUTFE_UTF16_LE;
833 //! Constructor from other string types
834 template <class B, class A>
835 ustring16(const string<B, A>& other)
836 : array(0), allocated(0), used(0)
838 #if __BYTE_ORDER == __BIG_ENDIAN
839 encoding = unicode::EUTFE_UTF16_BE;
841 encoding = unicode::EUTFE_UTF16_LE;
847 #ifndef USTRING_NO_STL
848 //! Constructor from std::string
849 template <class B, class A, typename Alloc>
850 ustring16(const std::basic_string<B, A, Alloc>& other)
851 : array(0), allocated(0), used(0)
853 #if __BYTE_ORDER == __BIG_ENDIAN
854 encoding = unicode::EUTFE_UTF16_BE;
856 encoding = unicode::EUTFE_UTF16_LE;
858 *this = other.c_str();
862 //! Constructor from iterator.
863 template <typename Itr>
864 ustring16(Itr first, Itr last)
865 : array(0), allocated(0), used(0)
867 #if __BYTE_ORDER == __BIG_ENDIAN
868 encoding = unicode::EUTFE_UTF16_BE;
870 encoding = unicode::EUTFE_UTF16_LE;
872 reserve(std::distance(first, last));
875 for (; first != last; ++first)
876 append((uchar32_t)*first);
881 #ifndef USTRING_CPP0X_NEWLITERALS
882 //! Constructor for copying a character string from a pointer.
883 ustring16(const char* const c)
884 : array(0), allocated(0), used(0)
886 #if __BYTE_ORDER == __BIG_ENDIAN
887 encoding = unicode::EUTFE_UTF16_BE;
889 encoding = unicode::EUTFE_UTF16_LE;
892 loadDataStream(c, strlen(c));
893 //append((uchar8_t*)c);
897 //! Constructor for copying a character string from a pointer with a given length.
898 ustring16(const char* const c, u32 length)
899 : array(0), allocated(0), used(0)
901 #if __BYTE_ORDER == __BIG_ENDIAN
902 encoding = unicode::EUTFE_UTF16_BE;
904 encoding = unicode::EUTFE_UTF16_LE;
907 loadDataStream(c, length);
912 //! Constructor for copying a UTF-8 string from a pointer.
913 ustring16(const uchar8_t* const c)
914 : array(0), allocated(0), used(0)
916 #if __BYTE_ORDER == __BIG_ENDIAN
917 encoding = unicode::EUTFE_UTF16_BE;
919 encoding = unicode::EUTFE_UTF16_LE;
926 //! Constructor for copying a UTF-8 string from a single char.
927 ustring16(const char c)
928 : array(0), allocated(0), used(0)
930 #if __BYTE_ORDER == __BIG_ENDIAN
931 encoding = unicode::EUTFE_UTF16_BE;
933 encoding = unicode::EUTFE_UTF16_LE;
936 append((uchar32_t)c);
940 //! Constructor for copying a UTF-8 string from a pointer with a given length.
941 ustring16(const uchar8_t* const c, u32 length)
942 : array(0), allocated(0), used(0)
944 #if __BYTE_ORDER == __BIG_ENDIAN
945 encoding = unicode::EUTFE_UTF16_BE;
947 encoding = unicode::EUTFE_UTF16_LE;
954 //! Constructor for copying a UTF-16 string from a pointer.
955 ustring16(const uchar16_t* const c)
956 : array(0), allocated(0), used(0)
958 #if __BYTE_ORDER == __BIG_ENDIAN
959 encoding = unicode::EUTFE_UTF16_BE;
961 encoding = unicode::EUTFE_UTF16_LE;
968 //! Constructor for copying a UTF-16 string from a pointer with a given length
969 ustring16(const uchar16_t* const c, u32 length)
970 : array(0), allocated(0), used(0)
972 #if __BYTE_ORDER == __BIG_ENDIAN
973 encoding = unicode::EUTFE_UTF16_BE;
975 encoding = unicode::EUTFE_UTF16_LE;
982 //! Constructor for copying a UTF-32 string from a pointer.
983 ustring16(const uchar32_t* const c)
984 : array(0), allocated(0), used(0)
986 #if __BYTE_ORDER == __BIG_ENDIAN
987 encoding = unicode::EUTFE_UTF16_BE;
989 encoding = unicode::EUTFE_UTF16_LE;
996 //! Constructor for copying a UTF-32 from a pointer with a given length.
997 ustring16(const uchar32_t* const c, u32 length)
998 : array(0), allocated(0), used(0)
1000 #if __BYTE_ORDER == __BIG_ENDIAN
1001 encoding = unicode::EUTFE_UTF16_BE;
1003 encoding = unicode::EUTFE_UTF16_LE;
1010 //! Constructor for copying a wchar_t string from a pointer.
1011 ustring16(const wchar_t* const c)
1012 : array(0), allocated(0), used(0)
1014 #if __BYTE_ORDER == __BIG_ENDIAN
1015 encoding = unicode::EUTFE_UTF16_BE;
1017 encoding = unicode::EUTFE_UTF16_LE;
1020 if (sizeof(wchar_t) == 4)
1021 append(reinterpret_cast<const uchar32_t* const>(c));
1022 else if (sizeof(wchar_t) == 2)
1023 append(reinterpret_cast<const uchar16_t* const>(c));
1024 else if (sizeof(wchar_t) == 1)
1025 append(reinterpret_cast<const uchar8_t* const>(c));
1029 //! Constructor for copying a wchar_t string from a pointer with a given length.
1030 ustring16(const wchar_t* const c, u32 length)
1031 : array(0), allocated(0), used(0)
1033 #if __BYTE_ORDER == __BIG_ENDIAN
1034 encoding = unicode::EUTFE_UTF16_BE;
1036 encoding = unicode::EUTFE_UTF16_LE;
1039 if (sizeof(wchar_t) == 4)
1040 append(reinterpret_cast<const uchar32_t* const>(c), length);
1041 else if (sizeof(wchar_t) == 2)
1042 append(reinterpret_cast<const uchar16_t* const>(c), length);
1043 else if (sizeof(wchar_t) == 1)
1044 append(reinterpret_cast<const uchar8_t* const>(c), length);
1048 #ifdef USTRING_CPP0X
1049 //! Constructor for moving a ustring16
1050 ustring16(ustring16<TAlloc>&& other)
1051 : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1053 //std::cout << "MOVE constructor" << std::endl;
1055 other.allocated = 0;
1064 allocator.deallocate(array); // delete [] array;
1068 //! Assignment operator
1069 ustring16& operator=(const ustring16<TAlloc>& other)
1074 used = other.size_raw();
1075 if (used >= allocated)
1077 allocator.deallocate(array); // delete [] array;
1078 allocated = used + 1;
1079 array = allocator.allocate(used + 1); //new u16[used];
1082 const uchar16_t* p = other.c_str();
1083 for (u32 i=0; i<=used; ++i, ++p)
1088 // Validate our new UTF-16 string.
1095 #ifdef USTRING_CPP0X
1096 //! Move assignment operator
1097 ustring16& operator=(ustring16<TAlloc>&& other)
1101 //std::cout << "MOVE operator=" << std::endl;
1102 allocator.deallocate(array);
1104 array = other.array;
1105 allocated = other.allocated;
1106 encoding = other.encoding;
1116 //! Assignment operator for other string types
1117 template <class B, class A>
1118 ustring16<TAlloc>& operator=(const string<B, A>& other)
1120 *this = other.c_str();
1125 //! Assignment operator for UTF-8 strings
1126 ustring16<TAlloc>& operator=(const uchar8_t* const c)
1130 array = allocator.allocate(1); //new u16[1];
1135 if (!c) return *this;
1137 //! Append our string now.
1143 //! Assignment operator for UTF-16 strings
1144 ustring16<TAlloc>& operator=(const uchar16_t* const c)
1148 array = allocator.allocate(1); //new u16[1];
1153 if (!c) return *this;
1155 //! Append our string now.
1161 //! Assignment operator for UTF-32 strings
1162 ustring16<TAlloc>& operator=(const uchar32_t* const c)
1166 array = allocator.allocate(1); //new u16[1];
1171 if (!c) return *this;
1173 //! Append our string now.
1179 //! Assignment operator for wchar_t strings.
1180 /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1181 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1182 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1183 ustring16<TAlloc>& operator=(const wchar_t* const c)
1185 if (sizeof(wchar_t) == 4)
1186 *this = reinterpret_cast<const uchar32_t* const>(c);
1187 else if (sizeof(wchar_t) == 2)
1188 *this = reinterpret_cast<const uchar16_t* const>(c);
1189 else if (sizeof(wchar_t) == 1)
1190 *this = reinterpret_cast<const uchar8_t* const>(c);
1196 //! Assignment operator for other strings.
1197 /** Note that this assumes that a correct unicode string is stored in the string. **/
1199 ustring16<TAlloc>& operator=(const B* const c)
1202 *this = reinterpret_cast<const uchar32_t* const>(c);
1203 else if (sizeof(B) == 2)
1204 *this = reinterpret_cast<const uchar16_t* const>(c);
1205 else if (sizeof(B) == 1)
1206 *this = reinterpret_cast<const uchar8_t* const>(c);
1212 //! Direct access operator
1213 access operator [](const u32 index)
1215 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1216 iterator iter(*this, index);
1217 return iter.operator*();
1221 //! Direct access operator
1222 const access operator [](const u32 index) const
1224 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1225 const_iterator iter(*this, index);
1226 return iter.operator*();
1230 //! Equality operator
1231 bool operator ==(const uchar16_t* const str) const
1237 for(i=0; array[i] && str[i]; ++i)
1238 if (array[i] != str[i])
1241 return !array[i] && !str[i];
1245 //! Equality operator
1246 bool operator ==(const ustring16<TAlloc>& other) const
1248 for(u32 i=0; array[i] && other.array[i]; ++i)
1249 if (array[i] != other.array[i])
1252 return used == other.used;
1256 //! Is smaller comparator
1257 bool operator <(const ustring16<TAlloc>& other) const
1259 for(u32 i=0; array[i] && other.array[i]; ++i)
1261 s32 diff = array[i] - other.array[i];
1266 return used < other.used;
1270 //! Inequality operator
1271 bool operator !=(const uchar16_t* const str) const
1273 return !(*this == str);
1277 //! Inequality operator
1278 bool operator !=(const ustring16<TAlloc>& other) const
1280 return !(*this == other);
1284 //! Returns the length of a ustring16 in full characters.
1285 //! \return Length of a ustring16 in full characters.
1288 const_iterator i(*this, 0);
1299 //! Informs if the ustring is empty or not.
1300 //! \return True if the ustring is empty, false if not.
1303 return (size_raw() == 0);
1307 //! Returns a pointer to the raw UTF-16 string data.
1308 //! \return pointer to C-style NUL terminated array of UTF-16 code points.
1309 const uchar16_t* c_str() const
1315 //! Compares the first n characters of this string with another.
1316 //! \param other Other string to compare to.
1317 //! \param n Number of characters to compare.
1318 //! \return True if the n first characters of both strings are equal.
1319 bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1322 const uchar16_t* oa = other.c_str();
1323 for(i=0; array[i] && oa[i] && i < n; ++i)
1324 if (array[i] != oa[i])
1327 // if one (or both) of the strings was smaller then they
1328 // are only equal if they have the same length
1329 return (i == n) || (used == other.used);
1333 //! Compares the first n characters of this string with another.
1334 //! \param str Other string to compare to.
1335 //! \param n Number of characters to compare.
1336 //! \return True if the n first characters of both strings are equal.
1337 bool equalsn(const uchar16_t* const str, u32 n) const
1342 for(i=0; array[i] && str[i] && i < n; ++i)
1343 if (array[i] != str[i])
1346 // if one (or both) of the strings was smaller then they
1347 // are only equal if they have the same length
1348 return (i == n) || (array[i] == 0 && str[i] == 0);
1352 //! Appends a character to this ustring16
1353 //! \param character The character to append.
1354 //! \return A reference to our current string.
1355 ustring16<TAlloc>& append(uchar32_t character)
1357 if (used + 2 >= allocated)
1358 reallocate(used + 2);
1360 if (character > 0xFFFF)
1364 // character will be multibyte, so split it up into a surrogate pair.
1365 uchar16_t x = static_cast<uchar16_t>(character);
1366 uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1367 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1374 array[used-1] = character;
1382 //! Appends a UTF-8 string to this ustring16
1383 //! \param other The UTF-8 string to append.
1384 //! \param length The length of the string to append.
1385 //! \return A reference to our current string.
1386 ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1391 // Determine if the string is long enough for a BOM.
1393 const uchar8_t* p = other;
1397 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1400 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1401 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1403 if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1404 c_bom = unicode::EUTFE_UTF8;
1407 // If a BOM was found, don't include it in the string.
1408 const uchar8_t* c2 = other;
1409 if (c_bom != unicode::EUTFE_NONE)
1411 c2 = other + unicode::BOM_UTF8_LEN;
1412 length -= unicode::BOM_UTF8_LEN;
1415 // Calculate the size of the string to read in.
1421 } while(*p++ && len < length);
1425 // If we need to grow the array, do it now.
1426 if (used + len >= allocated)
1427 reallocate(used + (len * 2));
1430 // Convert UTF-8 to UTF-16.
1432 for (u32 l = 0; l<len;)
1435 if (((c2[l] >> 6) & 0x03) == 0x02)
1436 { // Invalid continuation byte.
1437 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1440 else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1441 { // Invalid byte - overlong encoding.
1442 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1445 else if ((c2[l] & 0xF8) == 0xF0)
1446 { // 4 bytes UTF-8, 2 bytes UTF-16.
1447 // Check for a full string.
1450 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1458 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1459 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1460 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1463 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1469 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1470 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1471 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1472 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1474 // Split v up into a surrogate pair.
1475 uchar16_t x = static_cast<uchar16_t>(v);
1476 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1477 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1482 ++used; // Using two shorts this time, so increase used by 1.
1484 else if ((c2[l] & 0xF0) == 0xE0)
1485 { // 3 bytes UTF-8, 1 byte UTF-16.
1486 // Check for a full string.
1489 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1497 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1498 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1501 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1507 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1508 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1509 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1513 else if ((c2[l] & 0xE0) == 0xC0)
1514 { // 2 bytes UTF-8, 1 byte UTF-16.
1515 // Check for a full string.
1518 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1524 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1526 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1532 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1533 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1534 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1539 { // 1 byte UTF-8, 1 byte UTF-16.
1542 { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1543 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1545 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1551 // Validate our new UTF-16 string.
1558 //! Appends a UTF-16 string to this ustring16
1559 //! \param other The UTF-16 string to append.
1560 //! \param length The length of the string to append.
1561 //! \return A reference to our current string.
1562 ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1567 // Determine if the string is long enough for a BOM.
1569 const uchar16_t* p = other;
1573 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1575 // Check for the BOM to determine the string's endianness.
1576 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1577 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1578 c_end = unicode::EUTFEE_LITTLE;
1579 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1580 c_end = unicode::EUTFEE_BIG;
1582 // If a BOM was found, don't include it in the string.
1583 const uchar16_t* c2 = other;
1584 if (c_end != unicode::EUTFEE_NATIVE)
1586 c2 = other + unicode::BOM_UTF16_LEN;
1587 length -= unicode::BOM_UTF16_LEN;
1590 // Calculate the size of the string to read in.
1596 } while(*p++ && len < length);
1600 // If we need to grow the size of the array, do it now.
1601 if (used + len >= allocated)
1602 reallocate(used + (len * 2));
1606 // Copy the string now.
1607 unicode::EUTF_ENDIAN m_end = getEndianness();
1608 for (u32 l = start; l < start + len; ++l)
1610 array[l] = (uchar16_t)c2[l];
1611 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1612 array[l] = unicode::swapEndian16(array[l]);
1617 // Validate our new UTF-16 string.
1623 //! Appends a UTF-32 string to this ustring16
1624 //! \param other The UTF-32 string to append.
1625 //! \param length The length of the string to append.
1626 //! \return A reference to our current string.
1627 ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1632 // Check for the BOM to determine the string's endianness.
1633 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1634 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1635 c_end = unicode::EUTFEE_LITTLE;
1636 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1637 c_end = unicode::EUTFEE_BIG;
1639 // If a BOM was found, don't include it in the string.
1640 const uchar32_t* c2 = other;
1641 if (c_end != unicode::EUTFEE_NATIVE)
1643 c2 = other + unicode::BOM_UTF32_LEN;
1644 length -= unicode::BOM_UTF32_LEN;
1647 // Calculate the size of the string to read in.
1649 const uchar32_t* p = c2;
1653 } while(*p++ && len < length);
1657 // If we need to grow the size of the array, do it now.
1658 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1659 if (used + (len * 2) >= allocated)
1660 reallocate(used + ((len * 2) * 2));
1663 // Convert UTF-32 to UTF-16.
1664 unicode::EUTF_ENDIAN m_end = getEndianness();
1666 for (u32 l = 0; l<len; ++l)
1670 uchar32_t ch = c2[l];
1671 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1672 ch = unicode::swapEndian32(ch);
1676 // Split ch up into a surrogate pair as it is over 16 bits long.
1677 uchar16_t x = static_cast<uchar16_t>(ch);
1678 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1679 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1682 ++used; // Using two shorts, so increased used again.
1684 else if (ch >= 0xD800 && ch <= 0xDFFF)
1686 // Between possible UTF-16 surrogates (invalid!)
1687 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1689 else array[pos++] = static_cast<uchar16_t>(ch);
1693 // Validate our new UTF-16 string.
1700 //! Appends a ustring16 to this ustring16
1701 //! \param other The string to append to this one.
1702 //! \return A reference to our current string.
1703 ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1705 const uchar16_t* oa = other.c_str();
1707 u32 len = other.size_raw();
1709 if (used + len >= allocated)
1710 reallocate(used + len);
1712 for (u32 l=0; l<len; ++l)
1713 array[used+l] = oa[l];
1722 //! Appends a certain amount of characters of a ustring16 to this ustring16.
1723 //! \param other The string to append to this one.
1724 //! \param length How many characters of the other string to add to this one.
1725 //! \return A reference to our current string.
1726 ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1728 if (other.size() == 0)
1731 if (other.size() < length)
1737 if (used + length * 2 >= allocated)
1738 reallocate(used + length * 2);
1740 const_iterator iter(other, 0);
1742 while (!iter.atEnd() && l)
1744 uchar32_t c = *iter;
1754 //! Reserves some memory.
1755 //! \param count The amount of characters to reserve.
1756 void reserve(u32 count)
1758 if (count < allocated)
1765 //! Finds first occurrence of character.
1766 //! \param c The character to search for.
1767 //! \return Position where the character has been found, or -1 if not found.
1768 s32 findFirst(uchar32_t c) const
1770 const_iterator i(*this, 0);
1785 //! Finds first occurrence of a character of a list.
1786 //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1787 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1788 //! \return Position where one of the characters has been found, or -1 if not found.
1789 s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1794 const_iterator i(*this, 0);
1800 for (u32 j=0; j<count; ++j)
1811 //! Finds first position of a character not in a given list.
1812 //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1813 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1814 //! \return Position where the character has been found, or -1 if not found.
1815 s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1820 const_iterator i(*this, 0);
1827 for (j=0; j<count; ++j)
1840 //! Finds last position of a character not in a given list.
1841 //! \param c A list of characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should be "ab".
1842 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1843 //! \return Position where the character has been found, or -1 if not found.
1844 s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
1849 const_iterator i(end());
1852 s32 pos = size() - 1;
1853 while (!i.atStart())
1857 for (j=0; j<count; ++j)
1870 //! Finds next occurrence of character.
1871 //! \param c The character to search for.
1872 //! \param startPos The position in the string to start searching.
1873 //! \return Position where the character has been found, or -1 if not found.
1874 s32 findNext(uchar32_t c, u32 startPos) const
1876 const_iterator i(*this, startPos);
1892 //! Finds last occurrence of character.
1893 //! \param c The character to search for.
1894 //! \param start The start position of the reverse search ( default = -1, on end ).
1895 //! \return Position where the character has been found, or -1 if not found.
1896 s32 findLast(uchar32_t c, s32 start = -1) const
1899 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1901 const_iterator i(*this, start);
1903 while (!i.atStart())
1915 //! Finds last occurrence of a character in a list.
1916 //! \param c A list of characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1917 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1918 //! \return Position where one of the characters has been found, or -1 if not found.
1919 s32 findLastChar(const uchar32_t* const c, u32 count=1) const
1924 const_iterator i(end());
1928 while (!i.atStart())
1931 for (u32 j=0; j<count; ++j)
1942 //! Finds another ustring16 in this ustring16.
1943 //! \param str The string to find.
1944 //! \param start The start position of the search.
1945 //! \return Position where the ustring16 has been found, or -1 if not found.
1946 s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1948 u32 my_size = size();
1949 u32 their_size = str.size();
1951 if (their_size == 0 || my_size - start < their_size)
1954 const_iterator i(*this, start);
1959 const_iterator i2(i);
1960 const_iterator j(str, 0);
1961 uchar32_t t1 = (uchar32_t)*i2;
1962 uchar32_t t2 = (uchar32_t)*j;
1969 t1 = (uchar32_t)*i2;
1980 //! Finds another ustring16 in this ustring16 by comparing raw UTF-16 code units.
1981 //! \param str The string to find.
1982 //! \param start The start position of the search, as an index into the raw UTF-16 array.
1983 //! \return Position where the string has been found, or -1 if not found.
1984 s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1986 const uchar16_t* data = str.c_str();
1997 for (u32 i=start; i<=used-len; ++i)
2001 while(data[j] && array[i+j] == data[j])
2013 //! Returns a substring.
2014 //! \param begin: Start of substring.
2015 //! \param length: Length of substring.
2016 //! \return A new ustring16 containing the requested substring.
2017 ustring16<TAlloc> subString(u32 begin, s32 length) const
2020 // if start after ustring16
2021 // or no proper substring length
2022 if ((length <= 0) || (begin>=len))
2023 return ustring16<TAlloc>("");
2024 // clamp length to maximal value
2025 if ((length+begin) > len)
2028 ustring16<TAlloc> o;
2029 o.reserve((length+1) * 2);
2031 const_iterator i(*this, begin);
2032 while (!i.atEnd() && length)
2043 //! Appends a character to this ustring16.
2044 //! \param c Character to append.
2045 //! \return A reference to our current string.
2046 ustring16<TAlloc>& operator += (char c)
2048 append((uchar32_t)c);
2053 //! Appends a character to this ustring16.
2054 //! \param c Character to append.
2055 //! \return A reference to our current string.
2056 ustring16<TAlloc>& operator += (uchar32_t c)
2063 //! Appends a number to this ustring16.
2064 //! \param c Number to append.
2065 //! \return A reference to our current string.
2066 ustring16<TAlloc>& operator += (short c)
2068 append(core::stringc(c));
2073 //! Appends a number to this ustring16.
2074 //! \param c Number to append.
2075 //! \return A reference to our current string.
2076 ustring16<TAlloc>& operator += (unsigned short c)
2078 append(core::stringc(c));
2083 #ifdef USTRING_CPP0X_NEWLITERALS
2084 //! Appends a number to this ustring16.
2085 //! \param c Number to append.
2086 //! \return A reference to our current string.
2087 ustring16<TAlloc>& operator += (int c)
2089 append(core::stringc(c));
2094 //! Appends a number to this ustring16.
2095 //! \param c Number to append.
2096 //! \return A reference to our current string.
2097 ustring16<TAlloc>& operator += (unsigned int c)
2099 append(core::stringc(c));
2105 //! Appends a number to this ustring16.
2106 //! \param c Number to append.
2107 //! \return A reference to our current string.
2108 ustring16<TAlloc>& operator += (long c)
2110 append(core::stringc(c));
2115 //! Appends a number to this ustring16.
2116 //! \param c Number to append.
2117 //! \return A reference to our current string.
2118 ustring16<TAlloc>& operator += (unsigned long c)
2120 append(core::stringc(c));
2125 //! Appends a number to this ustring16.
2126 //! \param c Number to append.
2127 //! \return A reference to our current string.
2128 ustring16<TAlloc>& operator += (double c)
2130 append(core::stringc(c));
2135 //! Appends a UTF-16 string to this ustring16.
2136 //! \param c UTF-16 string to append.
2137 //! \return A reference to our current string.
2138 ustring16<TAlloc>& operator += (const uchar16_t* const c)
2145 //! Appends a ustring16 to this ustring16.
2146 //! \param other ustring16 to append.
2147 //! \return A reference to our current string.
2148 ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2155 //! Replaces all characters of a given type with another one.
2156 //! \param toReplace Character to replace.
2157 //! \param replaceWith Character replacing the old one.
2158 //! \return A reference to our current string.
2159 ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
2161 iterator i(*this, 0);
2164 typename ustring16<TAlloc>::access a = *i;
2165 if ((uchar32_t)a == toReplace)
2173 //! Replaces all instances of a string with another one.
2174 //! \param toReplace The string to replace.
2175 //! \param replaceWith The string replacing the old one.
2176 //! \return A reference to our current string.
2177 ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2179 if (toReplace.size() == 0)
2182 const uchar16_t* other = toReplace.c_str();
2183 const uchar16_t* replace = replaceWith.c_str();
2184 const u32 other_size = toReplace.size_raw();
2185 const u32 replace_size = replaceWith.size_raw();
2187 // Determine the delta. The algorithm will change depending on the delta.
2188 s32 delta = replace_size - other_size;
2190 // A character for character replace. The string will not shrink or grow.
2194 while ((pos = find_raw(other, pos)) != -1)
2196 for (u32 i = 0; i < replace_size; ++i)
2197 array[pos + i] = replace[i];
2203 // We are going to be removing some characters. The string will shrink.
2207 for (u32 pos = 0; pos <= used; ++i, ++pos)
2209 // Is this potentially a match?
2210 if (array[pos] == *other)
2212 // Check to see if we have a match.
2214 for (j = 0; j < other_size; ++j)
2216 if (array[pos + j] != other[j])
2220 // If we have a match, replace characters.
2221 if (j == other_size)
2223 for (j = 0; j < replace_size; ++j)
2224 array[i + j] = replace[j];
2225 i += replace_size - 1;
2226 pos += other_size - 1;
2231 // No match found, just copy characters.
2232 array[i - 1] = array[pos];
2240 // We are going to be adding characters, so the string size will increase.
2241 // Count the number of times toReplace exists in the string so we can allocate the new size.
2244 while ((pos = find_raw(other, pos)) != -1)
2250 // Re-allocate the string now, if needed.
2251 u32 len = delta * find_count;
2252 if (used + len >= allocated)
2253 reallocate(used + len);
2257 while ((pos = find_raw(other, pos)) != -1)
2259 uchar16_t* start = array + pos + other_size - 1;
2260 uchar16_t* ptr = array + used;
2261 uchar16_t* end = array + used + delta;
2263 // Shift characters to make room for the string.
2264 while (ptr != start)
2271 // Add the new string now.
2272 for (u32 i = 0; i < replace_size; ++i)
2273 array[pos + i] = replace[i];
2275 pos += replace_size;
2279 // Terminate the string and return ourself.
2285 //! Removes characters from a ustring16.
2286 //! \param c The character to remove.
2287 //! \return A reference to our current string.
2288 ustring16<TAlloc>& remove(uchar32_t c)
2292 u32 len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2293 for (u32 i=0; i<=used; ++i)
2296 if (!UTF16_IS_SURROGATE_HI(array[i]))
2298 else if (i + 1 <= used)
2300 // Convert the surrogate pair into a single UTF-32 character.
2301 uc32 = unicode::toUTF32(array[i], array[i + 1]);
2303 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2311 array[pos++] = array[i];
2313 array[pos++] = array[++i];
2321 //! Removes a ustring16 from the ustring16.
2322 //! \param toRemove The string to remove.
2323 //! \return A reference to our current string.
2324 ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
2326 u32 size = toRemove.size_raw();
2327 if (size == 0) return *this;
2329 const uchar16_t* tra = toRemove.c_str();
2332 for (u32 i=0; i<=used; ++i)
2337 if (array[i + j] != tra[j])
2348 array[pos++] = array[i];
2356 //! Removes characters from the ustring16.
2357 //! \param characters The characters to remove.
2358 //! \return A reference to our current string.
2359 ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2361 if (characters.size_raw() == 0)
2366 const_iterator iter(characters);
2367 for (u32 i=0; i<=used; ++i)
2370 if (!UTF16_IS_SURROGATE_HI(array[i]))
2372 else if (i + 1 <= used)
2374 // Convert the surrogate pair into a single UTF-32 character.
2375 uc32 = unicode::toUTF32(array[i], array[i+1]);
2377 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2381 while (!iter.atEnd())
2383 uchar32_t c = *iter;
2386 found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2395 array[pos++] = array[i];
2397 array[pos++] = array[++i];
2405 //! Trims the ustring16.
2406 //! Removes the specified characters (by default, Latin-1 whitespace) from the beginning and the end of the ustring16.
2407 //! \param whitespace The characters that are to be considered as whitespace.
2408 //! \return A reference to our current string.
2409 ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
2411 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2413 // find start and end of the substring without the specified characters
2414 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2418 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2420 return (*this = subString(begin, (end +1) - begin));
2424 //! Erases a character from the ustring16.
2425 //! May be slow, because all elements following after the erased element have to be copied.
2426 //! \param index Index of element to be erased.
2427 //! \return A reference to our current string.
2428 ustring16<TAlloc>& erase(u32 index)
2430 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2432 iterator i(*this, index);
2435 u32 len = (t > 0xFFFF ? 2 : 1);
2437 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2438 array[j - len] = array[j];
2447 //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2448 //! \return A reference to our current string.
2449 ustring16<TAlloc>& validate()
2451 // Validate all unicode characters.
2452 for (u32 i=0; i<allocated; ++i)
2454 // Terminate on existing null.
2460 if (UTF16_IS_SURROGATE(array[i]))
2462 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2463 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2464 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2465 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2468 if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2469 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2476 used = allocated - 1;
2483 //! Gets the last char of the ustring16, or 0.
2484 //! \return The last char of the ustring16, or 0.
2485 uchar32_t lastChar() const
2490 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2492 // Make sure we have a paired surrogate.
2496 // Check for an invalid surrogate.
2497 if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2500 // Convert the surrogate pair into a single UTF-32 character.
2501 return unicode::toUTF32(array[used-2], array[used-1]);
2505 return array[used-1];
2510 //! Split the ustring16 into parts.
2511 /** This method will split a ustring16 at certain delimiter characters
2512 into the container passed in as reference. The type of the container
2513 has to be given as template parameter. It must provide a push_back and
2515 \param ret The result container
2516 \param c Array of UTF-32 delimiter characters
2517 \param count Number of delimiter characters
2518 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2519 container. If two delimiters occur without a character in between, an
2520 empty substring would be placed in the result. If this flag is set,
2521 only non-empty strings are stored.
2522 \param keepSeparators Flag which allows to add the separator to the
2523 result ustring16. If this flag is true, the concatenation of the
2524 substrings results in the original ustring16. Otherwise, only the
2525 characters between the delimiters are returned.
2526 \return The number of resulting substrings
2528 template<class container>
2529 u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2534 const_iterator i(*this);
2535 const u32 oldSize=ret.size();
2539 bool lastWasSeparator = false;
2543 bool foundSeparator = false;
2544 for (u32 j=0; j<count; ++j)
2548 if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2550 ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2551 foundSeparator = true;
2552 lastpos = (keepSeparators ? pos : pos + 1);
2553 lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2557 lastWasSeparator = foundSeparator;
2563 ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2564 return ret.size()-oldSize;
2568 //! Split the ustring16 into parts.
2569 /** This method will split a ustring16 at certain delimiter characters
2570 into the container passed in as reference. The type of the container
2571 has to be given as template parameter. It must provide a push_back and
2573 \param ret The result container
2574 \param c A unicode string of delimiter characters
2575 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2576 container. If two delimiters occur without a character in between, an
2577 empty substring would be placed in the result. If this flag is set,
2578 only non-empty strings are stored.
2579 \param keepSeparators Flag which allows to add the separator to the
2580 result ustring16. If this flag is true, the concatenation of the
2581 substrings results in the original ustring16. Otherwise, only the
2582 characters between the delimiters are returned.
2583 \return The number of resulting substrings
2585 template<class container>
2586 u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2588 core::array<uchar32_t> v = c.toUTF32();
2589 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2593 //! Gets the size of the allocated memory buffer for the string.
2594 //! \return The size of the allocated memory buffer.
2595 u32 capacity() const
2601 //! Returns the raw number of UTF-16 code units in the string, which includes the individual surrogates.
2602 //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
2603 u32 size_raw() const
2609 //! Inserts a character into the string.
2610 //! \param c The character to insert.
2611 //! \param pos The position to insert the character.
2612 //! \return A reference to our current string.
2613 ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
2615 u8 len = (c > 0xFFFF ? 2 : 1);
2617 if (used + len >= allocated)
2618 reallocate(used + len);
2622 iterator iter(*this, pos);
2623 for (u32 i = used - 2; i > iter.getPos(); --i)
2624 array[i] = array[i - len];
2628 // c will be multibyte, so split it up into a surrogate pair.
2629 uchar16_t x = static_cast<uchar16_t>(c);
2630 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2631 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2632 array[iter.getPos()] = vh;
2633 array[iter.getPos()+1] = vl;
2637 array[iter.getPos()] = static_cast<uchar16_t>(c);
2644 //! Inserts a string into the string.
2645 //! \param c The string to insert.
2646 //! \param pos The position to insert the string.
2647 //! \return A reference to our current string.
2648 ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
2650 u32 len = c.size_raw();
2651 if (len == 0) return *this;
2653 if (used + len >= allocated)
2654 reallocate(used + len);
2658 iterator iter(*this, pos);
2659 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2660 array[i] = array[i - len];
2662 const uchar16_t* s = c.c_str();
2663 for (u32 i = 0; i < len; ++i)
2674 //! Inserts a character into the string.
2675 //! \param c The character to insert.
2676 //! \param pos The position to insert the character.
2677 //! \return A reference to our current string.
2678 ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2680 if (used + 1 >= allocated)
2681 reallocate(used + 1);
2685 for (u32 i = used - 1; i > pos; --i)
2686 array[i] = array[i - 1];
2694 //! Removes a character from string.
2695 //! \param pos Position of the character to remove.
2696 //! \return A reference to our current string.
2697 ustring16<TAlloc>& erase_raw(u32 pos)
2699 for (u32 i=pos; i<=used; ++i)
2701 array[i] = array[i + 1];
2709 //! Replaces a character in the string.
2710 //! \param c The new character.
2711 //! \param pos The position of the character to replace.
2712 //! \return A reference to our current string.
2713 ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2720 //! Returns an iterator to the beginning of the string.
2721 //! \return An iterator to the beginning of the string.
2724 iterator i(*this, 0);
2729 //! Returns an iterator to the beginning of the string.
2730 //! \return An iterator to the beginning of the string.
2731 const_iterator begin() const
2733 const_iterator i(*this, 0);
2738 //! Returns an iterator to the beginning of the string.
2739 //! \return An iterator to the beginning of the string.
2740 const_iterator cbegin() const
2742 const_iterator i(*this, 0);
2747 //! Returns an iterator to the end of the string.
2748 //! \return An iterator to the end of the string.
2751 iterator i(*this, 0);
2757 //! Returns an iterator to the end of the string.
2758 //! \return An iterator to the end of the string.
2759 const_iterator end() const
2761 const_iterator i(*this, 0);
2767 //! Returns an iterator to the end of the string.
2768 //! \return An iterator to the end of the string.
2769 const_iterator cend() const
2771 const_iterator i(*this, 0);
2777 //! Converts the string to a UTF-8 encoded string.
2778 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2779 //! \return A string containing the UTF-8 encoded string.
2780 core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2782 core::string<uchar8_t> ret;
2783 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2784 const_iterator iter(*this, 0);
2786 // Add the byte order mark if the user wants it.
2789 ret.append(unicode::BOM_ENCODE_UTF8[0]);
2790 ret.append(unicode::BOM_ENCODE_UTF8[1]);
2791 ret.append(unicode::BOM_ENCODE_UTF8[2]);
2794 while (!iter.atEnd())
2796 uchar32_t c = *iter;
2799 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2800 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2801 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2802 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2810 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2811 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2812 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2819 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2820 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2826 ret.append(static_cast<uchar8_t>(c));
2834 //! Converts the string to a UTF-8 encoded string array.
2835 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2836 //! \return An array containing the UTF-8 encoded string.
2837 core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2839 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2840 const_iterator iter(*this, 0);
2842 // Add the byte order mark if the user wants it.
2845 ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2846 ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2847 ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2850 while (!iter.atEnd())
2852 uchar32_t c = *iter;
2855 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2856 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2857 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2858 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2866 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2867 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2868 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2875 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2876 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2882 ret.push_back(static_cast<uchar8_t>(c));
2891 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2892 //! Converts the string to a UTF-16 encoded string.
2893 //! \param endian The desired endianness of the string.
2894 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2895 //! \return A string containing the UTF-16 encoded string.
2896 core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2898 core::string<char16_t> ret;
2899 ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2901 // Add the BOM if specified.
2904 if (endian == unicode::EUTFEE_NATIVE)
2905 ret[0] = unicode::BOM;
2906 else if (endian == unicode::EUTFEE_LITTLE)
2908 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2909 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2910 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2914 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2915 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2916 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2921 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2923 char16_t* ptr = ret.c_str();
2924 for (u32 i = 0; i < ret.size(); ++i)
2925 *ptr++ = unicode::swapEndian16(*ptr);
2932 //! Converts the string to a UTF-16 encoded string array.
2933 //! A toUTF16_s() version is only provided when USTRING_CPP0X_NEWLITERALS is defined, due to limitations with Irrlicht's string class.
2934 //! \param endian The desired endianness of the string.
2935 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2936 //! \return An array containing the UTF-16 encoded string.
2937 core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2939 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2940 uchar16_t* ptr = ret.pointer();
2942 // Add the BOM if specified.
2945 if (endian == unicode::EUTFEE_NATIVE)
2946 *ptr = unicode::BOM;
2947 else if (endian == unicode::EUTFEE_LITTLE)
2949 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2950 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2951 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2955 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2956 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2957 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2962 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2963 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2965 for (u32 i = 0; i <= used; ++i)
2966 ptr[i] = unicode::swapEndian16(ptr[i]);
2968 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
#ifdef USTRING_CPP0X_NEWLITERALS	// C++0x
//! Builds a UTF-32 encoded copy of this string.
//! \param endian The byte order the resulting code points should be stored in.
//! \param addBOM When true, a byte-order mark matching \p endian is placed in front of the text.
//! \return An Irrlicht string holding the UTF-32 encoded text.
core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
{
	core::string<char32_t> ret;
	ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
	const_iterator iter(*this, 0);

	// Prefix the byte-order mark when requested.
	if (addBOM)
	{
		if (endian != unicode::EUTFEE_NATIVE)
		{
			// Assemble the mark from its encoded bytes and append it as one unit.
			union
			{
				uchar32_t full;
				u8 chunk[4];
			} t;

			const bool little = (endian == unicode::EUTFEE_LITTLE);
			for (u32 b = 0; b < 4; ++b)
			{
				if (little)
					t.chunk[b] = unicode::BOM_ENCODE_UTF32_LE[b];
				else
					t.chunk[b] = unicode::BOM_ENCODE_UTF32_BE[b];
			}
			ret.append(t.full);
		}
		else
			ret.append(unicode::BOM);
	}

	// Walk the string one code point at a time, swapping bytes where required.
	const bool swapBytes = (endian != unicode::EUTFEE_NATIVE) && (getEndianness() != endian);
	for (; !iter.atEnd(); ++iter)
	{
		uchar32_t c = *iter;
		if (swapBytes)
			c = unicode::swapEndian32(c);
		ret.append(c);
	}
	return ret;
}
#endif
3029 //! Converts the string to a UTF-32 encoded string array.
3030 //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
3031 //! \param endian The desired endianness of the string.
3032 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3033 //! \return An array containing the UTF-32 encoded string.
3034 core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3036 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3037 const_iterator iter(*this, 0);
3039 // Add the BOM if specified.
3042 if (endian == unicode::EUTFEE_NATIVE)
3043 ret.push_back(unicode::BOM);
3052 if (endian == unicode::EUTFEE_LITTLE)
3054 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3055 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3056 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3057 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3061 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3062 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3063 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3064 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3066 ret.push_back(t.full);
3071 while (!iter.atEnd())
3073 uchar32_t c = *iter;
3074 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3075 c = unicode::swapEndian32(c);
3083 //! Converts the string to a wchar_t encoded string.
3084 /** The size of a wchar_t changes depending on the platform. This function will store a
3085 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3086 //! \param endian The desired endianness of the string.
3087 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3088 //! \return A string containing the wchar_t encoded string.
3089 core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3091 if (sizeof(wchar_t) == 4)
3093 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3094 core::stringw ret(a.pointer());
3097 else if (sizeof(wchar_t) == 2)
3099 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3101 core::stringw ret(array);
3106 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3107 core::stringw ret(a.pointer());
3111 else if (sizeof(wchar_t) == 1)
3113 core::array<uchar8_t> a(toUTF8(addBOM));
3114 core::stringw ret(a.pointer());
3118 // Shouldn't happen.
3119 return core::stringw();
3123 //! Converts the string to a wchar_t encoded string array.
3124 /** The size of a wchar_t changes depending on the platform. This function will store a
3125 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3126 //! \param endian The desired endianness of the string.
3127 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3128 //! \return An array containing the wchar_t encoded string.
3129 core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3131 if (sizeof(wchar_t) == 4)
3133 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3134 core::array<wchar_t> ret(a.size());
3135 ret.set_used(a.size());
3136 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3139 if (sizeof(wchar_t) == 2)
3141 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3143 core::array<wchar_t> ret(used);
3145 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3150 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3151 core::array<wchar_t> ret(a.size());
3152 ret.set_used(a.size());
3153 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3157 if (sizeof(wchar_t) == 1)
3159 core::array<uchar8_t> a(toUTF8(addBOM));
3160 core::array<wchar_t> ret(a.size());
3161 ret.set_used(a.size());
3162 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3166 // Shouldn't happen.
3167 return core::array<wchar_t>();
3170 //! Converts the string to a properly encoded io::path string.
3171 //! \param endian The desired endianness of the string.
3172 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3173 //! \return An io::path string containing the properly encoded string.
3174 io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3176 #if defined(_IRR_WCHAR_FILESYSTEM)
3177 return toWCHAR_s(endian, addBOM);
3179 return toUTF8_s(addBOM);
3183 //! Loads an unknown stream of data.
3184 //! Will attempt to determine if the stream is unicode data. Useful for loading from files.
3185 //! \param data The data stream to load from.
3186 //! \param data_size The length of the data string.
3187 //! \return A reference to our current string.
3188 ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3190 // Clear our string.
3195 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3199 case unicode::EUTFE_UTF8:
3200 append((uchar8_t*)data, data_size);
3203 case unicode::EUTFE_UTF16:
3204 case unicode::EUTFE_UTF16_BE:
3205 case unicode::EUTFE_UTF16_LE:
3206 append((uchar16_t*)data, data_size / 2);
3209 case unicode::EUTFE_UTF32:
3210 case unicode::EUTFE_UTF32_BE:
3211 case unicode::EUTFE_UTF32_LE:
3212 append((uchar32_t*)data, data_size / 4);
3219 //! Gets the encoding of the Unicode string this class contains.
3220 //! \return An enum describing the current encoding of this string.
3221 const unicode::EUTF_ENCODE getEncoding() const
3226 //! Gets the endianness of the Unicode string this class contains.
3227 //! \return An enum describing the endianness of this string.
3228 const unicode::EUTF_ENDIAN getEndianness() const
3230 if (encoding == unicode::EUTFE_UTF16_LE ||
3231 encoding == unicode::EUTFE_UTF32_LE)
3232 return unicode::EUTFEE_LITTLE;
3233 else return unicode::EUTFEE_BIG;
3238 //! Reallocate the string, making it bigger or smaller.
3239 //! \param new_size The new size of the string.
3240 void reallocate(u32 new_size)
3242 uchar16_t* old_array = array;
3244 array = allocator.allocate(new_size + 1); //new u16[new_size];
3245 allocated = new_size + 1;
3246 if (old_array == 0) return;
3248 u32 amount = used < new_size ? used : new_size;
3249 for (u32 i=0; i<=amount; ++i)
3250 array[i] = old_array[i];
3252 if (allocated <= used)
3253 used = allocated - 1;
3257 allocator.deallocate(old_array); // delete [] old_array;
3260 //--- member variables
3263 unicode::EUTF_ENCODE encoding;
3267 //irrAllocator<uchar16_t> allocator;
3270 typedef ustring16<irrAllocator<uchar16_t> > ustring;
3273 //! Appends two ustring16s.
3274 template <typename TAlloc>
3275 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3277 ustring16<TAlloc> ret(left);
3283 //! Appends a ustring16 and a null-terminated unicode string.
3284 template <typename TAlloc, class B>
3285 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3287 ustring16<TAlloc> ret(left);
3293 //! Appends a ustring16 and a null-terminated unicode string.
3294 template <class B, typename TAlloc>
3295 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3297 ustring16<TAlloc> ret(left);
3303 //! Appends a ustring16 and an Irrlicht string.
3304 template <typename TAlloc, typename B, typename BAlloc>
3305 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3307 ustring16<TAlloc> ret(left);
3313 //! Appends a ustring16 and an Irrlicht string.
3314 template <typename TAlloc, typename B, typename BAlloc>
3315 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3317 ustring16<TAlloc> ret(left);
3323 //! Appends a ustring16 and a std::basic_string.
3324 template <typename TAlloc, typename B, typename A, typename BAlloc>
3325 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3327 ustring16<TAlloc> ret(left);
3333 //! Appends a ustring16 and a std::basic_string.
3334 template <typename TAlloc, typename B, typename A, typename BAlloc>
3335 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3337 ustring16<TAlloc> ret(left);
3343 //! Appends a ustring16 and a char.
3344 template <typename TAlloc>
3345 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3347 ustring16<TAlloc> ret(left);
3353 //! Appends a ustring16 and a char.
3354 template <typename TAlloc>
3355 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3357 ustring16<TAlloc> ret(left);
#ifdef USTRING_CPP0X_NEWLITERALS
//! Concatenates a ustring16 with a single uchar32_t code point on its right.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
{
	ustring16<TAlloc> joined(left);
	joined += right;
	return joined;
}


//! Concatenates a single uchar32_t code point with a ustring16 on its right.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
{
	ustring16<TAlloc> joined(left);
	joined += right;
	return joined;
}
#endif
3385 //! Appends a ustring16 and a short.
3386 template <typename TAlloc>
3387 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3389 ustring16<TAlloc> ret(left);
3390 ret += core::stringc(right);
3395 //! Appends a ustring16 and a short.
3396 template <typename TAlloc>
3397 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3399 ustring16<TAlloc> ret((core::stringc(left)));
3405 //! Appends a ustring16 and an unsigned short.
3406 template <typename TAlloc>
3407 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3409 ustring16<TAlloc> ret(left);
3410 ret += core::stringc(right);
3415 //! Appends a ustring16 and an unsigned short.
3416 template <typename TAlloc>
3417 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3419 ustring16<TAlloc> ret((core::stringc(left)));
3425 //! Appends a ustring16 and an int.
3426 template <typename TAlloc>
3427 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3429 ustring16<TAlloc> ret(left);
3430 ret += core::stringc(right);
3435 //! Appends a ustring16 and an int.
3436 template <typename TAlloc>
3437 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3439 ustring16<TAlloc> ret((core::stringc(left)));
3445 //! Appends a ustring16 and an unsigned int.
3446 template <typename TAlloc>
3447 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3449 ustring16<TAlloc> ret(left);
3450 ret += core::stringc(right);
3455 //! Appends a ustring16 and an unsigned int.
3456 template <typename TAlloc>
3457 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3459 ustring16<TAlloc> ret((core::stringc(left)));
3465 //! Appends a ustring16 and a long.
3466 template <typename TAlloc>
3467 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3469 ustring16<TAlloc> ret(left);
3470 ret += core::stringc(right);
3475 //! Appends a ustring16 and a long.
3476 template <typename TAlloc>
3477 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3479 ustring16<TAlloc> ret((core::stringc(left)));
3485 //! Appends a ustring16 and an unsigned long.
3486 template <typename TAlloc>
3487 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3489 ustring16<TAlloc> ret(left);
3490 ret += core::stringc(right);
3495 //! Appends a ustring16 and an unsigned long.
3496 template <typename TAlloc>
3497 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3499 ustring16<TAlloc> ret((core::stringc(left)));
3505 //! Appends a ustring16 and a float.
3506 template <typename TAlloc>
3507 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3509 ustring16<TAlloc> ret(left);
3510 ret += core::stringc(right);
3515 //! Appends a ustring16 and a float.
3516 template <typename TAlloc>
3517 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3519 ustring16<TAlloc> ret((core::stringc(left)));
3525 //! Appends a ustring16 and a double.
3526 template <typename TAlloc>
3527 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3529 ustring16<TAlloc> ret(left);
3530 ret += core::stringc(right);
3535 //! Appends a ustring16 and a double.
3536 template <typename TAlloc>
3537 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3539 ustring16<TAlloc> ret((core::stringc(left)));
#ifdef USTRING_CPP0X
// Move-aware concatenation of two ustring16s.  Each overload reuses the
// buffer of an expiring operand and returns the result BY VALUE (move
// constructed).  Returning an rvalue reference to the operand would leave the
// caller holding a reference to a temporary that dies at the end of the full
// expression, e.g. when the result is bound to `const ustring16&` or `auto&&`.

//! Appends two ustring16s, reusing the storage of the expiring right operand.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
{
	//std::cout << "MOVE operator+(&, &&)" << std::endl;
	right.insert(left, 0);
	return std::move(right);
}


//! Appends two ustring16s, reusing the storage of the expiring left operand.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
{
	//std::cout << "MOVE operator+(&&, &)" << std::endl;
	left.append(right);
	return std::move(left);
}


//! Appends two expiring ustring16s, reusing whichever buffer suits the result better.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
{
	//std::cout << "MOVE operator+(&&, &&)" << std::endl;
	// Append to `left` when `right` fits into its spare capacity, or when
	// `right` has too little spare capacity to take `left` in front of it.
	if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
		(right.capacity() - right.size_raw() < left.size_raw()))
	{
		left.append(right);
		return std::move(left);
	}
	else
	{
		right.insert(left, 0);
		return std::move(right);
	}
}
#endif
#ifdef USTRING_CPP0X
// The result is returned by value (move constructed from the expiring
// operand) so that callers never receive a reference to a dead temporary.

//! Appends a null-terminated unicode string to an expiring ustring16.
template <typename TAlloc, class B>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const B* const right)
{
	//std::cout << "MOVE operator+(&&, B*)" << std::endl;
	left.append(right);
	return std::move(left);
}


//! Prepends a null-terminated unicode string to an expiring ustring16.
template <class B, typename TAlloc>
inline ustring16<TAlloc> operator+(const B* const left, ustring16<TAlloc>&& right)
{
	//std::cout << "MOVE operator+(B*, &&)" << std::endl;
	right.insert(left, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
// The result is returned by value (move constructed from the expiring
// operand) so that callers never receive a reference to a dead temporary.

//! Prepends an Irrlicht string to an expiring ustring16.
template <typename TAlloc, typename B, typename BAlloc>
inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
{
	//std::cout << "MOVE operator+(&, &&)" << std::endl;
	right.insert(left, 0);
	return std::move(right);
}


//! Appends an Irrlicht string to an expiring ustring16.
template <typename TAlloc, typename B, typename BAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
{
	//std::cout << "MOVE operator+(&&, &)" << std::endl;
	left.append(right);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
// The result is returned by value (move constructed from the expiring
// operand) so that callers never receive a reference to a dead temporary.

//! Prepends a std::basic_string to an expiring ustring16.
template <typename TAlloc, typename B, typename A, typename BAlloc>
inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
{
	//std::cout << "MOVE operator+(&, &&)" << std::endl;
	// The std string is converted to a ustring16 first, then inserted at the front.
	right.insert(core::ustring16<TAlloc>(left), 0);
	return std::move(right);
}


//! Appends a std::basic_string to an expiring ustring16.
template <typename TAlloc, typename B, typename A, typename BAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
{
	//std::cout << "MOVE operator+(&&, &)" << std::endl;
	left.append(right);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Appends a single char to an expiring ustring16 and hands its storage to the result.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
{
	// The char is widened to a code point before being appended.
	left.append(static_cast<uchar32_t>(right));
	return std::move(left);
}


//! Prepends a single char to an expiring ustring16 and hands its storage to the result.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
{
	// The char is widened to a code point before being inserted at the front.
	right.insert(static_cast<uchar32_t>(left), 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
#ifdef USTRING_CPP0X_NEWLITERALS
//! Appends a single uchar32_t code point to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
{
	left.append(right);
	return std::move(left);
}


//! Prepends a single uchar32_t code point to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
{
	right.insert(left, 0);
	return std::move(right);
}
#endif
#endif
#ifdef USTRING_CPP0X
// Numeric overloads for expiring strings: the number is converted to text
// through core::stringc, added to the expiring operand, and that operand's
// storage is moved into the returned value.

//! Appends the text form of a short to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}


//! Prepends the text form of a short to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}


//! Appends the text form of an unsigned short to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}


//! Prepends the text form of an unsigned short to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}


//! Appends the text form of an int to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}


//! Prepends the text form of an int to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}


//! Appends the text form of an unsigned int to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}


//! Prepends the text form of an unsigned int to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}


//! Appends the text form of a long to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}


//! Prepends the text form of a long to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}


//! Appends the text form of an unsigned long to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}


//! Prepends the text form of an unsigned long to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}


//! Appends the text form of a float to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}


//! Prepends the text form of a float to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}


//! Appends the text form of a double to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}


//! Prepends the text form of a double to an expiring ustring16.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
3828 #ifndef USTRING_NO_STL
3829 //! Writes a ustring16 to an ostream.
3830 template <typename TAlloc>
3831 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3833 out << in.toUTF8_s().c_str();
3837 //! Writes a ustring16 to a wostream.
3838 template <typename TAlloc>
3839 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3841 out << in.toWCHAR_s().c_str();
3847 #ifndef USTRING_NO_STL
3852 //! Hashing algorithm for hashing a ustring. Used for things like unordered_maps.
3853 //! Algorithm taken from std::hash<std::string>.
3854 class hash : public std::unary_function<core::ustring, size_t>
3857 size_t operator()(const core::ustring& s) const
3859 size_t ret = 2166136261U;
3861 size_t stride = 1 + s.size_raw() / 10;
3863 core::ustring::const_iterator i = s.begin();
3864 while (i != s.end())
3866 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
3867 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3875 } // end namespace unicode
3879 } // end namespace core
3880 } // end namespace irr