2 Basic Unicode string class for Irrlicht.
3 Copyright (c) 2009-2011 John Norman
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you
14 must not claim that you wrote the original software. If you use
15 this software in a product, an acknowledgment in the product
16 documentation would be appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source
24 The original version of this class can be located at:
25 http://irrlicht.suckerfreegames.com/
28 john@suckerfreegames.com
33 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
34 # define USTRING_CPP0X
35 # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
36 # define USTRING_CPP0X_NEWLITERALS
46 #define __BYTE_ORDER 0
47 #define __LITTLE_ENDIAN 0
48 #define __BIG_ENDIAN 1
49 #elif defined(__MACH__) && defined(__APPLE__)
50 #include <machine/endian.h>
51 #elif defined(__FreeBSD__)
52 #include <sys/endian.h>
61 #ifndef USTRING_NO_STL
68 #include "irrAllocator.h"
71 #include "irrString.h"
74 //! UTF-16 surrogate start values.
//! 0xD800 is the first value of the high-surrogate range and 0xDC00 the first value of the low-surrogate range.
75 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
76 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
78 //! Is a UTF-16 code unit a surrogate?
//! Masking with 0xF800 keeps the top five bits, so UTF16_IS_SURROGATE matches 0xD800-0xDFFF (either half).
//! Masking with 0xFC00 keeps the top six bits, so UTF16_IS_SURROGATE_HI matches 0xD800-0xDBFF
//! and UTF16_IS_SURROGATE_LO matches 0xDC00-0xDFFF.
79 #define UTF16_IS_SURROGATE(c) (((c) & 0xF800) == 0xD800)
80 #define UTF16_IS_SURROGATE_HI(c) (((c) & 0xFC00) == 0xD800)
81 #define UTF16_IS_SURROGATE_LO(c) (((c) & 0xFC00) == 0xDC00)
87 // Define our character types.
88 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
89 typedef char32_t uchar32_t;
90 typedef char16_t uchar16_t;
91 typedef char uchar8_t;
93 typedef u32 uchar32_t;
94 typedef u16 uchar16_t;
104 //! The unicode replacement character. Used to replace invalid characters.
105 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
107 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
108 //! \param high The high value of the pair.
109 //! \param low The low value of the pair.
110 //! \return The UTF-32 character expressed by the surrogate pair.
111 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
113 // Convert the surrogate pair into a single UTF-32 character.
114 uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
115 uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
116 return (wu << 16) | x;
119 //! Swaps the endianness of a 16-bit value.
120 //! \return The new value.
121 inline uchar16_t swapEndian16(const uchar16_t& c)
123 return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
126 //! Swaps the endianness of a 32-bit value.
127 //! \return The new value.
128 inline uchar32_t swapEndian32(const uchar32_t& c)
130 return ((c >> 24) & 0x000000FF) |
131 ((c >> 8) & 0x0000FF00) |
132 ((c << 8) & 0x00FF0000) |
133 ((c << 24) & 0xFF000000);
136 //! The Unicode byte order mark.
137 const u16 BOM = 0xFEFF;
139 //! The size of the Unicode byte order mark in terms of the Unicode character size.
140 const u8 BOM_UTF8_LEN = 3;
141 const u8 BOM_UTF16_LEN = 1;
142 const u8 BOM_UTF32_LEN = 1;
144 //! Unicode byte order marks for file operations.
145 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
146 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
147 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
148 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
149 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
151 //! The size in bytes of the Unicode byte marks for file operations.
152 const u8 BOM_ENCODE_UTF8_LEN = 3;
153 const u8 BOM_ENCODE_UTF16_LEN = 2;
154 const u8 BOM_ENCODE_UTF32_LEN = 4;
156 //! Unicode encoding type.
169 //! Unicode endianness.
177 //! Returns the specified unicode byte order mark in a byte array.
178 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
179 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
180 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
181 //! \return An array that contains a byte order mark.
//! NOTE(review): the native-endian branches below test `__BIG_ENDIAN__`, while the ustring16
//! constructors in this file test `__BYTE_ORDER == __BIG_ENDIAN`. Confirm that both checks
//! agree on every supported platform.
182 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
// Local helper macro: copies `size` bytes from `source` into the buffer returned by ret.pointer().
184 #define COPY_ARRAY(source, size) \
185 memcpy(ret.pointer(), source, size); \
188 core::array<u8> ret(4);
// The array is constructed with 4, which equals BOM_ENCODE_UTF32_LEN, the longest mark copied below.
192 COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
// UTF-16 without an explicit byte order: the mark is chosen at compile time (presumably the EUTFE_UTF16 case).
195 #ifdef __BIG_ENDIAN__
196 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
198 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
// UTF-16 with an explicit byte order.
202 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
205 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
// UTF-32 without an explicit byte order: same compile-time selection as for UTF-16.
208 #ifdef __BIG_ENDIAN__
209 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
211 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
// UTF-32 with an explicit byte order.
215 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
218 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
221 // TODO sapier: fixed warning only,
222 // don't know if something needs to be done here
230 //! Detects if the given data stream starts with a unicode BOM.
231 //! \param data The data stream to check.
232 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
233 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
235 if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
236 if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
237 if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
238 if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
239 if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
243 } // end namespace unicode
246 //! UTF-16 string class.
247 template <typename TAlloc = irrAllocator<uchar16_t> >
252 ///------------------///
253 /// iterator classes ///
254 ///------------------///
256 //! Access an element in a unicode string, allowing one to change it.
257 class _ustring16_iterator_access
260 _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
262 //! Allow the class to be interpreted as a single UTF-32 character.
263 operator uchar32_t() const
268 //! Allow one to change the character in the unicode string.
269 //! \param c The new character to use.
271 _ustring16_iterator_access& operator=(const uchar32_t c)
277 //! Increments the value by 1.
279 _ustring16_iterator_access& operator++()
285 //! Increments the value by 1, returning the old value.
286 //! \return A unicode character.
287 uchar32_t operator++(int)
289 uchar32_t old = _get();
294 //! Decrements the value by 1.
296 _ustring16_iterator_access& operator--()
302 //! Decrements the value by 1, returning the old value.
303 //! \return A unicode character.
304 uchar32_t operator--(int)
306 uchar32_t old = _get();
311 //! Adds to the value by a specified amount.
312 //! \param val The amount to add to this character.
314 _ustring16_iterator_access& operator+=(int val)
320 //! Subtracts from the value by a specified amount.
321 //! \param val The amount to subtract from this character.
323 _ustring16_iterator_access& operator-=(int val)
329 //! Multiplies the value by a specified amount.
330 //! \param val The amount to multiply this character by.
332 _ustring16_iterator_access& operator*=(int val)
338 //! Divides the value by a specified amount.
339 //! \param val The amount to divide this character by.
341 _ustring16_iterator_access& operator/=(int val)
347 //! Modulos the value by a specified amount.
348 //! \param val The amount to modulo this character by.
350 _ustring16_iterator_access& operator%=(int val)
356 //! Adds to the value by a specified amount.
357 //! \param val The amount to add to this character.
358 //! \return A unicode character.
359 uchar32_t operator+(int val) const
364 //! Subtracts from the value by a specified amount.
365 //! \param val The amount to subtract from this character.
366 //! \return A unicode character.
367 uchar32_t operator-(int val) const
372 //! Multiplies the value by a specified amount.
373 //! \param val The amount to multiply this character by.
374 //! \return A unicode character.
375 uchar32_t operator*(int val) const
380 //! Divides the value by a specified amount.
381 //! \param val The amount to divide this character by.
382 //! \return A unicode character.
383 uchar32_t operator/(int val) const
388 //! Modulos the value by a specified amount.
389 //! \param val The amount to modulo this character by.
390 //! \return A unicode character.
391 uchar32_t operator%(int val) const
397 //! Gets a uchar32_t from our current position.
//! Reads the code unit at `pos` in the referenced string; a surrogate is combined with the
//! following code unit through unicode::toUTF32().
398 uchar32_t _get() const
400 const uchar16_t* a = ref->c_str();
// Not a surrogate: the code unit already is the full character.
401 if (!UTF16_IS_SURROGATE(a[pos]))
402 return static_cast<uchar32_t>(a[pos]);
// A surrogate needs a second code unit; this check detects a pair cut off at the end of the raw data.
405 if (pos + 1 >= ref->size_raw())
// Otherwise decode the pair formed by this code unit and the next one.
408 return unicode::toUTF32(a[pos], a[pos + 1]);
412 //! Sets a uchar32_t at our current position.
//! Depending on the new and the old character, the raw string may grow or shrink by one code unit.
//! \param c The character to store at `pos`.
413 void _set(uchar32_t c)
// `ref` is a pointer to const; constness is cast away here so the string can be modified.
415 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
416 const uchar16_t* a = ref2->c_str();
419 // c needs two UTF-16 code units, so split it up into a high and a low surrogate.
420 uchar16_t x = static_cast<uchar16_t>(c);
// High surrogate: 0xD800 | ((bits 16-20 of c, minus one) << 6) | (top six bits of the low 16 bits).
421 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
// Low surrogate: 0xDC00 | (low ten bits of c).
422 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
424 // If the current position already holds a surrogate pair, overwrite its low half. Otherwise insert the low half after it.
425 if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
426 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
427 else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
// The high half always goes to `pos` itself.
429 ref2->replace_raw(vh, static_cast<u32>(pos));
433 // c fits into a single UTF-16 code unit.
434 uchar16_t vh = static_cast<uchar16_t>(c);
436 // If the current position held a surrogate pair, remove the now unneeded second code unit.
437 if (UTF16_IS_SURROGATE_HI(a[pos]))
438 ref2->erase_raw(static_cast<u32>(pos) + 1);
440 ref2->replace_raw(vh, static_cast<u32>(pos));
444 const ustring16<TAlloc>* ref;
447 typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
450 //! Iterator to iterate through a UTF-16 string.
451 #ifndef USTRING_NO_STL
452 class _ustring16_const_iterator : public std::iterator<
453 std::bidirectional_iterator_tag, // iterator_category
454 access, // value_type
455 ptrdiff_t, // difference_type
456 const access, // pointer
457 const access // reference
460 class _ustring16_const_iterator
464 typedef _ustring16_const_iterator _Iter;
465 typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
466 typedef const access const_pointer;
467 typedef const access const_reference;
469 #ifndef USTRING_NO_STL
470 typedef typename _Base::value_type value_type;
471 typedef typename _Base::difference_type difference_type;
472 typedef typename _Base::difference_type distance_type;
473 typedef typename _Base::pointer pointer;
474 typedef const_reference reference;
476 typedef access value_type;
477 typedef u32 difference_type;
478 typedef u32 distance_type;
479 typedef const_pointer pointer;
480 typedef const_reference reference;
484 _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
485 _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
486 _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
488 if (ref->size_raw() == 0 || p == 0)
491 // Go to the appropriate position.
493 u32 sr = ref->size_raw();
494 const uchar16_t* a = ref->c_str();
495 while (i != 0 && pos < sr)
497 if (UTF16_IS_SURROGATE_HI(a[pos]))
504 //! Test for equality.
505 bool operator==(const _Iter& iter) const
507 if (ref == iter.ref && pos == iter.pos)
512 //! Test for inequality.
513 bool operator!=(const _Iter& iter) const
515 if (ref != iter.ref || pos != iter.pos)
520 //! Switch to the next full character in the string.
523 if (pos == ref->size_raw()) return *this;
524 const uchar16_t* a = ref->c_str();
525 if (UTF16_IS_SURROGATE_HI(a[pos]))
526 pos += 2; // TODO: check for valid low surrogate?
528 if (pos > ref->size_raw()) pos = ref->size_raw();
532 //! Switch to the next full character in the string, returning the previous position.
533 _Iter operator++(int)
540 //! Switch to the previous full character in the string.
543 if (pos == 0) return *this;
544 const uchar16_t* a = ref->c_str();
546 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
551 //! Switch to the previous full character in the string, returning the previous position.
552 _Iter operator--(int)
559 //! Advance a specified number of full characters in the string.
//! \param v The number of full characters to advance by.
561 _Iter& operator+=(const difference_type v)
// Nothing to do for a zero step.
563 if (v == 0) return *this;
// A negative step is forwarded to operator-= with its sign flipped.
564 if (v < 0) return operator-=(v * -1);
// Checks whether the iterator already sits at or beyond the end of the raw data.
566 if (pos >= ref->size_raw())
569 // Go to the appropriate position.
570 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
572 u32 sr = ref->size_raw();
573 const uchar16_t* a = ref->c_str();
// Walk forward until the step counter runs out or the end of the raw data is reached.
574 while (i != 0 && pos < sr)
// A high surrogate marks a character that spans two code units.
576 if (UTF16_IS_SURROGATE_HI(a[pos]))
587 //! Go back a specified number of full characters in the string.
589 _Iter& operator-=(const difference_type v)
591 if (v == 0) return *this;
592 if (v > 0) return operator+=(v * -1);
597 // Go to the appropriate position.
598 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
600 const uchar16_t* a = ref->c_str();
601 while (i != 0 && pos != 0)
604 if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
612 //! Return a new iterator that is a variable number of full characters forward from the current position.
613 _Iter operator+(const difference_type v) const
620 //! Return a new iterator that is a variable number of full characters backward from the current position.
621 _Iter operator-(const difference_type v) const
628 //! Returns the distance between two iterators.
629 difference_type operator-(const _Iter& iter) const
631 // Make sure we reference the same object!
633 return difference_type();
658 //! Accesses the full character at the iterator's position.
659 const_reference operator*() const
661 if (pos >= ref->size_raw())
663 const uchar16_t* a = ref->c_str();
664 u32 p = ref->size_raw();
665 if (UTF16_IS_SURROGATE_LO(a[p]))
667 reference ret(ref, p);
670 const_reference ret(ref, pos);
674 //! Accesses the full character at the iterator's position.
675 reference operator*()
677 if (pos >= ref->size_raw())
679 const uchar16_t* a = ref->c_str();
680 u32 p = ref->size_raw();
681 if (UTF16_IS_SURROGATE_LO(a[p]))
683 reference ret(ref, p);
686 reference ret(ref, pos);
690 //! Accesses the full character at the iterator's position.
691 const_pointer operator->() const
696 //! Accesses the full character at the iterator's position.
702 //! Is the iterator at the start of the string?
708 //! Is the iterator at the end of the string?
711 const uchar16_t* a = ref->c_str();
712 if (UTF16_IS_SURROGATE(a[pos]))
713 return (pos + 1) >= ref->size_raw();
714 else return pos >= ref->size_raw();
717 //! Moves the iterator to the start of the string.
723 //! Moves the iterator to the end of the string.
726 pos = ref->size_raw();
729 //! Returns the iterator's position.
730 //! \return The iterator's position.
737 const ustring16<TAlloc>* ref;
741 //! Iterator to iterate through a UTF-16 string.
742 class _ustring16_iterator : public _ustring16_const_iterator
745 typedef _ustring16_iterator _Iter;
746 typedef _ustring16_const_iterator _Base;
747 typedef typename _Base::const_pointer const_pointer;
748 typedef typename _Base::const_reference const_reference;
751 typedef typename _Base::value_type value_type;
752 typedef typename _Base::difference_type difference_type;
753 typedef typename _Base::distance_type distance_type;
754 typedef access pointer;
755 typedef access reference;
761 _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
762 _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
763 _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
765 //! Accesses the full character at the iterator's position.
766 reference operator*() const
768 if (pos >= ref->size_raw())
770 const uchar16_t* a = ref->c_str();
771 u32 p = ref->size_raw();
772 if (UTF16_IS_SURROGATE_LO(a[p]))
774 reference ret(ref, p);
777 reference ret(ref, pos);
781 //! Accesses the full character at the iterator's position.
782 reference operator*()
784 if (pos >= ref->size_raw())
786 const uchar16_t* a = ref->c_str();
787 u32 p = ref->size_raw();
788 if (UTF16_IS_SURROGATE_LO(a[p]))
790 reference ret(ref, p);
793 reference ret(ref, pos);
797 //! Accesses the full character at the iterator's position.
798 pointer operator->() const
803 //! Accesses the full character at the iterator's position.
810 typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
811 typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
813 ///----------------------///
814 /// end iterator classes ///
815 ///----------------------///
817 //! Default constructor
819 : array(0), allocated(1), used(0)
821 #if __BYTE_ORDER == __BIG_ENDIAN
822 encoding = unicode::EUTFE_UTF16_BE;
824 encoding = unicode::EUTFE_UTF16_LE;
826 array = allocator.allocate(1); // new u16[1];
832 ustring16(const ustring16<TAlloc>& other)
833 : array(0), allocated(0), used(0)
835 #if __BYTE_ORDER == __BIG_ENDIAN
836 encoding = unicode::EUTFE_UTF16_BE;
838 encoding = unicode::EUTFE_UTF16_LE;
844 //! Constructor from other string types
845 template <class B, class A>
846 ustring16(const string<B, A>& other)
847 : array(0), allocated(0), used(0)
849 #if __BYTE_ORDER == __BIG_ENDIAN
850 encoding = unicode::EUTFE_UTF16_BE;
852 encoding = unicode::EUTFE_UTF16_LE;
858 #ifndef USTRING_NO_STL
859 //! Constructor from std::string
860 template <class B, class A, typename Alloc>
861 ustring16(const std::basic_string<B, A, Alloc>& other)
862 : array(0), allocated(0), used(0)
864 #if __BYTE_ORDER == __BIG_ENDIAN
865 encoding = unicode::EUTFE_UTF16_BE;
867 encoding = unicode::EUTFE_UTF16_LE;
869 *this = other.c_str();
873 //! Constructor from iterator.
874 template <typename Itr>
875 ustring16(Itr first, Itr last)
876 : array(0), allocated(0), used(0)
878 #if __BYTE_ORDER == __BIG_ENDIAN
879 encoding = unicode::EUTFE_UTF16_BE;
881 encoding = unicode::EUTFE_UTF16_LE;
883 reserve(std::distance(first, last));
886 for (; first != last; ++first)
887 append((uchar32_t)*first);
892 #ifndef USTRING_CPP0X_NEWLITERALS
893 //! Constructor for copying a character string from a pointer.
894 ustring16(const char* const c)
895 : array(0), allocated(0), used(0)
897 #if __BYTE_ORDER == __BIG_ENDIAN
898 encoding = unicode::EUTFE_UTF16_BE;
900 encoding = unicode::EUTFE_UTF16_LE;
903 loadDataStream(c, strlen(c));
904 //append((uchar8_t*)c);
908 //! Constructor for copying a character string from a pointer with a given length.
909 ustring16(const char* const c, u32 length)
910 : array(0), allocated(0), used(0)
912 #if __BYTE_ORDER == __BIG_ENDIAN
913 encoding = unicode::EUTFE_UTF16_BE;
915 encoding = unicode::EUTFE_UTF16_LE;
918 loadDataStream(c, length);
923 //! Constructor for copying a UTF-8 string from a pointer.
924 ustring16(const uchar8_t* const c)
925 : array(0), allocated(0), used(0)
927 #if __BYTE_ORDER == __BIG_ENDIAN
928 encoding = unicode::EUTFE_UTF16_BE;
930 encoding = unicode::EUTFE_UTF16_LE;
937 //! Constructor for copying a UTF-8 string from a single char.
938 ustring16(const char c)
939 : array(0), allocated(0), used(0)
941 #if __BYTE_ORDER == __BIG_ENDIAN
942 encoding = unicode::EUTFE_UTF16_BE;
944 encoding = unicode::EUTFE_UTF16_LE;
947 append((uchar32_t)c);
951 //! Constructor for copying a UTF-8 string from a pointer with a given length.
952 ustring16(const uchar8_t* const c, u32 length)
953 : array(0), allocated(0), used(0)
955 #if __BYTE_ORDER == __BIG_ENDIAN
956 encoding = unicode::EUTFE_UTF16_BE;
958 encoding = unicode::EUTFE_UTF16_LE;
965 //! Constructor for copying a UTF-16 string from a pointer.
966 ustring16(const uchar16_t* const c)
967 : array(0), allocated(0), used(0)
969 #if __BYTE_ORDER == __BIG_ENDIAN
970 encoding = unicode::EUTFE_UTF16_BE;
972 encoding = unicode::EUTFE_UTF16_LE;
979 //! Constructor for copying a UTF-16 string from a pointer with a given length
980 ustring16(const uchar16_t* const c, u32 length)
981 : array(0), allocated(0), used(0)
983 #if __BYTE_ORDER == __BIG_ENDIAN
984 encoding = unicode::EUTFE_UTF16_BE;
986 encoding = unicode::EUTFE_UTF16_LE;
993 //! Constructor for copying a UTF-32 string from a pointer.
994 ustring16(const uchar32_t* const c)
995 : array(0), allocated(0), used(0)
997 #if __BYTE_ORDER == __BIG_ENDIAN
998 encoding = unicode::EUTFE_UTF16_BE;
1000 encoding = unicode::EUTFE_UTF16_LE;
1007 //! Constructor for copying a UTF-32 from a pointer with a given length.
1008 ustring16(const uchar32_t* const c, u32 length)
1009 : array(0), allocated(0), used(0)
1011 #if __BYTE_ORDER == __BIG_ENDIAN
1012 encoding = unicode::EUTFE_UTF16_BE;
1014 encoding = unicode::EUTFE_UTF16_LE;
1021 //! Constructor for copying a wchar_t string from a pointer.
1022 ustring16(const wchar_t* const c)
1023 : array(0), allocated(0), used(0)
1025 #if __BYTE_ORDER == __BIG_ENDIAN
1026 encoding = unicode::EUTFE_UTF16_BE;
1028 encoding = unicode::EUTFE_UTF16_LE;
1031 if (sizeof(wchar_t) == 4)
1032 append(reinterpret_cast<const uchar32_t* const>(c));
1033 else if (sizeof(wchar_t) == 2)
1034 append(reinterpret_cast<const uchar16_t* const>(c));
1035 else if (sizeof(wchar_t) == 1)
1036 append(reinterpret_cast<const uchar8_t* const>(c));
1040 //! Constructor for copying a wchar_t string from a pointer with a given length.
1041 ustring16(const wchar_t* const c, u32 length)
1042 : array(0), allocated(0), used(0)
1044 #if __BYTE_ORDER == __BIG_ENDIAN
1045 encoding = unicode::EUTFE_UTF16_BE;
1047 encoding = unicode::EUTFE_UTF16_LE;
1050 if (sizeof(wchar_t) == 4)
1051 append(reinterpret_cast<const uchar32_t* const>(c), length);
1052 else if (sizeof(wchar_t) == 2)
1053 append(reinterpret_cast<const uchar16_t* const>(c), length);
1054 else if (sizeof(wchar_t) == 1)
1055 append(reinterpret_cast<const uchar8_t* const>(c), length);
1059 #ifdef USTRING_CPP0X
1060 //! Constructor for moving a ustring16
1061 ustring16(ustring16<TAlloc>&& other)
1062 : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1064 //std::cout << "MOVE constructor" << std::endl;
1066 other.allocated = 0;
1075 allocator.deallocate(array); // delete [] array;
1079 //! Assignment operator
1080 ustring16& operator=(const ustring16<TAlloc>& other)
1085 used = other.size_raw();
1086 if (used >= allocated)
1088 allocator.deallocate(array); // delete [] array;
1089 allocated = used + 1;
1090 array = allocator.allocate(used + 1); //new u16[used];
1093 const uchar16_t* p = other.c_str();
1094 for (u32 i=0; i<=used; ++i, ++p)
1099 // Validate our new UTF-16 string.
1106 #ifdef USTRING_CPP0X
1107 //! Move assignment operator
1108 ustring16& operator=(ustring16<TAlloc>&& other)
1112 //std::cout << "MOVE operator=" << std::endl;
1113 allocator.deallocate(array);
1115 array = other.array;
1116 allocated = other.allocated;
1117 encoding = other.encoding;
1127 //! Assignment operator for other string types
1128 template <class B, class A>
1129 ustring16<TAlloc>& operator=(const string<B, A>& other)
1131 *this = other.c_str();
1136 //! Assignment operator for UTF-8 strings
1137 ustring16<TAlloc>& operator=(const uchar8_t* const c)
1141 array = allocator.allocate(1); //new u16[1];
1146 if (!c) return *this;
1148 //! Append our string now.
1154 //! Assignment operator for UTF-16 strings
1155 ustring16<TAlloc>& operator=(const uchar16_t* const c)
1159 array = allocator.allocate(1); //new u16[1];
1164 if (!c) return *this;
1166 //! Append our string now.
1172 //! Assignment operator for UTF-32 strings
1173 ustring16<TAlloc>& operator=(const uchar32_t* const c)
1177 array = allocator.allocate(1); //new u16[1];
1182 if (!c) return *this;
1184 //! Append our string now.
1190 //! Assignment operator for wchar_t strings.
1191 /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1192 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1193 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1194 ustring16<TAlloc>& operator=(const wchar_t* const c)
1196 if (sizeof(wchar_t) == 4)
1197 *this = reinterpret_cast<const uchar32_t* const>(c);
1198 else if (sizeof(wchar_t) == 2)
1199 *this = reinterpret_cast<const uchar16_t* const>(c);
1200 else if (sizeof(wchar_t) == 1)
1201 *this = reinterpret_cast<const uchar8_t* const>(c);
1207 //! Assignment operator for other strings.
1208 /** Note that this assumes that a correct unicode string is stored in the string. **/
1210 ustring16<TAlloc>& operator=(const B* const c)
1213 *this = reinterpret_cast<const uchar32_t* const>(c);
1214 else if (sizeof(B) == 2)
1215 *this = reinterpret_cast<const uchar16_t* const>(c);
1216 else if (sizeof(B) == 1)
1217 *this = reinterpret_cast<const uchar8_t* const>(c);
1223 //! Direct access operator
1224 access operator [](const u32 index)
1226 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1227 iterator iter(*this, index);
1228 return iter.operator*();
1232 //! Direct access operator
1233 const access operator [](const u32 index) const
1235 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1236 const_iterator iter(*this, index);
1237 return iter.operator*();
1241 //! Equality operator
1242 bool operator ==(const uchar16_t* const str) const
1248 for(i=0; array[i] && str[i]; ++i)
1249 if (array[i] != str[i])
1252 return !array[i] && !str[i];
1256 //! Equality operator
1257 bool operator ==(const ustring16<TAlloc>& other) const
1259 for(u32 i=0; array[i] && other.array[i]; ++i)
1260 if (array[i] != other.array[i])
1263 return used == other.used;
1267 //! Is smaller comparator
1268 bool operator <(const ustring16<TAlloc>& other) const
1270 for(u32 i=0; array[i] && other.array[i]; ++i)
1272 s32 diff = array[i] - other.array[i];
1277 return used < other.used;
1281 //! Inequality operator
1282 bool operator !=(const uchar16_t* const str) const
1284 return !(*this == str);
1288 //! Inequality operator
1289 bool operator !=(const ustring16<TAlloc>& other) const
1291 return !(*this == other);
1295 //! Returns the length of a ustring16 in full characters.
1296 //! \return Length of a ustring16 in full characters.
1299 const_iterator i(*this, 0);
1310 //! Informs if the ustring is empty or not.
1311 //! \return True if the ustring is empty, false if not.
1314 return (size_raw() == 0);
1318 //! Returns a pointer to the raw UTF-16 string data.
1319 //! \return Pointer to a C-style NUL-terminated array of UTF-16 code units.
1320 const uchar16_t* c_str() const
1326 //! Compares the first n characters of this string with another.
1327 //! \param other Other string to compare to.
1328 //! \param n Number of characters to compare.
1329 //! \return True if the n first characters of both strings are equal.
1330 bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1333 const uchar16_t* oa = other.c_str();
1334 for(i=0; array[i] && oa[i] && i < n; ++i)
1335 if (array[i] != oa[i])
1338 // If one (or both) of the strings ended before n characters were compared,
1339 // they are equal only if they have the same length.
1340 return (i == n) || (used == other.used);
1344 //! Compares the first n characters of this string with another.
1345 //! \param str Other string to compare to.
1346 //! \param n Number of characters to compare.
1347 //! \return True if the n first characters of both strings are equal.
1348 bool equalsn(const uchar16_t* const str, u32 n) const
1353 for(i=0; array[i] && str[i] && i < n; ++i)
1354 if (array[i] != str[i])
1357 // If one (or both) of the strings ended before n characters were compared,
1358 // they are equal only if they have the same length.
1359 return (i == n) || (array[i] == 0 && str[i] == 0);
1363 //! Appends a character to this ustring16
1364 //! \param character The character to append.
1365 //! \return A reference to our current string.
1366 ustring16<TAlloc>& append(uchar32_t character)
1368 if (used + 2 >= allocated)
1369 reallocate(used + 2);
1371 if (character > 0xFFFF)
1375 // character will be multibyte, so split it up into a surrogate pair.
1376 uchar16_t x = static_cast<uchar16_t>(character);
1377 uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1378 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1385 array[used-1] = character;
1393 //! Appends a UTF-8 string to this ustring16
1394 //! \param other The UTF-8 string to append.
1395 //! \param length The length of the string to append.
1396 //! \return A reference to our current string.
1397 ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1402 // Determine if the string is long enough for a BOM.
1404 const uchar8_t* p = other;
1408 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1411 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1412 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1414 if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1415 c_bom = unicode::EUTFE_UTF8;
1418 // If a BOM was found, don't include it in the string.
1419 const uchar8_t* c2 = other;
1420 if (c_bom != unicode::EUTFE_NONE)
1422 c2 = other + unicode::BOM_UTF8_LEN;
1423 length -= unicode::BOM_UTF8_LEN;
1426 // Calculate the size of the string to read in.
1432 } while(*p++ && len < length);
1436 // If we need to grow the array, do it now.
1437 if (used + len >= allocated)
1438 reallocate(used + (len * 2));
1441 // Convert UTF-8 to UTF-16.
1443 for (u32 l = 0; l<len;)
1446 if (((c2[l] >> 6) & 0x03) == 0x02)
1447 { // Invalid continuation byte.
1448 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1451 else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1452 { // Invalid byte - overlong encoding.
1453 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1456 else if ((c2[l] & 0xF8) == 0xF0)
1457 { // 4 bytes UTF-8, 2 bytes UTF-16.
1458 // Check for a full string.
1461 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1469 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1470 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1471 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1474 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1480 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1481 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1482 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1483 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1485 // Split v up into a surrogate pair.
1486 uchar16_t x = static_cast<uchar16_t>(v);
1487 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1488 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1493 ++used; // Using two shorts this time, so increase used by 1.
1495 else if ((c2[l] & 0xF0) == 0xE0)
1496 { // 3 bytes UTF-8, 1 byte UTF-16.
1497 // Check for a full string.
1500 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1508 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1509 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1512 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1518 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1519 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1520 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1524 else if ((c2[l] & 0xE0) == 0xC0)
1525 { // 2 bytes UTF-8, 1 byte UTF-16.
1526 // Check for a full string.
1529 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1535 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1537 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1543 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1544 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1545 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1550 { // 1 byte UTF-8, 1 byte UTF-16.
1553 { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1554 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1556 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1562 // Validate our new UTF-16 string.
1569 //! Appends a UTF-16 string to this ustring16.
//! A leading UTF-16 byte-order mark selects the source endianness and is not copied.
1570 //! \param other The UTF-16 string to append.
1571 //! \param length Upper bound on the number of uchar16_t elements read from other; reading also stops at a NUL.
1572 //! \return A reference to our current string.
1573 ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1578 // Determine if the string is long enough for a BOM.
1580 const uchar16_t* p = other;
1584 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1586 // Check for the BOM to determine the string's endianness.
// NOTE(review): both memcmp calls read BOM_ENCODE_UTF16_LEN bytes from other; confirm the length probe above guards shorter input.
1587 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1588 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1589 c_end = unicode::EUTFEE_LITTLE;
1590 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1591 c_end = unicode::EUTFEE_BIG;
1593 // If a BOM was found, don't include it in the string.
1594 const uchar16_t* c2 = other;
1595 if (c_end != unicode::EUTFEE_NATIVE)
1597 c2 = other + unicode::BOM_UTF16_LEN;
1598 length -= unicode::BOM_UTF16_LEN;
1601 // Calculate the size of the string to read in.
1607 } while(*p++ && len < length);
1611 // If we need to grow the size of the array, do it now.
// The new size is used + 2*len, i.e. more than the len elements copied below.
1612 if (used + len >= allocated)
1613 reallocate(used + (len * 2));
1617 // Copy the string now.
1618 unicode::EUTF_ENDIAN m_end = getEndianness();
// NOTE(review): c2 is indexed with the destination index l, which starts at start rather than 0;
// confirm this is not meant to be c2[l - start].
1619 for (u32 l = start; l < start + len; ++l)
1621 array[l] = (uchar16_t)c2[l];
// Swap bytes only when a BOM declared an endianness that differs from ours.
1622 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1623 array[l] = unicode::swapEndian16(array[l]);
1628 // Validate our new UTF-16 string.
1634 //! Appends a UTF-32 string to this ustring16, converting it to UTF-16.
//! A leading UTF-32 byte-order mark selects the source endianness and is not copied.
1635 //! \param other The UTF-32 string to append.
1636 //! \param length Upper bound on the number of uchar32_t elements read from other; reading also stops at a NUL.
1637 //! \return A reference to our current string.
1638 ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1643 // Check for the BOM to determine the string's endianness.
1644 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1645 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1646 c_end = unicode::EUTFEE_LITTLE;
1647 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1648 c_end = unicode::EUTFEE_BIG;
1650 // If a BOM was found, don't include it in the string.
1651 const uchar32_t* c2 = other;
1652 if (c_end != unicode::EUTFEE_NATIVE)
1654 c2 = other + unicode::BOM_UTF32_LEN;
1655 length -= unicode::BOM_UTF32_LEN;
1658 // Calculate the size of the string to read in.
1660 const uchar32_t* p = c2;
1664 } while(*p++ && len < length);
1668 // If we need to grow the size of the array, do it now.
1669 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1670 if (used + (len * 2) >= allocated)
1671 reallocate(used + ((len * 2) * 2));
1674 // Convert UTF-32 to UTF-16.
1675 unicode::EUTF_ENDIAN m_end = getEndianness();
1677 for (u32 l = 0; l<len; ++l)
1681 uchar32_t ch = c2[l];
// Swap bytes only when a BOM declared an endianness that differs from ours.
1682 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1683 ch = unicode::swapEndian32(ch);
1687 // Split ch up into a surrogate pair as it is over 16 bits long.
// vh = high-surrogate base | (((bits 16..20 of ch) - 1) << 6) | (top 6 bits of the low 16 bits);
// vl = low-surrogate base | (low 10 bits of ch).
1688 uchar16_t x = static_cast<uchar16_t>(ch);
1689 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1690 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1693 ++used; // Using two shorts, so increased used again.
1695 else if (ch >= 0xD800 && ch <= 0xDFFF)
1697 // Between possible UTF-16 surrogates (invalid!)
1698 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
// Any other value is stored as a single UTF-16 code unit.
1700 else array[pos++] = static_cast<uchar16_t>(ch);
1704 // Validate our new UTF-16 string.
1711 //! Appends a ustring16 to this ustring16.
1712 //! \param other The string to append to this one.
1713 //! \return A reference to our current string.
1714 ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1716 const uchar16_t* oa = other.c_str();
1718 u32 len = other.size_raw();
// Grow the buffer first if the extra raw code units would not fit.
1720 if (used + len >= allocated)
1721 reallocate(used + len);
// Copy the raw UTF-16 code units directly behind the current contents.
1723 for (u32 l=0; l<len; ++l)
1724 array[used+l] = oa[l];
1733 //! Appends a certain amount of characters of a ustring16 to this ustring16.
1734 //! \param other The string to append to this one.
1735 //! \param length How many characters of the other string to add to this one.
1736 //! \return A reference to our current string.
1737 ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1739 if (other.size() == 0)
1742 if (other.size() < length)
// Budget two UTF-16 code units per requested character (presumably the surrogate-pair worst case).
1748 if (used + length * 2 >= allocated)
1749 reallocate(used + length * 2);
1751 const_iterator iter(other, 0);
// Iterate until other is exhausted or the counter l reaches zero.
1753 while (!iter.atEnd() && l)
1755 uchar32_t c = *iter;
1765 //! Reserves some memory.
1766 //! \param count The amount of characters to reserve.
1767 void reserve(u32 count)
// Requests below the current allocation take the branch guarded here.
1769 if (count < allocated)
1776 //! Finds the first occurrence of a character.
1777 //! \param c The character to search for, given as a uchar32_t value.
1778 //! \return Position where the character has been found, or -1 if not found.
1779 s32 findFirst(uchar32_t c) const
// Scanning starts at position 0.
1781 const_iterator i(*this, 0);
1796 //! Finds the first occurrence of any character from a list.
1797 //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should hold the values of 'a' and 'b'.
1798 //! \param count The number of uchar32_t entries in c to consider.
1799 //! \return Position where one of the characters has been found, or -1 if not found.
1800 s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1805 const_iterator i(*this, 0);
// Check the current character against each of the count list entries.
1811 for (u32 j=0; j<count; ++j)
1822 //! Finds the first position of a character not in a given list.
1823 //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should hold the values of 'a' and 'b'.
1824 //! \param count The number of uchar32_t entries in c to consider.
1825 //! \return Position where the character has been found, or -1 if not found.
1826 s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1831 const_iterator i(*this, 0);
// Check the current character against each of the count list entries.
1838 for (j=0; j<count; ++j)
1851 //! Finds the last position of a character not in a given list.
1852 //! \param c A list of characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should hold the values of 'a' and 'b'.
1853 //! \param count The number of uchar32_t entries in c to consider.
1854 //! \return Position where the character has been found, or -1 if not found.
1855 s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
// Start from end() and walk towards the start; pos begins at size() - 1.
1860 const_iterator i(end());
1863 s32 pos = size() - 1;
1864 while (!i.atStart())
// Check the current character against each of the count list entries.
1868 for (j=0; j<count; ++j)
1881 //! Finds the next occurrence of a character at or after a given position.
1882 //! \param c The character to search for.
1883 //! \param startPos The position in the string to start searching.
1884 //! \return Position where the character has been found, or -1 if not found.
1885 s32 findNext(uchar32_t c, u32 startPos) const
// The iterator is positioned at startPos before scanning.
1887 const_iterator i(*this, startPos);
1903 //! Finds the last occurrence of a character.
1904 //! \param c The character to search for.
1905 //! \param start The start position of the reverse search ( default = -1, on end ).
1906 //! \return Position where the character has been found, or -1 if not found.
1907 s32 findLast(uchar32_t c, s32 start = -1) const
// A negative start selects s; the value is clamped to [0, s] and then moved back by one.
1910 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1912 const_iterator i(*this, start);
// Walk backwards until the iterator reports the start of the string.
1914 while (!i.atStart())
1926 //! Finds the last occurrence of any character from a list.
1927 //! \param c A list of characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should hold the values of 'a' and 'b'.
1928 //! \param count The number of uchar32_t entries in c to consider.
1929 //! \return Position where one of the characters has been found, or -1 if not found.
1930 s32 findLastChar(const uchar32_t* const c, u32 count=1) const
// Start from end() and walk towards the start.
1935 const_iterator i(end());
1939 while (!i.atStart())
// Check the current character against each of the count list entries.
1942 for (u32 j=0; j<count; ++j)
1953 //! Finds another ustring16 in this ustring16.
1954 //! \param str The string to find.
1955 //! \param start The start position of the search.
1956 //! \return Position where the ustring16 has been found, or -1 if not found.
1957 s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1959 u32 my_size = size();
1960 u32 their_size = str.size();
// An empty needle, or one longer than the remaining text, is rejected up front.
// NOTE(review): my_size and start are both u32 (presumably unsigned), so my_size - start
// wraps around when start > my_size; confirm callers never pass such a start.
1962 if (their_size == 0 || my_size - start < their_size)
1965 const_iterator i(*this, start);
// i2 walks this string from the candidate position while j walks str from its beginning.
1970 const_iterator i2(i);
1971 const_iterator j(str, 0);
1972 uchar32_t t1 = (uchar32_t)*i2;
1973 uchar32_t t2 = (uchar32_t)*j;
1980 t1 = (uchar32_t)*i2;
1991 //! Finds another ustring16 in this ustring16 by comparing raw UTF-16 code units.
1992 //! \param str The string to find.
1993 //! \param start The index into the raw UTF-16 array at which the search starts.
1994 //! \return Position where the string has been found, or -1 if not found.
1995 s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1997 const uchar16_t* data = str.c_str();
// Try every raw start index that still leaves room for len code units.
2008 for (u32 i=start; i<=used-len; ++i)
// Advance while the code units agree and the needle's terminating NUL has not been reached.
2012 while(data[j] && array[i+j] == data[j])
2024 //! Returns a substring.
2025 //! \param begin Start of substring.
2026 //! \param length Length of substring.
2027 //! \return The substring as a new ustring16 (returned by value).
2028 ustring16<TAlloc> subString(u32 begin, s32 length) const
2031 // if start after ustring16
2032 // or no proper substring length
// ...then the result is an empty string.
2033 if ((length <= 0) || (begin>=len))
2034 return ustring16<TAlloc>("");
2035 // clamp length to maximal value
2036 if ((length+begin) > len)
2039 ustring16<TAlloc> o;
// Reserve (length + 1) * 2 up front so the copy loop does not have to grow o repeatedly.
2040 o.reserve((length+1) * 2);
2042 const_iterator i(*this, begin);
// Iterate until the source ends or length has counted down to zero.
2043 while (!i.atEnd() && length)
2054 //! Appends a character to this ustring16.
2055 //! \param c Character to append.
2056 //! \return A reference to our current string.
2057 ustring16<TAlloc>& operator += (char c)
// The char is cast to uchar32_t and handed to append().
2059 append((uchar32_t)c);
2064 //! Appends a character to this ustring16.
2065 //! \param c Character to append, given as a uchar32_t value.
2066 //! \return A reference to our current string.
2067 ustring16<TAlloc>& operator += (uchar32_t c)
2074 //! Appends a number to this ustring16.
2075 //! \param c Number to append.
2076 //! \return A reference to our current string.
2077 ustring16<TAlloc>& operator += (short c)
// The number is turned into text via core::stringc, and that text is appended.
2079 append(core::stringc(c));
2084 //! Appends a number to this ustring16.
2085 //! \param c Number to append.
2086 //! \return A reference to our current string.
2087 ustring16<TAlloc>& operator += (unsigned short c)
// The number is turned into text via core::stringc, and that text is appended.
2089 append(core::stringc(c));
2094 #ifdef USTRING_CPP0X_NEWLITERALS
// This overload is only compiled when USTRING_CPP0X_NEWLITERALS is defined.
2095 //! Appends a number to this ustring16.
2096 //! \param c Number to append.
2097 //! \return A reference to our current string.
2098 ustring16<TAlloc>& operator += (int c)
// The number is turned into text via core::stringc, and that text is appended.
2100 append(core::stringc(c));
2105 //! Appends a number to this ustring16.
2106 //! \param c Number to append.
2107 //! \return A reference to our current string.
2108 ustring16<TAlloc>& operator += (unsigned int c)
// The number is turned into text via core::stringc, and that text is appended.
2110 append(core::stringc(c));
2116 //! Appends a number to this ustring16.
2117 //! \param c Number to append.
2118 //! \return A reference to our current string.
2119 ustring16<TAlloc>& operator += (long c)
// The number is turned into text via core::stringc, and that text is appended.
2121 append(core::stringc(c));
2126 //! Appends a number to this ustring16.
2127 //! \param c Number to append.
2128 //! \return A reference to our current string.
2129 ustring16<TAlloc>& operator += (unsigned long c)
// The number is turned into text via core::stringc, and that text is appended.
2131 append(core::stringc(c));
2136 //! Appends a number to this ustring16.
2137 //! \param c Number to append.
2138 //! \return A reference to our current string.
2139 ustring16<TAlloc>& operator += (double c)
// The number is turned into text via core::stringc, and that text is appended.
2141 append(core::stringc(c));
2146 //! Appends a UTF-16 string, given as a uchar16_t pointer, to this ustring16.
2147 //! \param c The UTF-16 string to append.
2148 //! \return A reference to our current string.
2149 ustring16<TAlloc>& operator += (const uchar16_t* const c)
2156 //! Appends a ustring16 to this ustring16.
2157 //! \param other ustring16 to append.
2158 //! \return A reference to our current string.
2159 ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2166 //! Replaces all characters of a given type with another one.
2167 //! \param toReplace Character to replace.
2168 //! \param replaceWith Character replacing the old one.
2169 //! \return A reference to our current string.
2170 ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
2172 iterator i(*this, 0);
// Dereferencing the iterator yields an access object; its uchar32_t value is compared with toReplace.
2175 typename ustring16<TAlloc>::access a = *i;
2176 if ((uchar32_t)a == toReplace)
2184 //! Replaces all instances of a string with another one.
//! Works on the raw UTF-16 code units; the strategy depends on whether the replacement
//! is the same length as, shorter than, or longer than the string being replaced.
2185 //! \param toReplace The string to replace.
2186 //! \param replaceWith The string replacing the old one.
2187 //! \return A reference to our current string.
2188 ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2190 if (toReplace.size() == 0)
2193 const uchar16_t* other = toReplace.c_str();
2194 const uchar16_t* replace = replaceWith.c_str();
2195 const u32 other_size = toReplace.size_raw();
2196 const u32 replace_size = replaceWith.size_raw();
2198 // Determine the delta. The algorithm will change depending on the delta.
2199 s32 delta = replace_size - other_size;
2201 // A character for character replace. The string will not shrink or grow.
// NOTE(review): other is a raw uchar16_t pointer while find_raw takes a ustring16;
// this presumably builds a temporary ustring16 per call — confirm, or pass toReplace directly.
2205 while ((pos = find_raw(other, pos)) != -1)
2207 for (u32 i = 0; i < replace_size; ++i)
2208 array[pos + i] = replace[i];
2214 // We are going to be removing some characters. The string will shrink.
// pos is the read index and i the write index; the loop also covers index used.
2218 for (u32 pos = 0; pos <= used; ++i, ++pos)
2220 // Is this potentially a match?
2221 if (array[pos] == *other)
2223 // Check to see if we have a match.
2225 for (j = 0; j < other_size; ++j)
2227 if (array[pos + j] != other[j])
2231 // If we have a match, replace characters.
2232 if (j == other_size)
2234 for (j = 0; j < replace_size; ++j)
2235 array[i + j] = replace[j];
// Both indices stop one short of the full step because the loop header adds one more.
2236 i += replace_size - 1;
2237 pos += other_size - 1;
2242 // No match found, just copy characters.
// NOTE(review): this writes array[i - 1] while the replacement branch writes from array[i];
// confirm the offset is intended (the initial value of i is not visible here).
2243 array[i - 1] = array[pos];
2251 // We are going to be adding characters, so the string size will increase.
2252 // Count the number of times toReplace exists in the string so we can allocate the new size.
2255 while ((pos = find_raw(other, pos)) != -1)
2261 // Re-allocate the string now, if needed.
// len is the total growth: delta extra code units for each occurrence found.
2262 u32 len = delta * find_count;
2263 if (used + len >= allocated)
2264 reallocate(used + len);
2268 while ((pos = find_raw(other, pos)) != -1)
// start: last code unit of the match; ptr: index used; end: ptr shifted right by delta.
2270 uchar16_t* start = array + pos + other_size - 1;
2271 uchar16_t* ptr = array + used;
2272 uchar16_t* end = array + used + delta;
2274 // Shift characters to make room for the string.
2275 while (ptr != start)
2282 // Add the new string now.
2283 for (u32 i = 0; i < replace_size; ++i)
2284 array[pos + i] = replace[i];
// Continue searching behind the text that was just written.
2286 pos += replace_size;
2290 // Terminate the string and return ourself.
2296 //! Removes characters from a ustring16.
2297 //! \param c The character to remove.
2298 //! \return A reference to our current string.
2299 ustring16<TAlloc>& remove(uchar32_t c)
2303 u32 len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
// The loop runs up to and including index used.
2304 for (u32 i=0; i<=used; ++i)
2307 if (!UTF16_IS_SURROGATE_HI(array[i]))
2309 else if (i + 1 <= used)
2311 // Convert the surrogate pair into a single UTF-32 character.
2312 uc32 = unicode::toUTF32(array[i], array[i + 1]);
// len2 is the number of UTF-16 code units the current character occupies.
2314 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
// Copy the code unit at i down to pos, compacting the array in place.
2322 array[pos++] = array[i];
2324 array[pos++] = array[++i];
2332 //! Removes a ustring16 from the ustring16.
2333 //! \param toRemove The string to remove.
2334 //! \return A reference to our current string.
2335 ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
// Nothing to do for an empty pattern.
2337 u32 size = toRemove.size_raw();
2338 if (size == 0) return *this;
2340 const uchar16_t* tra = toRemove.c_str();
// The loop runs up to and including index used.
2343 for (u32 i=0; i<=used; ++i)
// Compare raw code units of this string against the pattern.
2348 if (array[i + j] != tra[j])
// Copy the code unit at i down to pos, compacting the array in place.
2359 array[pos++] = array[i];
2367 //! Removes characters from the ustring16.
2368 //! \param characters The characters to remove.
2369 //! \return A reference to our current string.
2370 ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2372 if (characters.size_raw() == 0)
2377 const_iterator iter(characters);
// The loop runs up to and including index used.
2378 for (u32 i=0; i<=used; ++i)
2381 if (!UTF16_IS_SURROGATE_HI(array[i]))
2383 else if (i + 1 <= used)
2385 // Convert the surrogate pair into a single UTF-32 character.
2386 uc32 = unicode::toUTF32(array[i], array[i+1]);
// len2 is the number of UTF-16 code units the current character occupies.
2388 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
// Walk the list of characters to remove.
2392 while (!iter.atEnd())
2394 uchar32_t c = *iter;
2397 found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
// Copy the code unit at i down to pos, compacting the array in place.
2406 array[pos++] = array[i];
2408 array[pos++] = array[++i];
2416 //! Trims the ustring16.
2417 //! Removes the specified characters (by default, Latin-1 whitespace) from the beginning and the end of the ustring16.
2418 //! \param whitespace The characters that are to be considered as whitespace.
2419 //! \return A reference to our current string.
2420 ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
// The find*NotInList helpers take uchar32_t lists, so convert the whitespace set first.
2422 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2424 // find start and end of the substring without the specified characters
// NOTE(review): the count passed is whitespace.used + 1 (raw UTF-16 code units plus one),
// not the number of entries in utf32white; confirm the array always holds that many elements.
2425 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2429 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
// Keep characters begin..end inclusive and assign the result back to this string.
2431 return (*this = subString(begin, (end +1) - begin));
2435 //! Erases a character from the ustring16.
2436 //! May be slow, because all elements following after the erased element have to be copied.
2437 //! \param index Index of element to be erased.
2438 //! \return A reference to our current string.
2439 ustring16<TAlloc>& erase(u32 index)
2441 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2443 iterator i(*this, index);
// len is 2 when t is above 0xFFFF, otherwise 1.
2446 u32 len = (t > 0xFFFF ? 2 : 1);
// Shift everything behind the erased character down by len, up to and including index used.
2448 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2449 array[j - len] = array[j];
2458 //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
//! Offending code units are overwritten with unicode::UTF_REPLACEMENT_CHARACTER.
2459 //! \return A reference to our current string.
2460 ustring16<TAlloc>& validate()
2462 // Validate all unicode characters.
2463 for (u32 i=0; i<allocated; ++i)
2465 // Terminate on existing null.
2471 if (UTF16_IS_SURROGATE(array[i]))
// A surrogate in the last allocated slot, or a low surrogate at this position, is replaced.
2473 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2474 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// A high surrogate that is not followed by a low surrogate is replaced as well.
2475 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2476 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// Values in 0xFDD0..0xFDEF are replaced too.
2479 if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2480 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// Presumably the fallback when no terminator was found: the last slot becomes the end — TODO confirm.
2487 used = allocated - 1;
2494 //! Gets the last char of the ustring16, or 0.
2495 //! \return The last char of the ustring16, or 0.
2496 uchar32_t lastChar() const
// A trailing low surrogate means the last character spans two code units.
2501 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2503 // Make sure we have a paired surrogate.
2507 // Check for an invalid surrogate.
2508 if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2511 // Convert the surrogate pair into a single UTF-32 character.
2512 return unicode::toUTF32(array[used-2], array[used-1]);
// Otherwise the last code unit is the character itself.
2516 return array[used-1];
2521 //! Split the ustring16 into parts.
2522 /** This method will split a ustring16 at certain delimiter characters
2523 into the container passed in as reference. The type of the container
2524 has to be given as template parameter. It must provide a push_back and a size method.
2526 \param ret The result container
2527 \param c List of delimiter characters, given as uchar32_t values
2528 \param count Number of delimiter characters
2529 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2530 container. If two delimiters occur without a character in between, an
2531 empty substring would be placed in the result. If this flag is set,
2532 only non-empty strings are stored.
2533 \param keepSeparators Flag which allows to add the separator to the
2534 result ustring16. If this flag is true, the concatenation of the
2535 substrings results in the original ustring16. Otherwise, only the
2536 characters between the delimiters are returned.
2537 \return The number of resulting substrings
2539 template<class container>
2540 u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2545 const_iterator i(*this);
// Remember the container size on entry so only the substrings added here are counted.
2546 const u32 oldSize=ret.size();
2550 bool lastWasSeparator = false;
2554 bool foundSeparator = false;
// Check the current character against each of the count delimiters.
2555 for (u32 j=0; j<count; ++j)
2559 if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
// The token starts at raw index lastpospos and is pos - lastpos long.
2561 ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2562 foundSeparator = true;
// With keepSeparators the next token starts at the separator, otherwise one position behind it.
2563 lastpos = (keepSeparators ? pos : pos + 1);
2564 lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2568 lastWasSeparator = foundSeparator;
// Emit the remaining text as a final token.
2574 ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2575 return ret.size()-oldSize;
2579 //! Split the ustring16 into parts.
2580 /** This method will split a ustring16 at certain delimiter characters
2581 into the container passed in as reference. The type of the container
2582 has to be given as template parameter. It must provide a push_back and a size method.
2584 \param ret The result container
2585 \param c A unicode string of delimiter characters
2586 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2587 container. If two delimiters occur without a character in between, an
2588 empty substring would be placed in the result. If this flag is set,
2589 only non-empty strings are stored.
2590 \param keepSeparators Flag which allows to add the separator to the
2591 result ustring16. If this flag is true, the concatenation of the
2592 substrings results in the original ustring16. Otherwise, only the
2593 characters between the delimiters are returned.
2594 \return The number of resulting substrings
2596 template<class container>
2597 u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
// Convert the delimiter string to a uchar32_t array and forward to the pointer/count overload.
2599 core::array<uchar32_t> v = c.toUTF32();
2600 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2604 //! Gets the size of the allocated memory buffer for the string.
2605 //! \return The size of the allocated memory buffer.
2606 u32 capacity() const
2612 //! Returns the raw number of UTF-16 code units in the string, counting each surrogate individually.
2613 //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
2614 u32 size_raw() const
2620 //! Inserts a character into the string.
2621 //! \param c The character to insert.
2622 //! \param pos The position to insert the character.
2623 //! \return A reference to our current string.
2624 ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
// len is the number of UTF-16 code units c needs: 2 above 0xFFFF, otherwise 1.
2626 u8 len = (c > 0xFFFF ? 2 : 1);
2628 if (used + len >= allocated)
2629 reallocate(used + len);
2633 iterator iter(*this, pos);
// Shift the tail right by len, working backwards from the end.
2634 for (u32 i = used - 2; i > iter.getPos(); --i)
2635 array[i] = array[i - len];
2639 // c will be multibyte, so split it up into a surrogate pair.
// vh = high-surrogate base | (((bits 16..20 of c) - 1) << 6) | (top 6 bits of the low 16 bits);
// vl = low-surrogate base | (low 10 bits of c).
2640 uchar16_t x = static_cast<uchar16_t>(c);
2641 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2642 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2643 array[iter.getPos()] = vh;
2644 array[iter.getPos()+1] = vl;
// Single code unit case: store c directly.
2648 array[iter.getPos()] = static_cast<uchar16_t>(c);
2655 //! Inserts a string into the string.
2656 //! \param c The string to insert.
2657 //! \param pos The position to insert the string.
2658 //! \return A reference to our current string.
2659 ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
// Inserting an empty string is a no-op.
2661 u32 len = c.size_raw();
2662 if (len == 0) return *this;
2664 if (used + len >= allocated)
2665 reallocate(used + len);
2669 iterator iter(*this, pos);
// Shift the tail right by len raw code units, working backwards from the end.
2670 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2671 array[i] = array[i - len];
// Then copy the len raw code units of c.
2673 const uchar16_t* s = c.c_str();
2674 for (u32 i = 0; i < len; ++i)
2685 //! Inserts a single raw UTF-16 code unit into the string.
2686 //! \param c The code unit to insert.
2687 //! \param pos The index into the raw UTF-16 array at which to insert.
2688 //! \return A reference to our current string.
2689 ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2691 if (used + 1 >= allocated)
2692 reallocate(used + 1);
// Shift the tail right by one element, working backwards down to pos.
2696 for (u32 i = used - 1; i > pos; --i)
2697 array[i] = array[i - 1];
2705 //! Removes a single raw UTF-16 code unit from the string.
2706 //! \param pos Index into the raw UTF-16 array of the code unit to remove.
2707 //! \return A reference to our current string.
2708 ustring16<TAlloc>& erase_raw(u32 pos)
// Shift everything behind pos down by one element.
// NOTE(review): with i == used this reads array[used + 1]; confirm the buffer always has that slot.
2710 for (u32 i=pos; i<=used; ++i)
2712 array[i] = array[i + 1];
2720 //! Replaces a character in the string.
2721 //! \param c The new character, given as a single uchar16_t value.
2722 //! \param pos The position of the character to replace.
2723 //! \return A reference to our current string.
2724 ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2731 //! Returns an iterator to the beginning of the string.
2732 //! \return An iterator to the beginning of the string.
// The iterator is constructed at position 0.
2735 iterator i(*this, 0);
2740 //! Returns a const iterator to the beginning of the string.
2741 //! \return A const iterator to the beginning of the string.
2742 const_iterator begin() const
// The iterator is constructed at position 0.
2744 const_iterator i(*this, 0);
2749 //! Returns a const iterator to the beginning of the string.
2750 //! \return A const iterator to the beginning of the string.
2751 const_iterator cbegin() const
// The iterator is constructed at position 0.
2753 const_iterator i(*this, 0);
2758 //! Returns an iterator to the end of the string.
2759 //! \return An iterator to the end of the string.
// The iterator is first constructed at position 0.
2762 iterator i(*this, 0);
2768 //! Returns a const iterator to the end of the string.
2769 //! \return A const iterator to the end of the string.
2770 const_iterator end() const
// The iterator is first constructed at position 0.
2772 const_iterator i(*this, 0);
2778 //! Returns a const iterator to the end of the string.
2779 //! \return A const iterator to the end of the string.
2780 const_iterator cend() const
// The iterator is first constructed at position 0.
2782 const_iterator i(*this, 0);
2788 //! Converts the string to a UTF-8 encoded string.
2789 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2790 //! \return A string containing the UTF-8 encoded string.
2791 core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2793 core::string<uchar8_t> ret;
// Reserve four bytes per raw code unit, plus room for the optional BOM and one extra element.
2794 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2795 const_iterator iter(*this, 0);
2797 // Add the byte order mark if the user wants it.
2800 ret.append(unicode::BOM_ENCODE_UTF8[0]);
2801 ret.append(unicode::BOM_ENCODE_UTF8[1]);
2802 ret.append(unicode::BOM_ENCODE_UTF8[2]);
2805 while (!iter.atEnd())
2807 uchar32_t c = *iter;
// Four-byte form: lead byte 0xF0 | bits 18..20, then three continuation bytes 0x80 | 6 bits each.
2810 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2811 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2812 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2813 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
// Three-byte form: lead byte 0xE0 | bits 12..15, then two continuation bytes.
2821 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2822 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2823 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
// Two-byte form: lead byte 0xC0 | bits 6..10, then one continuation byte.
2830 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2831 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
// Single-byte form: the value is stored unchanged.
2837 ret.append(static_cast<uchar8_t>(c));
2845 //! Converts the string to a UTF-8 encoded string array.
2846 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2847 //! \return An array containing the UTF-8 encoded string.
2848 core::array<uchar8_t> toUTF8(const bool addBOM = false) const
// Size the array for four bytes per raw code unit, plus the optional BOM and one extra element.
2850 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2851 const_iterator iter(*this, 0);
2853 // Add the byte order mark if the user wants it.
2856 ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2857 ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2858 ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2861 while (!iter.atEnd())
2863 uchar32_t c = *iter;
// Four-byte form: lead byte 0xF0 | bits 18..20, then three continuation bytes 0x80 | 6 bits each.
2866 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2867 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2868 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2869 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
// Three-byte form: lead byte 0xE0 | bits 12..15, then two continuation bytes.
2877 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2878 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2879 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
// Two-byte form: lead byte 0xC0 | bits 6..10, then one continuation byte.
2886 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2887 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
// Single-byte form: the value is stored unchanged.
2893 ret.push_back(static_cast<uchar8_t>(c));
2902 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2903 //! Converts the string to a UTF-16 encoded string.
//! Only compiled when USTRING_CPP0X_NEWLITERALS is defined.
2904 //! \param endian The desired endianness of the string.
2905 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2906 //! \return A string containing the UTF-16 encoded string.
2907 core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2909 core::string<char16_t> ret;
2910 ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2912 // Add the BOM if specified.
// Native order stores unicode::BOM directly; an explicit order writes the two BOM bytes one by one.
2915 if (endian == unicode::EUTFEE_NATIVE)
2916 ret[0] = unicode::BOM;
2917 else if (endian == unicode::EUTFEE_LITTLE)
2919 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2920 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2921 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2925 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2926 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2927 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
// Byte-swap only when an explicit endianness differs from ours.
2932 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2934 char16_t* ptr = ret.c_str();
// NOTE(review): ptr is read and incremented in the same expression; before C++17 the two
// accesses are unsequenced, so consider splitting this into two statements.
2935 for (u32 i = 0; i < ret.size(); ++i)
2936 *ptr++ = unicode::swapEndian16(*ptr);
2943 //! Converts the string to a UTF-16 encoded string array.
2944 //! A core::string-returning variant, toUTF16_s(), is only compiled when USTRING_CPP0X_NEWLITERALS is defined.
2945 //! \param endian The desired endianness of the string.
2946 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2947 //! \return An array containing the UTF-16 encoded string.
2948 core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
// Size the array for every raw code unit, plus the optional BOM and one extra element.
2950 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2951 uchar16_t* ptr = ret.pointer();
2953 // Add the BOM if specified.
// Native order stores unicode::BOM directly; an explicit order writes the two BOM bytes one by one.
2956 if (endian == unicode::EUTFEE_NATIVE)
2957 *ptr = unicode::BOM;
2958 else if (endian == unicode::EUTFEE_LITTLE)
2960 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2961 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2962 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2966 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2967 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2968 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
// Copy the used raw code units in one go.
2973 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
// Byte-swap only when an explicit endianness differs from ours.
2974 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2976 for (u32 i = 0; i <= used; ++i)
2977 ptr[i] = unicode::swapEndian16(ptr[i]);
// The array was filled through its raw pointer, so set its element count explicitly.
2979 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2985 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2986 //! Converts the string to a UTF-32 encoded string.
//! Only compiled when USTRING_CPP0X_NEWLITERALS is defined.
2987 //! \param endian The desired endianness of the string.
2988 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2989 //! \return A string containing the UTF-32 encoded string.
2990 core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2992 core::string<char32_t> ret;
// Reserve one element per character, plus the optional BOM and one extra element.
2993 ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
2994 const_iterator iter(*this, 0);
2996 // Add the BOM if specified.
2999 if (endian == unicode::EUTFEE_NATIVE)
3000 ret.append(unicode::BOM);
// For an explicit endianness the four BOM bytes are placed individually through t.chunk.
3009 if (endian == unicode::EUTFEE_LITTLE)
3011 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3012 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3013 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3014 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3018 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3019 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3020 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3021 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3027 while (!iter.atEnd())
3029 uchar32_t c = *iter;
// Byte-swap only when an explicit endianness differs from ours.
3030 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3031 c = unicode::swapEndian32(c);
3040 //! Converts the string to a UTF-32 encoded string array.
3041 //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
3042 //! \param endian The desired endianness of the string.
3043 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3044 //! \return An array containing the UTF-32 encoded string.
3045 core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3047 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3048 const_iterator iter(*this, 0);
3050 // Add the BOM if specified.
3053 if (endian == unicode::EUTFEE_NATIVE)
3054 ret.push_back(unicode::BOM);
3063 if (endian == unicode::EUTFEE_LITTLE)
3065 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3066 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3067 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3068 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3072 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3073 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3074 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3075 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3077 ret.push_back(t.full);
3082 while (!iter.atEnd())
3084 uchar32_t c = *iter;
3085 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3086 c = unicode::swapEndian32(c);
3094 //! Converts the string to a wchar_t encoded string.
3095 /** The size of a wchar_t changes depending on the platform. This function will store a
3096 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3097 //! \param endian The desired endianness of the string.
3098 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3099 //! \return A string containing the wchar_t encoded string.
3100 core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3102 if (sizeof(wchar_t) == 4)
3104 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3105 core::stringw ret(a.pointer());
3108 else if (sizeof(wchar_t) == 2)
3110 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3112 core::stringw ret(array);
3117 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3118 core::stringw ret(a.pointer());
3122 else if (sizeof(wchar_t) == 1)
3124 core::array<uchar8_t> a(toUTF8(addBOM));
3125 core::stringw ret(a.pointer());
3129 // Shouldn't happen.
3130 return core::stringw();
3134 //! Converts the string to a wchar_t encoded string array.
3135 /** The size of a wchar_t changes depending on the platform. This function will store a
3136 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3137 //! \param endian The desired endianness of the string.
3138 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3139 //! \return An array containing the wchar_t encoded string.
3140 core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3142 if (sizeof(wchar_t) == 4)
3144 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3145 core::array<wchar_t> ret(a.size());
3146 ret.set_used(a.size());
3147 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3150 if (sizeof(wchar_t) == 2)
3152 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3154 core::array<wchar_t> ret(used);
3156 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3161 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3162 core::array<wchar_t> ret(a.size());
3163 ret.set_used(a.size());
3164 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3168 if (sizeof(wchar_t) == 1)
3170 core::array<uchar8_t> a(toUTF8(addBOM));
3171 core::array<wchar_t> ret(a.size());
3172 ret.set_used(a.size());
3173 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3177 // Shouldn't happen.
3178 return core::array<wchar_t>();
3181 //! Converts the string to a properly encoded io::path string.
3182 //! \param endian The desired endianness of the string.
3183 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3184 //! \return An io::path string containing the properly encoded string.
3185 io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3187 #if defined(_IRR_WCHAR_FILESYSTEM)
3188 return toWCHAR_s(endian, addBOM);
3190 return toUTF8_s(addBOM);
3194 //! Loads an unknown stream of data.
3195 //! Will attempt to determine if the stream is unicode data. Useful for loading from files.
3196 //! \param data The data stream to load from.
3197 //! \param data_size The length of the data string.
3198 //! \return A reference to our current string.
3199 ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3201 // Clear our string.
3206 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3210 case unicode::EUTFE_UTF8:
3211 append((uchar8_t*)data, data_size);
3214 case unicode::EUTFE_UTF16:
3215 case unicode::EUTFE_UTF16_BE:
3216 case unicode::EUTFE_UTF16_LE:
3217 append((uchar16_t*)data, data_size / 2);
3220 case unicode::EUTFE_UTF32:
3221 case unicode::EUTFE_UTF32_BE:
3222 case unicode::EUTFE_UTF32_LE:
3223 append((uchar32_t*)data, data_size / 4);
3230 //! Gets the encoding of the Unicode string this class contains.
3231 //! \return An enum describing the current encoding of this string.
3232 const unicode::EUTF_ENCODE getEncoding() const
3237 //! Gets the endianness of the Unicode string this class contains.
3238 //! \return An enum describing the endianness of this string.
3239 const unicode::EUTF_ENDIAN getEndianness() const
3241 if (encoding == unicode::EUTFE_UTF16_LE ||
3242 encoding == unicode::EUTFE_UTF32_LE)
3243 return unicode::EUTFEE_LITTLE;
3244 else return unicode::EUTFEE_BIG;
3249 //! Reallocate the string, making it bigger or smaller.
3250 //! \param new_size The new size of the string.
3251 void reallocate(u32 new_size)
3253 uchar16_t* old_array = array;
3255 array = allocator.allocate(new_size + 1); //new u16[new_size];
3256 allocated = new_size + 1;
3257 if (old_array == 0) return;
3259 u32 amount = used < new_size ? used : new_size;
3260 for (u32 i=0; i<=amount; ++i)
3261 array[i] = old_array[i];
3263 if (allocated <= used)
3264 used = allocated - 1;
3268 allocator.deallocate(old_array); // delete [] old_array;
3271 //--- member variables
3274 unicode::EUTF_ENCODE encoding;
3278 //irrAllocator<uchar16_t> allocator;
3281 typedef ustring16<irrAllocator<uchar16_t> > ustring;
3284 //! Appends two ustring16s.
3285 template <typename TAlloc>
3286 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3288 ustring16<TAlloc> ret(left);
3294 //! Appends a ustring16 and a null-terminated unicode string.
3295 template <typename TAlloc, class B>
3296 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3298 ustring16<TAlloc> ret(left);
3304 //! Appends a ustring16 and a null-terminated unicode string.
3305 template <class B, typename TAlloc>
3306 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3308 ustring16<TAlloc> ret(left);
3314 //! Appends a ustring16 and an Irrlicht string.
3315 template <typename TAlloc, typename B, typename BAlloc>
3316 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3318 ustring16<TAlloc> ret(left);
3324 //! Appends a ustring16 and an Irrlicht string.
3325 template <typename TAlloc, typename B, typename BAlloc>
3326 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3328 ustring16<TAlloc> ret(left);
3334 //! Appends a ustring16 and a std::basic_string.
3335 template <typename TAlloc, typename B, typename A, typename BAlloc>
3336 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3338 ustring16<TAlloc> ret(left);
3344 //! Appends a ustring16 and a std::basic_string.
3345 template <typename TAlloc, typename B, typename A, typename BAlloc>
3346 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3348 ustring16<TAlloc> ret(left);
3354 //! Appends a ustring16 and a char.
3355 template <typename TAlloc>
3356 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3358 ustring16<TAlloc> ret(left);
3364 //! Appends a ustring16 and a char.
3365 template <typename TAlloc>
3366 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3368 ustring16<TAlloc> ret(left);
3374 #ifdef USTRING_CPP0X_NEWLITERALS
3375 //! Appends a ustring16 and a uchar32_t.
3376 template <typename TAlloc>
3377 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
3379 ustring16<TAlloc> ret(left);
3385 //! Appends a ustring16 and a uchar32_t.
3386 template <typename TAlloc>
3387 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
3389 ustring16<TAlloc> ret(left);
3396 //! Appends a ustring16 and a short.
3397 template <typename TAlloc>
3398 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3400 ustring16<TAlloc> ret(left);
3401 ret += core::stringc(right);
3406 //! Appends a ustring16 and a short.
3407 template <typename TAlloc>
3408 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3410 ustring16<TAlloc> ret((core::stringc(left)));
3416 //! Appends a ustring16 and an unsigned short.
3417 template <typename TAlloc>
3418 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3420 ustring16<TAlloc> ret(left);
3421 ret += core::stringc(right);
3426 //! Appends a ustring16 and an unsigned short.
3427 template <typename TAlloc>
3428 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3430 ustring16<TAlloc> ret((core::stringc(left)));
3436 //! Appends a ustring16 and an int.
3437 template <typename TAlloc>
3438 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3440 ustring16<TAlloc> ret(left);
3441 ret += core::stringc(right);
3446 //! Appends a ustring16 and an int.
3447 template <typename TAlloc>
3448 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3450 ustring16<TAlloc> ret((core::stringc(left)));
3456 //! Appends a ustring16 and an unsigned int.
3457 template <typename TAlloc>
3458 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3460 ustring16<TAlloc> ret(left);
3461 ret += core::stringc(right);
3466 //! Appends a ustring16 and an unsigned int.
3467 template <typename TAlloc>
3468 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3470 ustring16<TAlloc> ret((core::stringc(left)));
3476 //! Appends a ustring16 and a long.
3477 template <typename TAlloc>
3478 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3480 ustring16<TAlloc> ret(left);
3481 ret += core::stringc(right);
3486 //! Appends a ustring16 and a long.
3487 template <typename TAlloc>
3488 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3490 ustring16<TAlloc> ret((core::stringc(left)));
3496 //! Appends a ustring16 and an unsigned long.
3497 template <typename TAlloc>
3498 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3500 ustring16<TAlloc> ret(left);
3501 ret += core::stringc(right);
3506 //! Appends a ustring16 and an unsigned long.
3507 template <typename TAlloc>
3508 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3510 ustring16<TAlloc> ret((core::stringc(left)));
3516 //! Appends a ustring16 and a float.
3517 template <typename TAlloc>
3518 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3520 ustring16<TAlloc> ret(left);
3521 ret += core::stringc(right);
3526 //! Appends a ustring16 and a float.
3527 template <typename TAlloc>
3528 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3530 ustring16<TAlloc> ret((core::stringc(left)));
3536 //! Appends a ustring16 and a double.
3537 template <typename TAlloc>
3538 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3540 ustring16<TAlloc> ret(left);
3541 ret += core::stringc(right);
3546 //! Appends a ustring16 and a double.
3547 template <typename TAlloc>
3548 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3550 ustring16<TAlloc> ret((core::stringc(left)));
3556 #ifdef USTRING_CPP0X
3557 //! Appends two ustring16s.
3558 template <typename TAlloc>
3559 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3561 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3562 right.insert(left, 0);
3563 return std::move(right);
3567 //! Appends two ustring16s.
3568 template <typename TAlloc>
3569 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3571 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3573 return std::move(left);
3577 //! Appends two ustring16s.
3578 template <typename TAlloc>
3579 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3581 //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3582 if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3583 (right.capacity() - right.size_raw() < left.size_raw()))
3586 return std::move(left);
3590 right.insert(left, 0);
3591 return std::move(right);
3596 //! Appends a ustring16 and a null-terminated unicode string.
3597 template <typename TAlloc, class B>
3598 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3600 //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3602 return std::move(left);
3606 //! Appends a ustring16 and a null-terminated unicode string.
3607 template <class B, typename TAlloc>
3608 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3610 //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3611 right.insert(left, 0);
3612 return std::move(right);
3616 //! Appends a ustring16 and an Irrlicht string.
3617 template <typename TAlloc, typename B, typename BAlloc>
3618 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3620 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3621 right.insert(left, 0);
3622 return std::move(right);
3626 //! Appends a ustring16 and an Irrlicht string.
3627 template <typename TAlloc, typename B, typename BAlloc>
3628 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3630 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3632 return std::move(left);
3636 //! Appends a ustring16 and a std::basic_string.
3637 template <typename TAlloc, typename B, typename A, typename BAlloc>
3638 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3640 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3641 right.insert(core::ustring16<TAlloc>(left), 0);
3642 return std::move(right);
3646 //! Appends a ustring16 and a std::basic_string.
3647 template <typename TAlloc, typename B, typename A, typename BAlloc>
3648 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3650 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3652 return std::move(left);
3656 //! Appends a ustring16 and a char.
3657 template <typename TAlloc>
3658 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3660 left.append((uchar32_t)right);
3661 return std::move(left);
3665 //! Appends a ustring16 and a char.
3666 template <typename TAlloc>
3667 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3669 right.insert((uchar32_t)left, 0);
3670 return std::move(right);
3674 #ifdef USTRING_CPP0X_NEWLITERALS
3675 //! Appends a ustring16 and a uchar32_t.
3676 template <typename TAlloc>
3677 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3680 return std::move(left);
3684 //! Appends a ustring16 and a uchar32_t.
3685 template <typename TAlloc>
3686 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
3688 right.insert(left, 0);
3689 return std::move(right);
3694 //! Appends a ustring16 and a short.
3695 template <typename TAlloc>
3696 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3698 left.append(core::stringc(right));
3699 return std::move(left);
3703 //! Appends a ustring16 and a short.
3704 template <typename TAlloc>
3705 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3707 right.insert(core::stringc(left), 0);
3708 return std::move(right);
3712 //! Appends a ustring16 and an unsigned short.
3713 template <typename TAlloc>
3714 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3716 left.append(core::stringc(right));
3717 return std::move(left);
3721 //! Appends a ustring16 and an unsigned short.
3722 template <typename TAlloc>
3723 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3725 right.insert(core::stringc(left), 0);
3726 return std::move(right);
3730 //! Appends a ustring16 and an int.
3731 template <typename TAlloc>
3732 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3734 left.append(core::stringc(right));
3735 return std::move(left);
3739 //! Appends a ustring16 and an int.
3740 template <typename TAlloc>
3741 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3743 right.insert(core::stringc(left), 0);
3744 return std::move(right);
3748 //! Appends a ustring16 and an unsigned int.
3749 template <typename TAlloc>
3750 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3752 left.append(core::stringc(right));
3753 return std::move(left);
3757 //! Appends a ustring16 and an unsigned int.
3758 template <typename TAlloc>
3759 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3761 right.insert(core::stringc(left), 0);
3762 return std::move(right);
3766 //! Appends a ustring16 and a long.
3767 template <typename TAlloc>
3768 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3770 left.append(core::stringc(right));
3771 return std::move(left);
3775 //! Appends a ustring16 and a long.
3776 template <typename TAlloc>
3777 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3779 right.insert(core::stringc(left), 0);
3780 return std::move(right);
3784 //! Appends a ustring16 and an unsigned long.
3785 template <typename TAlloc>
3786 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3788 left.append(core::stringc(right));
3789 return std::move(left);
3793 //! Appends a ustring16 and an unsigned long.
3794 template <typename TAlloc>
3795 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3797 right.insert(core::stringc(left), 0);
3798 return std::move(right);
3802 //! Appends a ustring16 and a float.
3803 template <typename TAlloc>
3804 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3806 left.append(core::stringc(right));
3807 return std::move(left);
3811 //! Appends a ustring16 and a float.
3812 template <typename TAlloc>
3813 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3815 right.insert(core::stringc(left), 0);
3816 return std::move(right);
3820 //! Appends a ustring16 and a double.
3821 template <typename TAlloc>
3822 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3824 left.append(core::stringc(right));
3825 return std::move(left);
3829 //! Appends a ustring16 and a double.
3830 template <typename TAlloc>
3831 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3833 right.insert(core::stringc(left), 0);
3834 return std::move(right);
3839 #ifndef USTRING_NO_STL
3840 //! Writes a ustring16 to an ostream.
3841 template <typename TAlloc>
3842 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3844 out << in.toUTF8_s().c_str();
3848 //! Writes a ustring16 to a wostream.
3849 template <typename TAlloc>
3850 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3852 out << in.toWCHAR_s().c_str();
3858 #ifndef USTRING_NO_STL
3863 //! Hashing algorithm for hashing a ustring. Used for things like unordered_maps.
3864 //! Algorithm taken from std::hash<std::string>.
3865 class hash : public std::unary_function<core::ustring, size_t>
3868 size_t operator()(const core::ustring& s) const
3870 size_t ret = 2166136261U;
3872 size_t stride = 1 + s.size_raw() / 10;
3874 core::ustring::const_iterator i = s.begin();
3875 while (i != s.end())
3877 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
3878 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3886 } // end namespace unicode
3890 } // end namespace core
3891 } // end namespace irr