8 // Unicode is annoying. A "code point" (rune in Go-speak) may need up to
9 // 4 bytes to represent it. In general, a code point will represent a
10 // complete character, but this is not always the case. A character with
11 // accents may be made up of multiple code points (the code point for the
12 // original character, and additional code points for each accent/marking).
13 // The functions below are meant to help deal with these additional "combining"
14 // code points. In underlying operations (search, replace, etc.), micro will
15 // treat a character with combining code points as just the original code point.
16 // For rendering, micro will display the combining characters. It's not perfect
17 // but it's pretty good.
19 var minMark = rune(unicode.Mark.R16[0].Lo)
21 func isMark(r rune) bool {
26 return unicode.In(r, unicode.Mark)
29 // DecodeCharacter returns the next character from a byte slice.
30 // A character is a rune along with any accompanying combining runes.
31 func DecodeCharacter(b []byte) (rune, []rune, int) {
32 r, size := utf8.DecodeRune(b)
34 c, s := utf8.DecodeRune(b)
38 combc = append(combc, c)
42 c, s = utf8.DecodeRune(b)
48 // DecodeCharacterInString returns the next character from a string.
49 // A character is a rune along with any accompanying combining runes.
50 func DecodeCharacterInString(str string) (rune, []rune, int) {
51 r, size := utf8.DecodeRuneInString(str)
53 c, s := utf8.DecodeRuneInString(str)
57 combc = append(combc, c)
61 c, s = utf8.DecodeRuneInString(str)
67 // CharacterCount returns the number of characters in a byte slice.
68 // Similar to utf8.RuneCount, but counts Unicode characters rather than runes.
69 func CharacterCount(b []byte) int {
73 r, size := utf8.DecodeRune(b)
84 // CharacterCountInString returns the number of characters in a string.
85 // Similar to utf8.RuneCountInString, but counts Unicode characters rather than runes.
86 func CharacterCountInString(str string) int {
89 for _, r := range str {