Add utf-8 support to linenoise.c

author Steve Bennett <steveb@workware.net.au>

Fri, 8 Oct 2010 12:55:20 +0000 (22:55 +1000)

committer Steve Bennett <steveb@workware.net.au>

Fri, 8 Apr 2011 01:48:05 +0000 (11:48 +1000)
author Steve Bennett <steveb@workware.net.au>
Fri, 8 Oct 2010 12:55:20 +0000 (22:55 +1000)
committer Steve Bennett <steveb@workware.net.au>
Fri, 8 Apr 2011 01:48:05 +0000 (11:48 +1000)
diff --git a/Makefile b/Makefile

index a285410678fb0ee8773cab2eff4fa97531de9714..7e50716eb3f98c4939b30c4423a1e46c36343b29 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,10 @@
-linenoise_example: linenoise.h linenoise.c
+all:  linenoise_example linenoise_utf8_example
  
-linenoise_example: linenoise.c example.c
-       $(CC) -Wall -W -Os -g -o linenoise_example linenoise.c example.c
+linenoise_example: linenoise.h linenoise.c example.c
+       $(CC) -Wall -W -Os -g -o $@ linenoise.c example.c
+
+linenoise_utf8_example: linenoise.c utf8.c example.c
+       $(CC) -DNO_COMPLETION -DUSE_UTF8 -Wall -W -Os -g -o $@ linenoise.c utf8.c example.c
  
  clean:
-       rm -f linenoise_example
+       rm -f linenoise_example linenoise_utf8_example
diff --git a/linenoise.c b/linenoise.c

index d1d32caec8bdc0e46f3a40b9c82c208432cc4e58..bce5a4eeeba4fdb34a5386d2c90d18d662c4159e 100644 (file)
--- a/linenoise.c
+++ b/linenoise.c
@@ -45,8 +45,8 @@
   * - http://www.3waylabs.com/nw/WWW/products/wizcon/vt220.html
   *
   * Todo list:
- * - Switch to gets() if $TERM is something we can't support.
   * - Win32 support
+ * - Save and load history containing newlines
   *
   * Bloat:
   * - Completion?
@@ -106,10 +106,12 @@
  #include <stdlib.h>
  #include <sys/types.h>
  #include <sys/ioctl.h>
+#include <sys/poll.h>
  #include <unistd.h>
  #include "linenoise.h"
  
  #include "linenoise.h"
+#include "utf8.h"
  
  #define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100
  #define LINENOISE_MAX_LINE 4096
@@ -171,7 +173,7 @@ static int enableRawMode(int fd) {
      raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */
  
      /* put terminal in raw mode after flushing */
-    if (tcsetattr(fd,TCSAFLUSH,&raw) < 0) goto fatal;
+    if (tcsetattr(fd,TCSADRAIN,&raw) < 0) goto fatal;
      rawmode = 1;
      return 0;
  
@@ -182,7 +184,7 @@ fatal:
  
  static void disableRawMode(int fd) {
      /* Don't even check the return value as it's too late. */
-    if (rawmode && tcsetattr(fd,TCSAFLUSH,&orig_termios) != -1)
+    if (rawmode && tcsetattr(fd,TCSADRAIN,&orig_termios) != -1)
          rawmode = 0;
  }
  
@@ -205,6 +207,7 @@ struct current {
      char *buf;  /* Current buffer. Always null terminated */
      int bufmax; /* Size of the buffer, including space for the null termination */
      int len;    /* Number of bytes in 'buf' */
+    int chars;  /* Number of chars in 'buf' (utf-8 chars) */
      int pos;    /* Cursor position, measured in chars */
      int cols;   /* Size of the window, in chars */
  };
@@ -224,47 +227,122 @@ static void fd_printf(int fd, const char *format, ...)
      write(fd, buf, n);
  }
  
-static void refreshLine(const char *prompt, struct current *c) {
-    size_t plen = strlen(prompt);
-    int extra = 0;
-    size_t i, p;
-    const char *buf = c->buf;
-    int len = c->len;
-    int pos = c->pos;
+/**
+ * Encodes the character 'c' at 'buf' and returns the number of bytes stored.
+ *
+ * With USE_UTF8 this is the utf-8 encoding produced by utf8_fromunicode().
+ * Otherwise 'c' is stored as a single byte.
+ */
+static int utf8_getchars(char *buf, int c)
+{
+#ifndef USE_UTF8
+    buf[0] = c;
+    return 1;
+#else
+    return utf8_fromunicode(buf, c);
+#endif
+}
+
+/**
+ * Fetches the character stored at char index 'pos' of the edit buffer.
+ *
+ * Returns the decoded character, or -1 if 'pos' is negative
+ * or not less than current->chars.
+ */
+static int get_char(struct current *current, int pos)
+{
+    int ch;
+    int offset;
+
+    if (pos < 0 || pos >= current->chars) {
+        return -1;
+    }
+    /* Convert the char index to a byte offset before decoding */
+    offset = utf8_index(current->buf, pos);
+    utf8_tounicode(current->buf + offset, &ch);
+    return ch;
+}
+
+static void refreshLine(const char *prompt, struct current *current) {
+    int plen;
+    int pchars;
+    int backup = 0;
+    int i;
+    const char *buf = current->buf;
+    int chars = current->chars;
+    int pos = current->pos;
+    int b;
+    int ch;
+    int n;
  
-    //fprintf(stderr, "\nrefreshLine: prompt=<<%s>>, buf=<<%s>>\n", prompt, c->buf);
-    //fprintf(stderr, "pos=%d, len=%d, cols=%d\n", pos, len, c->cols);
+    /* Should intercept SIGWINCH. For now, just get the size every time */
+    current->cols = getColumns();
+
+    plen = strlen(prompt);
+    pchars = utf8_strlen(prompt, plen);
+
+    /* Account for a line which is too long to fit in the window.
+     * Note that control chars require an extra column
+     */
+
+    /* How many cols are required to the left of 'pos'?
+     * The prompt, plus one extra for each control char
+     */
+    n = pchars + utf8_strlen(buf, current->len);
+    b = 0;
+    for (i = 0; i < pos; i++) {
+        b += utf8_tounicode(buf + b, &ch);
+        if (ch < ' ') {
+            n++;
+        }
+    }
  
-    while((plen+pos) >= c->cols) {
-        buf++;
-        len--;
-        pos--;
+    /* If too many are needed, strip chars off the front of 'buf'
+     * until it fits. Note that if the current char is a control character,
+     * we need one extra col.
+     */
+    if (current->pos < current->chars && get_char(current, current->pos) < ' ') {
+        n++;
      }
-    while (plen+len > c->cols) {
-        len--;
+
+    while (n >= current->cols) {
+        b = utf8_tounicode(buf, &ch);
+        if (ch < ' ') {
+            n--;
+        }
+        n--;
+        buf += b;
+        pos--;
+        chars--;
      }
  
      /* Cursor to left edge, then the prompt */
-    fd_printf(c->fd, "\x1b[0G");
-    write(c->fd, prompt, strlen(prompt));
+    fd_printf(current->fd, "\x1b[0G");
+    write(current->fd, prompt, plen);
  
      /* Now the current buffer content */
-    /* Need special handling for control characters */
-    p = 0;
-    for (i = 0; i < len; i++) {
-        if (buf[i] >= 0 && buf[i] < ' ') {
-            write(c->fd, buf + p, i - p);
-            p = i + 1;
-            fd_printf(c->fd, "\033[7m^%c\033[0m", buf[i] + '@');
+
+    /* Need special handling for control characters.
+     * If we hit 'cols', stop.
+     */
+    b = 0; /* unwritten bytes */
+    n = 0; /* How many control chars were written */
+    for (i = 0; i < chars; i++) {
+        int ch;
+        int w = utf8_tounicode(buf + b, &ch);
+        if (ch < ' ') {
+            n++;
+        }
+        if (pchars + i + n >= current->cols) {
+            break;
+        }
+        if (ch < ' ') {
+            /* A control character, so write the buffer so far */
+            write(current->fd, buf, b);
+            buf += b + w;
+            b = 0;
+            fd_printf(current->fd, "\033[7m^%c\033[0m", ch + '@');
              if (i < pos) {
-                extra++;
+                backup++;
              }
          }
+        else {
+            b += w;
+        }
      }
-    write(c->fd, buf + p, i - p);
+    write(current->fd, buf, b);
  
      /* Erase to right, move cursor to original position */
-    fd_printf(c->fd, "\x1b[0K" "\x1b[0G\x1b[%dC", (int)(pos+plen+extra));
+    fd_printf(current->fd, "\x1b[0K" "\x1b[0G\x1b[%dC", pos + pchars + backup);
  }
  
  static void set_current(struct current *current, const char *str)
@@ -272,24 +350,27 @@ static void set_current(struct current *current, const char *str)
      strncpy(current->buf, str, current->bufmax);
      current->buf[current->bufmax - 1] = 0;
      current->len = strlen(current->buf);
-    current->pos = current->len;
+    current->pos = current->chars = utf8_strlen(current->buf, current->len);
  }
  
-static int has_room(struct current *current, int chars)
+static int has_room(struct current *current, int bytes)
  {
-    return current->len + chars < current->bufmax - 1;
+    return current->len + bytes < current->bufmax - 1;
  }
  
  static int remove_char(struct current *current, int pos)
  {
-    //fprintf(stderr, "Trying to remove char at %d (pos=%d, len=%d)\n", pos, current->pos, current->len);
-    if (pos >= 0 && pos < current->len) {
+    if (pos >= 0 && pos < current->chars) {
+        int p1, p2;
+        p1 = utf8_index(current->buf, pos);
+        p2 = p1 + utf8_index(current->buf + p1, 1);
+        /* Move the null char too */
+        memmove(current->buf + p1, current->buf + p2, current->len - p2 + 1);
+        current->len -= (p2 - p1);
+        current->chars--;
          if (current->pos > pos) {
              current->pos--;
          }
-        /* Move the null char too */
-        memmove(current->buf + pos, current->buf + pos + 1, current->len - pos);
-        current->len--;
          return 1;
      }
      return 0;
@@ -297,10 +378,17 @@ static int remove_char(struct current *current, int pos)
  
  static int insert_char(struct current *current, int pos, int ch)
  {
-    if (has_room(current, 1) && pos >= 0 && pos <= current->len) {
-        memmove(current->buf+pos+1, current->buf + pos, current->len - pos);
-        current->buf[pos] = ch;
-        current->len++;
+    char buf[3];
+    int n = utf8_getchars(buf, ch);
+
+    if (has_room(current, n) && pos >= 0 && pos <= current->chars) {
+        int p1, p2;
+        p1 = utf8_index(current->buf, pos);
+        p2 = p1 + n;
+        memmove(current->buf + p2, current->buf + p1, current->len - p1);
+        memcpy(current->buf + p1, buf, n);
+        current->len += n;
+        current->chars++;
          if (current->pos >= pos) {
              current->pos++;
          }
@@ -340,6 +428,7 @@ static int completeLine(const char *prompt, struct current *current) {
                  struct current tmp = *current;
                  tmp.buf = lc.cvec[i];
                  tmp.pos = tmp.len = strlen(tmp.buf);
+                tmp.chars = utf8_strlen(tmp.buf, tmp.len);
                  refreshLine(prompt, &tmp);
              } else {
                  refreshLine(prompt, current);
@@ -399,21 +488,128 @@ static int remove_chars(struct current *current, int pos, int n)
      return removed;
  }
  
-static int fd_read(int fd)
+/**
+ * Reads a char from 'fd', waiting at most 'timeout' milliseconds.
+ *
+ * A timeout of -1 means to wait forever.
+ *
+ * Returns -1 if no char is received within the time or an error occurs.
+ */
+static int fd_read_char(int fd, int timeout)
  {
+    struct pollfd p;
      unsigned char c;
+
+    p.fd = fd;
+    p.events = POLLIN;
+
+    if (poll(&p, 1, timeout) == 0) {
+        /* timeout */
+        return -1;
+    }
      if (read(fd, &c, 1) != 1) {
          return -1;
      }
      return c;
  }
  
-#ifndef ctrl
-#define ctrl(C) ((C) - '@')
+/**
+ * Reads a complete utf-8 character
+ * and returns the unicode value, or -1 on error.
+ */
+static int fd_read(int fd)
+{
+#ifdef USE_UTF8
+    char buf[4];
+    int n;
+    int i;
+    int c;
+
+    if (read(fd, &buf[0], 1) != 1) {
+        return -1;
+    }
+    n = utf8_charlen(buf[0]);
+    if (n < 1 || n > 3) {
+        return -1;
+    }
+    for (i = 1; i < n; i++) {
+        if (read(fd, &buf[i], 1) != 1) {
+            return -1;
+        }
+    }
+    buf[n] = 0;
+    /* decode and return the character */
+    utf8_tounicode(buf, &c);
+    return c;
+#else
+    return fd_read_char(fd, -1);
  #endif
+}
+
+/* Key codes returned by check_special().
+ * The recognised keys use negative numbers to co-exist with normal
+ * unicode chars. SPECIAL_NONE has no initialiser, so its value is 0.
+ */
+enum {
+    SPECIAL_NONE,           /* escape sequence was not recognised */
+    SPECIAL_UP = -20,
+    SPECIAL_DOWN = -21,
+    SPECIAL_LEFT = -22,
+    SPECIAL_RIGHT = -23,
+    SPECIAL_DELETE = -24,
+};
+
+/**
+ * Called after escape (27) has been received. Reads the chars which
+ * follow, waiting up to 50ms for each, to decide whether this is a
+ * known special key.
+ *
+ * Returns:
+ *  - 27 if nothing follows the escape within the timeout
+ *  - the negative result of fd_read_char() if the second char
+ *    can't be read
+ *  - SPECIAL_UP/DOWN/LEFT/RIGHT for ESC [ x or ESC O x, x = A, B, D, C
+ *  - SPECIAL_DELETE for ESC [ 3 ~
+ *  - SPECIAL_NONE for anything else
+ */
+static int check_special(int fd)
+{
+    int first;
+    int second;
+    int next;
+
+    first = fd_read_char(fd, 50);
+    if (first < 0) {
+        /* A lone escape */
+        return 27;
+    }
+
+    second = fd_read_char(fd, 50);
+    if (second < 0) {
+        return second;
+    }
+
+    if (first == 'O' || first == '[') {
+        /* Potential arrow key */
+        if (second == 'A') {
+            return SPECIAL_UP;
+        }
+        else if (second == 'B') {
+            return SPECIAL_DOWN;
+        }
+        else if (second == 'C') {
+            return SPECIAL_RIGHT;
+        }
+        else if (second == 'D') {
+            return SPECIAL_LEFT;
+        }
+    }
+
+    if (first != '[' || second < '1' || second > '6') {
+        return SPECIAL_NONE;
+    }
+
+    /* Extended escape: ESC [ digit ... ~ */
+    next = fd_read_char(fd, 50);
+    if (second == '3' && next == '~') {
+        /* delete char under cursor */
+        return SPECIAL_DELETE;
+    }
+    /* e.g. \e[12~ or \e[11;2~ - discard the complete sequence */
+    for (; next != -1 && next != '~'; next = fd_read_char(fd, 50)) {
+        /* nothing */
+    }
+
+    return SPECIAL_NONE;
+}
+
+#define ctrl(C) ((C) - '@')
  
  static int linenoisePrompt(const char *prompt, struct current *current) {
-    size_t plen = strlen(prompt);
      int history_index = 0;
  
      /* The latest history entry is always our current buffer, that
@@ -424,11 +620,7 @@ static int linenoisePrompt(const char *prompt, struct current *current) {
      refreshLine(prompt, current);
  
      while(1) {
-        int ext;
-        int c;
-        int c2;
-
-        c = fd_read(current->fd);
+        int c = fd_read(current->fd);
  
  #ifndef NO_COMPLETION
          /* Only autocomplete when the callback is set. It returns < 0 when
@@ -444,7 +636,7 @@ static int linenoisePrompt(const char *prompt, struct current *current) {
  #endif
  
  process_char:
-        if (c < 0) return current->len;
+        if (c == -1) return current->len;
          switch(c) {
          case '\r':    /* enter */
              history_len--;
@@ -475,12 +667,12 @@ process_char:
              /* eat any spaces on the left */
              {
                  int pos = current->pos;
-                while (pos > 0 && current->buf[pos - 1] == ' ') {
+                while (pos > 0 && get_char(current, pos - 1) == ' ') {
                      pos--;
                  }
  
                  /* now eat any non-spaces on the left */
-                while (pos > 0 && current->buf[pos - 1] != ' ') {
+                while (pos > 0 && get_char(current, pos - 1) != ' ') {
                      pos--;
                  }
  
@@ -494,44 +686,87 @@ process_char:
                  /* Display the reverse-i-search prompt and process chars */
                  char rbuf[50];
                  char rprompt[80];
-                int i = 0;
+                int rchars = 0;
+                int rlen = 0;
+                int searchpos = history_len - 1;
+
                  rbuf[0] = 0;
                  while (1) {
+                    int n = 0;
+                    const char *p = NULL;
+                    int skipsame = 0;
+                    int searchdir = -1;
+
                      snprintf(rprompt, sizeof(rprompt), "(reverse-i-search)'%s': ", rbuf);
                      refreshLine(rprompt, current);
                      c = fd_read(current->fd);
                      if (c == ctrl('H') || c == 127) {
-                        if (i > 0) {
-                            rbuf[--i] = 0;
+                        if (rchars) {
+                            int p = utf8_index(rbuf, --rchars);
+                            rbuf[p] = 0;
+                            rlen = strlen(rbuf);
                          }
                          continue;
                      }
-                    if (c >= ' ' && c <= '~') {
-                        if (i < (int)sizeof(rbuf)) {
-                            int j;
-                            const char *p = NULL;
-                            rbuf[i++] = c;
-                            rbuf[i] = 0;
-                            /* Now search back through the history for a match */
-                            for (j = history_len - 1; j > 0; j--) {
-                                p = strstr(history[j], rbuf);
-                                if (p) {
-                                    /* Found a match. Copy it */
-                                    set_current(current,history[j]);
-                                    current->pos = p - history[j];
-                                    break;
-                                }
-                            }
-                            if (!p) {
-                                /* No match, so don't add it */
-                                rbuf[--i] = 0;
+                    if (c == 27) {
+                        c = check_special(current->fd);
+                    }
+                    if (c == ctrl('P') || c == SPECIAL_UP) {
+                        /* Search for the previous (earlier) match */
+                        if (searchpos > 0) {
+                            searchpos--;
+                        }
+                        skipsame = 1;
+                    }
+                    else if (c == ctrl('N') || c == SPECIAL_DOWN) {
+                        /* Search for the next (later) match */
+                        if (searchpos < history_len) {
+                            searchpos++;
+                        }
+                        searchdir = 1;
+                        skipsame = 1;
+                    }
+                    else if (c >= ' ') {
+                        /* Need room for up to 3 more bytes plus the null.
+                         * (sizeof(rbuf) + 3 could never trigger before overflowing)
+                         */
+                        if (rlen >= (int)sizeof(rbuf) - 3) {
+                            continue;
+                        }
+
+                        n = utf8_getchars(rbuf + rlen, c);
+                        rlen += n;
+                        rchars++;
+                        rbuf[rlen] = 0;
+
+                        /* Adding a new char resets the search location */
+                        searchpos = history_len - 1;
+                    }
+                    else {
+                        /* Exit from incremental search mode */
+                        break;
+                    }
+
+                    /* Now search through the history for a match */
+                    for (; searchpos >= 0 && searchpos < history_len; searchpos += searchdir) {
+                        p = strstr(history[searchpos], rbuf);
+                        if (p) {
+                            /* Found a match */
+                            if (skipsame && strcmp(history[searchpos], current->buf) == 0) {
+                                /* But it is identical, so skip it */
+                                continue;
                              }
+                            /* Copy the matching line and set the cursor position */
+                            set_current(current,history[searchpos]);
+                            current->pos = utf8_strlen(history[searchpos], p - history[searchpos]);
+                            break;
                          }
-                        continue;
                      }
-                    break;
+                    if (!p && n) {
+                        /* No match, so don't add it */
+                        rchars--;
+                        rlen -= n;
+                        rbuf[rlen] = 0;
+                    }
                  }
-                if (c == ctrl('G')) {
+                if (c == ctrl('G') || c == ctrl('C')) {
                      /* ctrl-g terminates the search with no effect */
                      set_current(current, "");
                      c = 0;
@@ -546,114 +781,92 @@ process_char:
              }
              break;
          case ctrl('T'):    /* ctrl-t */
-            if (current->pos > 0 && current->pos < current->len) {
-                int aux = current->buf[current->pos-1];
-                current->buf[current->pos-1] = current->buf[current->pos];
-                current->buf[current->pos] = aux;
-                if (current->pos != current->len-1) current->pos++;
+            if (current->pos > 0 && current->pos < current->chars) {
+                c = get_char(current, current->pos);
+                remove_char(current, current->pos);
+                insert_char(current, current->pos - 1, c);
                  refreshLine(prompt, current);
              }
              break;
          case ctrl('V'):    /* ctrl-v */
-            if (has_room(current, 1)) {
+            if (has_room(current, 3)) {
                  /* Insert the ^V first */
                  if (insert_char(current, current->pos, c)) {
                      refreshLine(prompt, current);
                      /* Now wait for the next char. Can insert anything except \0 */
                      c = fd_read(current->fd);
-                    if (c > 0) {
-                        /* Replace the ^V with the actual char */
-                        current->buf[current->pos - 1] = c;
-                    }
-                    else {
-                        remove_char(current, current->pos);
+
+                    /* Remove the ^V first */
+                    remove_char(current, current->pos - 1);
+                    if (c != -1) {
+                        /* Insert the actual char */
+                        insert_char(current, current->pos, c);
                      }
                      refreshLine(prompt, current);
                  }
              }
              break;
          case ctrl('B'):     /* ctrl-b */
-            goto left_arrow;
          case ctrl('F'):     /* ctrl-f */
-            goto right_arrow;
          case ctrl('P'):    /* ctrl-p */
-            c2 = 65;
-            goto up_down_arrow;
          case ctrl('N'):    /* ctrl-n */
-            c2 = 66;
-            goto up_down_arrow;
-            break;
-        case 27:    /* escape sequence */
-            c = fd_read(current->fd);
-            if (c <= 0) {
-                break;
-            }
-            c2 = fd_read(current->fd);
-            if (c <= 0) {
-                break;
+        case 27: {   /* escape sequence */
+            int dir = -1;
+            if (c == 27) {
+                c = check_special(current->fd);
              }
-            ext = (c == 91 || c == 79);
-            if (ext && c2 == 68) {
-left_arrow:
-                /* left arrow */
-                if (current->pos > 0) {
-                    current->pos--;
-                    refreshLine(prompt, current);
-                }
-            } else if (ext && c2 == 67) {
-right_arrow:
-                /* right arrow */
-                if (current->pos < current->len) {
-                    current->pos++;
-                    refreshLine(prompt, current);
-                }
-            } else if (ext && (c2 == 65 || c2 == 66)) {
-up_down_arrow:
-                /* up and down arrow: history */
-                if (history_len > 1) {
-                    /* Update the current history entry before to
-                     * overwrite it with tne next one. */
-                    free(history[history_len-1-history_index]);
-                    history[history_len-1-history_index] = strdup(current->buf);
-                    /* Show the new entry */
-                    history_index += (c2 == 65) ? 1 : -1;
-                    if (history_index < 0) {
-                        history_index = 0;
-                        break;
-                    } else if (history_index >= history_len) {
-                        history_index = history_len-1;
-                        break;
+            switch (c) {
+                case ctrl('B'):
+                case SPECIAL_LEFT:
+                    if (current->pos > 0) {
+                        current->pos--;
+                        refreshLine(prompt, current);
                      }
-                    set_current(current, history[history_len-1-history_index]);
-                    refreshLine(prompt, current);
-                }
-            } else if (c == 91 && c2 > 48 && c2 < 55) {
-                /* extended escape */
-                c = fd_read(current->fd);
-                if (c <= 0) {
                      break;
-                }
-                fd_read(current->fd);
-                if (c2 == 51 && c == 126) {
-                    /* delete char under cursor */
+                case ctrl('F'):
+                case SPECIAL_RIGHT:
+                    if (current->pos < current->chars) {
+                        current->pos++;
+                        refreshLine(prompt, current);
+                    }
+                    break;
+                case ctrl('P'):
+                case SPECIAL_UP:
+                    dir = 1;
+                case ctrl('N'):
+                case SPECIAL_DOWN:
+                    if (history_len > 1) {
+                        /* Update the current history entry before
+                         * overwriting it with the next one. */
+                        free(history[history_len-1-history_index]);
+                        history[history_len-1-history_index] = strdup(current->buf);
+                        /* Show the new entry */
+                        history_index += dir;
+                        if (history_index < 0) {
+                            history_index = 0;
+                            break;
+                        } else if (history_index >= history_len) {
+                            history_index = history_len-1;
+                            break;
+                        }
+                        set_current(current, history[history_len-1-history_index]);
+                        refreshLine(prompt, current);
+                    }
+                    break;
+
+                case SPECIAL_DELETE:
                      if (remove_char(current, current->pos)) {
                          refreshLine(prompt, current);
                      }
-                }
+                    break;
+            }
              }
              break;
          default:
-            /* Note that the only control character currently permitted is tab */
-            if (c == '\t' || c < 0 || c >= ' ') {
+            /* Only tab is allowed without ^V */
+            if (c == '\t' || c >= ' ') {
                  if (insert_char(current, current->pos, c)) {
-                    /* Avoid a full update of the line in the trivial case. */
-                    if (current->pos == current->len && c >= ' ' && plen + current->len < current->cols) {
-                        char ch = c;
-                        write(current->fd, &ch, 1);
-                    }
-                    else {
-                        refreshLine(prompt, current);
-                    }
+                    refreshLine(prompt, current);
                  }
              }
              break;
@@ -663,7 +876,7 @@ up_down_arrow:
              }
              break;
          case ctrl('K'): /* Ctrl+k, delete from current to end of line. */
-            if (remove_chars(current, current->pos, current->len - current->pos)) {
+            if (remove_chars(current, current->pos, current->chars - current->pos)) {
                  refreshLine(prompt, current);
              }
              break;
@@ -672,7 +885,7 @@ up_down_arrow:
              refreshLine(prompt, current);
              break;
          case ctrl('E'): /* ctrl+e, go to the end of the line */
-            current->pos = current->len;
+            current->pos = current->chars;
              refreshLine(prompt, current);
              break;
          case ctrl('L'): /* Ctrl+L, clear screen */
@@ -709,11 +922,13 @@ static int linenoiseRaw(char *buf, size_t buflen, const char *prompt) {
          current.buf = buf;
          current.bufmax = buflen;
          current.len = 0;
+        current.chars = 0;
          current.pos = 0;
-        current.cols = getColumns();
+        current.cols = 0;
  
          count = linenoisePrompt(prompt, &current);
          disableRawMode(fd);
+
          printf("\n");
      }
      return count;
diff --git a/utf8.c b/utf8.c

new file mode 100644 (file)

index 0000000..26924b4
--- /dev/null
+++ b/utf8.c
@@ -0,0 +1,115 @@
+/**
+ * UTF-8 utility functions
+ *
+ * (c) 2010 Steve Bennett <steveb@workware.net.au>
+ *
+ * See LICENCE for licence details.
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utf8.h"
+
+#ifdef USE_UTF8
+/**
+ * Encodes the codepoint 'uc' as utf-8 at 'p'. No null is appended.
+ *
+ * Returns the number of bytes stored: 1, 2 or 3.
+ */
+int utf8_fromunicode(char *p, unsigned short uc)
+{
+    if (uc > 0x7ff) {
+        /* 1110xxxx 10xxxxxx 10xxxxxx */
+        p[0] = 0xe0 | ((uc & 0xf000) >> 12);
+        p[1] = 0x80 | ((uc & 0xfc0) >> 6);
+        p[2] = 0x80 | (uc & 0x3f);
+        return 3;
+    }
+    if (uc > 0x7f) {
+        /* 110xxxxx 10xxxxxx */
+        p[0] = 0xc0 | ((uc & 0x7c0) >> 6);
+        p[1] = 0x80 | (uc & 0x3f);
+        return 2;
+    }
+    /* Plain ascii */
+    p[0] = uc;
+    return 1;
+}
+
+/**
+ * Returns the length in bytes (1-4) of the utf-8 sequence which
+ * starts with byte 'c', or -1 if 'c' is not a valid start byte.
+ */
+int utf8_charlen(int c)
+{
+    if (!(c & 0x80)) {
+        /* 0xxxxxxx */
+        return 1;
+    }
+    switch (c & 0xf0) {
+        case 0xc0:
+        case 0xd0:
+            /* 110xxxxx */
+            return 2;
+        case 0xe0:
+            /* 1110xxxx */
+            return 3;
+        case 0xf0:
+            /* 11110xxx only */
+            if ((c & 0x08) == 0) {
+                return 4;
+            }
+            break;
+    }
+    /* Invalid sequence */
+    return -1;
+}
+
+/**
+ * Counts the characters in the first 'bytelen' bytes of 'str'.
+ *
+ * If 'bytelen' is negative, the length of the null terminated
+ * string is used instead.
+ *
+ * Returns the number of characters.
+ */
+int utf8_strlen(const char *str, int bytelen)
+{
+    int charlen = 0;
+    if (bytelen < 0) {
+        bytelen = strlen(str);
+    }
+    /* If 'bytelen' ends part way through a multi-byte sequence, the
+     * remaining count goes negative. Test for > 0 rather than != 0
+     * so that the loop stops instead of running past the end of 'str'.
+     */
+    while (bytelen > 0) {
+        int c;
+        int l = utf8_tounicode(str, &c);
+        charlen++;
+        str += l;
+        bytelen -= l;
+    }
+    return charlen;
+}
+
+/**
+ * Returns the byte offset in 'str' of the character at char index 'index',
+ * found by decoding 'index' characters from the start of the string.
+ */
+int utf8_index(const char *str, int index)
+{
+    int offset = 0;
+    int c;
+
+    for (; index != 0; index--) {
+        offset += utf8_tounicode(str + offset, &c);
+    }
+    return offset;
+}
+
+/**
+ * Decodes the first character of 's1' and of 's2'.
+ *
+ * Returns 1 if they are the same, or 0 if not.
+ */
+int utf8_charequal(const char *s1, const char *s2)
+{
+    int left;
+    int right;
+
+    utf8_tounicode(s1, &left);
+    utf8_tounicode(s2, &right);
+
+    return (left == right) ? 1 : 0;
+}
+
+/**
+ * Decodes the utf-8 sequence (1-3 bytes) at the start of 'str'.
+ *
+ * Stores the result in *uc and returns the number of bytes consumed.
+ * If the lead byte is not followed by the continuation bytes it
+ * requires, the lead byte itself is stored and 1 is returned.
+ */
+int utf8_tounicode(const char *str, int *uc)
+{
+    const unsigned char *s = (const unsigned char *)str;
+    int lead = s[0];
+
+    if (lead >= 0xc0 && lead < 0xe0) {
+        /* Two byte sequence */
+        if ((s[1] & 0xc0) == 0x80) {
+            *uc = ((lead & 0x3f) << 6) | (s[1] & 0x7f);
+            return 2;
+        }
+    }
+    else if (lead >= 0xe0 && lead < 0xf0) {
+        /* Three byte sequence. s[2] is only examined if s[1] is a
+         * continuation byte, and so is not the null terminator.
+         */
+        if ((s[1] & 0xc0) == 0x80 && (s[2] & 0xc0) == 0x80) {
+            *uc = ((lead & 0x1f) << 12) | ((s[1] & 0x7f) << 6) | (s[2] & 0x7f);
+            return 3;
+        }
+    }
+
+    /* Ascii, a stray continuation byte, or an invalid sequence:
+     * just return the byte
+     */
+    *uc = lead;
+    return 1;
+}
+
+#endif
diff --git a/utf8.h b/utf8.h

new file mode 100644 (file)

index 0000000..564d64e
--- /dev/null
+++ b/utf8.h
@@ -0,0 +1,79 @@
+#ifndef UTF8_UTIL_H
+#define UTF8_UTIL_H
+/**
+ * UTF-8 utility functions
+ *
+ * (c) 2010 Steve Bennett <steveb@workware.net.au>
+ *
+ * See LICENCE for licence details.
+ */
+
+#ifndef USE_UTF8
+#include <ctype.h>
+
+/* No utf-8 support. 1 byte = 1 char.
+ *
+ * Each macro expands to a single parenthesised expression so that it
+ * can be used as an operand, e.g. 'n + utf8_strlen(s, len)', in the
+ * same way as the function it replaces.
+ */
+#define utf8_strlen(S, B) ((B) < 0 ? (int)strlen(S) : (B))
+/* Convert via unsigned char so that bytes >= 0x80 are never stored as
+ * negative values where plain char is signed.
+ */
+#define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1)
+#define utf8_index(C, I) (I)
+#define utf8_charlen(C) 1
+
+#else
+/**
+ * Converts the given unicode codepoint (0 - 0xffff) to utf-8
+ * and stores the result at 'p'.
+ * 
+ * Returns the number of bytes stored at 'p' (1-3).
+ */
+int utf8_fromunicode(char *p, unsigned short uc);
+
+/**
+ * Returns the length of the utf-8 sequence starting with 'c'.
+ * 
+ * Returns 1-4, or -1 if this is not a valid start byte.
+ *
+ * Note that charlen=4 is not supported by the rest of the API.
+ */
+int utf8_charlen(int c);
+
+/**
+ * Returns the number of characters in the utf-8 
+ * string of the given byte length.
+ *
+ * Any bytes which are not part of a valid utf-8
+ * sequence are treated as individual characters.
+ *
+ * The string *must* be null terminated.
+ *
+ * Does not support unicode code points > \uffff
+ */
+int utf8_strlen(const char *str, int bytelen);
+
+/**
+ * Returns the byte index of the given character in the utf-8 string.
+ * 
+ * The string *must* be null terminated.
+ *
+ * This will return the byte length of a utf-8 string
+ * if given the char length.
+ */
+int utf8_index(const char *str, int charindex);
+
+/**
+ * Returns the unicode codepoint corresponding to the
+ * utf-8 sequence 'str'.
+ * 
+ * Stores the result in *uc and returns the number of bytes
+ * consumed.
+ *
+ * If 'str' is null terminated, then an invalid utf-8 sequence
+ * at the end of the string will be returned as individual bytes.
+ *
+ * If it is not null terminated, the length *must* be checked first.
+ *
+ * Does not support unicode code points > \uffff
+ */
+int utf8_tounicode(const char *str, int *uc);
+
+#endif
+
+#endif
author	Steve Bennett <steveb@workware.net.au>
	Fri, 8 Oct 2010 12:55:20 +0000 (22:55 +1000)
committer	Steve Bennett <steveb@workware.net.au>
	Fri, 8 Apr 2011 01:48:05 +0000 (11:48 +1000)
Makefile		patch \| blob \| history
linenoise.c		patch \| blob \| history
utf8.c	[new file with mode: 0644]	patch \| blob
utf8.h	[new file with mode: 0644]	patch \| blob