git.lizzy.rs Git - plan9front.git/commitdiff
cpp: handle 4 byte utf sequences (21-bit runes)
author cinap_lenrek <cinap_lenrek@felloff.net>
Thu, 24 Sep 2015 10:23:17 +0000 (12:23 +0200)
committer cinap_lenrek <cinap_lenrek@felloff.net>
Thu, 24 Sep 2015 10:23:17 +0000 (12:23 +0200)
sys/src/cmd/cpp/lex.c

index e90423e93251496a90657027eb7984d8fb54c15e..226097b358f67ff379255c6b532f0c7ff8307b81 100644 (file)
@@ -29,6 +29,7 @@
 
 #define        UTF2(c)         ((c)>=0xA0 && (c)<0xE0)         /* 2-char UTF seq */
 #define        UTF3(c)         ((c)>=0xE0 && (c)<0xF0)         /* 3-char UTF seq */
+#define        UTF4(c)         ((c)>=0xF0 && (c)<0xF8)         /* 4-char UTF seq */
 
 /* character classes */
 #define        C_WS    1
@@ -259,7 +260,7 @@ expandlex(void)
                        case C_ALPH:
                                for (j=0; j<=256; j++)
                                        if ('a'<=j&&j<='z' || 'A'<=j&&j<='Z'
-                                         || UTF2(j) || UTF3(j) || j=='_')
+                                         || UTF2(j) || UTF3(j) || UTF4(j) || j=='_')
                                                bigfsm[j][fp->state] = nstate;
                                continue;
                        case C_NUM:
@@ -274,7 +275,7 @@ expandlex(void)
        /* install special cases for ? (trigraphs),  \ (splicing), runes */
        for (i=0; i<MAXSTATE; i++) {
                for (j=0; j<0xFF; j++)
-                       if (j=='?' || j=='\\' || UTF2(j) || UTF3(j)) {
+                       if (j=='?' || j=='\\' || UTF2(j) || UTF3(j) || UTF4(j)) {
                                if (bigfsm[j][i]>0)
                                        bigfsm[j][i] = ~bigfsm[j][i];
                                bigfsm[j][i] &= ~QBSBIT;
@@ -393,6 +394,10 @@ gettokens(Tokenrow *trp, int reset)
                                        runelen = 3;
                                        goto reswitch;
                                }
+                               if (UTF4(c)) {
+                                       runelen = 4;
+                                       goto reswitch;
+                               }
                                error(WARNING, "Lexical botch in cpp");
                                ip += runelen;
                                runelen = 1;