17 the our_* routines are implementations for the corresponding library
18 routines. for a while, i tried to actually name them wctomb etc
19 but stopped that after i found a system which made wchar_t an
23 int our_wctomb(char *s, unsigned long wc);
24 int our_mbtowc(unsigned long *p, char *s, unsigned n);
25 int runetoisoutf(char *str, Rune *rune);
26 int fullisorune(char *str, int n);
27 int isochartorune(Rune *rune, char *str);
30 utf_in(int fd, long *notused, struct convert *out)
38 while((n = read(fd, buf+tot, N-tot)) >= 0){
40 for(i=j=0; i<=tot-UTFmax || (i<tot && (n==0 || fullrune(buf+i, tot-i))); ){
41 c = our_mbtowc(&l, buf+i, tot-i);
44 EPR "%s: bad UTF sequence near byte %ld in input\n", argv0, ninput+i);
60 memmove(buf, buf+i, tot);
68 utf_out(Rune *base, int n, long *notused)
75 for(r = base, p = obuf; n-- > 0; r++){
76 p += our_wctomb(p, *r);
79 write(1, obuf, p-obuf);
83 isoutf_in(int fd, long *notused, struct convert *out)
90 while((n = read(fd, buf+tot, N-tot)) >= 0){
93 if(!fullisorune(buf+i, tot-i))
95 c = isochartorune(&runes[j], buf+i);
96 if(runes[j] == Runeerror && c == 1){
98 EPR "%s: bad UTF sequence near byte %ld in input\n", argv0, ninput+i);
112 memmove(buf, buf+i, tot);
120 isoutf_out(Rune *base, int n, long *notused)
127 for(r = base, p = obuf; n-- > 0; r++)
128 p += runetoisoutf(p, r);
130 write(1, obuf, p-obuf);
136 Char1 = Runeself, Rune1 = Runeself,
137 Char21 = 0xA1, Rune21 = 0x0100,
138 Char22 = 0xF6, Rune22 = 0x4016,
139 Char3 = 0xFC, Rune3 = 0x10000, /* really 0x38E2E */
140 Esc = 0xBE, Bad = Runeerror
152 for(i=0; i<256; i++) {
153 u = i + (0x5E - 0xA0);
155 u = i + (0xDF - 0x7F);
157 u = i + (0x00 - 0x21);
159 u = i + (0xBE - 0x00);
166 isochartorune(Rune *rune, char *str)
175 * one character sequence
176 * 00000-0009F => 00-9F
185 * two character sequence
186 * 000A0-000FF => A0; A0-FF
188 c1 = *(uchar*)(str+1);
190 if(c1 >= Rune1 && c1 < Rune21) {
198 * two character sequence
199 * 00100-04015 => A1-F5; 21-7E/A0-FF
205 *rune = (c-Char21)*Esc + c1 + Rune21;
210 * three character sequence
211 * 04016-38E2D => F6-FB; 21-7E/A0-FF; 21-7E/A0-FF
213 c2 = U[*(uchar*)(str+2)];
217 l = (c-Char22)*Esc*Esc + c1*Esc + c2 + Rune22;
233 runetoisoutf(char *str, Rune *rune)
241 * one character sequence
242 * 00000-0009F => 00-9F
251 * two character sequence
252 * 000A0-000FF => A0; A0-FF
261 * two character sequence
262 * 00100-04015 => A1-F5; 21-7E/A0-FF
266 str[0] = c/Esc + Char21;
272 * three character sequence
273 * 04016-38E2D => F6-FB; 21-7E/A0-FF; 21-7E/A0-FF
276 str[0] = c/(Esc*Esc) + Char22;
277 str[1] = T[c/Esc%Esc];
283 fullisorune(char *str, int n)
292 if(c < Char22 || n > 2)
328 Wchar1 = (1UL<<Bit1)-1,
329 Wchar2 = (1UL<<(Bit2+Bitx))-1,
330 Wchar3 = (1UL<<(Bit3+2*Bitx))-1,
331 Wchar4 = (1UL<<(Bit4+3*Bitx))-1,
332 Wchar5 = (1UL<<(Bit5+4*Bitx))-1,
340 our_wctomb(char *s, unsigned long wc)
343 return 0; /* no shift states */
348 s[0] = T6 | ((wc >> 5*Bitx) & Mask6);
349 s[1] = Tx | ((wc >> 4*Bitx) & Maskx);
350 s[2] = Tx | ((wc >> 3*Bitx) & Maskx);
351 s[3] = Tx | ((wc >> 2*Bitx) & Maskx);
352 s[4] = Tx | ((wc >> 1*Bitx) & Maskx);
353 s[5] = Tx | (wc & Maskx);
357 s[0] = T5 | (wc >> 4*Bitx);
358 s[1] = Tx | ((wc >> 3*Bitx) & Maskx);
359 s[2] = Tx | ((wc >> 2*Bitx) & Maskx);
360 s[3] = Tx | ((wc >> 1*Bitx) & Maskx);
361 s[4] = Tx | (wc & Maskx);
366 s[0] = T4 | (wc >> 3*Bitx);
367 s[1] = Tx | ((wc >> 2*Bitx) & Maskx);
368 s[2] = Tx | ((wc >> 1*Bitx) & Maskx);
369 s[3] = Tx | (wc & Maskx);
373 s[0] = T3 | (wc >> 2*Bitx);
374 s[1] = Tx | ((wc >> 1*Bitx) & Maskx);
375 s[2] = Tx | (wc & Maskx);
380 s[0] = T2 | (wc >> 1*Bitx);
381 s[1] = Tx | (wc & Maskx);
390 our_mbtowc(unsigned long *p, char *s, unsigned n)
393 int c0, c1, c2, c3, c4, c5;
397 return 0; /* no shift states */
424 wc = ((((((((((c0 & Mask6) << Bitx) |
425 c1) << Bitx) | c2) << Bitx) |
426 c3) << Bitx) | c4) << Bitx) | c5;
433 wc = ((((((((c0 & Mask5) << Bitx) |
434 c1) << Bitx) | c2) << Bitx) |
448 wc = ((((((c0 & Mask4) << Bitx) |
449 c1) << Bitx) | c2) << Bitx) |
457 wc = ((((c0 & Mask3) << Bitx) |
471 wc = ((c0 & Mask2) << Bitx) |