7 Rune* whitespace = L" \t\n\r";
8 Rune* notwhitespace = L"^ \t\n\r";
10 // All lists start out like the List structure.
11 // A List itself can be used as a list of int.
26 _newlist(int val, List* rest)
30 ans = (List*)emalloc(sizeof(List));
36 // Reverse a list in place
53 // The next few routines take a "character class" as argument.
54 // e.g., "a-zA-Z", or "^ \t\n"
55 // (ranges are indicated by -, except in first position;
56 // ^ in first position means "not in" the following class)
58 // Splitl splits s[0:n] just before first character of class cl.
59 // Answers go in (p1, n1) and (p2, n2).
60 // If no split, the whole thing goes in the first component.
61 // Note: answers contain pointers into original string.
63 _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
67 p = _Strnclass(s, cl, n);
81 // Splitr splits s[0:n] just after last character of class cl.
82 // Answers go in (p1, n1) and (p2, n2).
83 // If no split, the whole thing goes in the last component.
84 // Note: answers contain pointers into original string.
86 _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
90 p = _Strnrclass(s, cl, n);
105 // Splitall splits s[0:n] into parts that are separated by characters from class cl.
106 // Each part will have nonzero length.
107 // At most alen parts are found, and pointers to their starts go into
108 // the strarr array, while their lengths go into the lenarr array.
109 // The return value is the number of parts found.
111 _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
118 if(s == nil || n == 0)
123 while(p < slast && i < alen) {
124 while(p < slast && _inclass(*p, cl))
128 q = _Strnclass(p, cl, slast-p);
131 assert(q > p && q <= slast);
140 // Find part of s that excludes leading and trailing whitespace,
141 // and return that part in *pans (and its length in *panslen).
143 _trimwhite(Rune* s, int n, Rune** pans, int* panslen)
150 p = _Strnclass(s, notwhitespace, n);
152 q = _Strnrclass(s, notwhitespace, n);
161 // _Strclass returns a pointer to the first element of s that is
162 // a member of class cl, nil if none.
164 _Strclass(Rune* s, Rune* cl)
168 for(p = s; *p != 0; p++)
174 // _Strnclass returns a pointer to the first element of s[0:n] that is
175 // a member of class cl, nil if none.
177 _Strnclass(Rune* s, Rune* cl, int n)
181 for(p = s; n-- && *p != 0; p++)
187 // _Strrclass returns a pointer to the last element of s that is
188 // a member of class cl, nil if none
190 _Strrclass(Rune* s, Rune* cl)
194 if(s == nil || *s == 0)
196 p = s + runestrlen(s) - 1;
205 // _Strnrclass returns a pointer to the last element of s[0:n] that is
206 // a member of class cl, nil if none
208 _Strnrclass(Rune* s, Rune* cl, int n)
212 if(s == nil || *s == 0 || n == 0)
223 // Is c in the class cl?
225 _inclass(Rune c, Rune* cl)
242 for(i = 0; i < n; i++) {
243 if(cl[i] == '-' && i > 0 && i < n - 1) {
244 if(c >= cl[i - 1] && c <= cl[i + 1]) {
250 else if(c == cl[i]) {
260 // Is pre a prefix of s?
262 _prefix(Rune* pre, Rune* s)
272 for(k = 0; k < n; k++) {
279 // Number of runes in (null-terminated) s
285 return runestrlen(s);
288 // -1, 0, 1 as s1 is lexicographically less than, equal to, or greater than s2
290 _Strcmp(Rune *s1, Rune *s2)
293 return (s2 == nil || *s2 == 0) ? 0 : -1;
295 return (*s1 == 0) ? 0 : 1;
296 return runestrcmp(s1, s2);
299 // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
300 // Also, do a case-insensitive match, assuming s2
301 // has no chars in [A-Z], only their lowercase versions.
302 // (This routine is used for in-place keyword lookup, where s2 is in a keyword
303 // list and s1 is some substring, possibly mixed-case, in a buffer.)
305 _Strncmpci(Rune *s1, int n1, Rune *s2)
317 if(c1 >= 'A' && c1 <= 'Z')
334 ans = _Strndup(s, runestrlen(s));
335 setmalloctag(ans, getcallerpc(&s));
339 // emalloc and copy n chars of s (assume s is at least that long),
340 // and add 0 terminator.
341 // Return nil if n==0.
343 _Strndup(Rune* s, int n)
350 memmove(ans, s, n*sizeof(Rune));
352 setmalloctag(ans, getcallerpc(&s));
355 // emalloc enough room for n Runes, plus 1 null terminator.
356 // (Not initialized to anything.)
362 ans = (Rune*)emalloc((n+1)*sizeof(Rune));
363 setmalloctag(ans, getcallerpc(&n));
367 // emalloc and copy s+t
369 _Strdup2(Rune* s, Rune* t)
379 ans = _newstr(ns+nt);
380 p = _Stradd(ans, s, ns);
381 p = _Stradd(p, t, nt);
383 setmalloctag(ans, getcallerpc(&s));
387 // Return emalloc'd substring s[start:stop].
389 _Strsubstr(Rune* s, int start, int stop)
395 t = _Strndup(s+start, stop-start);
396 setmalloctag(t, getcallerpc(&s));
400 // Copy n chars to s1 from s2, and return s1+n
402 _Stradd(Rune* s1, Rune* s2, int n)
406 memmove(s1, s2, n*sizeof(Rune));
410 // Like strtol, but converting from Rune* string
412 #define LONG_MAX 2147483647L
413 #define LONG_MIN -2147483648L
416 _Strtol(Rune* nptr, Rune** endptr, int base)
420 int c, ovfl, v, neg, ndig;
447 if(*p=='-' || *p=='+')
459 if(p[1]=='x' || p[1]=='X'){
464 }else if(base==16 && *p=='0'){
465 if(p[1]=='x' || p[1]=='X')
467 }else if(base<0 || 36<base)
471 * Non-empty sequence of digits
478 else if('a'<=c && c<='z')
480 else if('A'<=c && c<='Z')
505 // Convert buf[0:n], bytes whose character set is chset,
506 // into an emalloc'd null-terminated Unicode string.
508 toStr(uchar* buf, int n, int chset)
518 ans = (Rune*)emalloc((n+1)*sizeof(Rune));
519 for(i = 0; i < n; i++)
526 for(i = 0; i < n; ) {
527 i += chartorune(&ch, (char*)(buf+i));
530 ans = (Rune*)emalloc((m+1)*sizeof(Rune));
532 for(i = 0; i < n; ) {
533 i += chartorune(&ch, (char*)(buf+i));
543 setmalloctag(ans, getcallerpc(&buf));
547 // Convert buf[0:n], Unicode characters,
548 // into an emalloc'd null-terminated string in character set chset.
549 // Use Runeerror for unconvertible characters.
551 fromStr(Rune* buf, int n, int chset)
563 ans = (uchar*)emalloc(n+1);
564 lim = (chset==US_Ascii)? 127 : 255;
565 for(i = 0; i < n; i++) {
576 for(i = 0; i < n; i++) {
577 m += runetochar((char*)s, &buf[i]);
579 ans = (uchar*)emalloc(m+1);
581 for(i = 0; i < n; i++)
582 p += runetochar((char*)p, &buf[i]);
589 setmalloctag(ans, getcallerpc(&buf));