11 "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)"
12 "://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)";
27 u = emalloc(sizeof(URLwin));
30 u->url = estrdup(url);
33 b = emalloc(sizeof(Bytes));
34 while((n = read(fd, buf, sizeof buf)) > 0)
37 return nil; /* empty file */
44 runetobyte(Rune *r, int n)
50 s = smprint("%.*S", n, r);
52 error("malloc failed");
59 return strchr(".,:;'\")]}>!?", c) != nil;
63 emitword(Bytes *b, Rune *r, int nr)
70 s = smprint("%.*S", nr, r);
71 space = b->n > 0 && !isspace(b->b[b->n-1]) && !closingpunct(*s);
72 if(col > 0 && col+space+nr > width){
73 growbytes(b, "\n", 1);
81 growbytes(b, s, strlen(s));
88 renderrunes(Bytes *b, Rune *r)
97 emitword(b, r+wordi, i-wordi);
100 break; /* don't start with blank lines */
101 if(b->n<2 || b->b[b->n-1]!='\n' || b->b[b->n-2]!='\n')
102 growbytes(b, "\n", 1);
106 emitword(b, r+wordi, i-wordi);
116 emitword(b, r+wordi, i-wordi);
120 renderbytes(Bytes *b, char *fmt, ...)
126 r = runevsmprint(fmt, arg);
141 urlprog = regcomp(urlexpr);
143 error("can't compile URL regexp");
145 memset(rs, 0, sizeof rs);
146 if(regexec(urlprog, url, rs, nelem(rs)) == 0)
149 slash = strrchr(base, '/');
150 if(slash!=nil && slash>=&base[rs[0].ep-rs[0].sp])
153 base[rs[0].ep-rs[0].sp] = '\0';
158 fullurl(URLwin *u, Rune *rhref)
160 char *base, *href, *hrefbase;
164 return estrdup("NULL URL");
165 href = runetobyte(rhref, runestrlen(rhref));
166 hrefbase = baseurl(href);
168 if(hrefbase==nil && (base = baseurl(u->url))!=nil){
169 result = estrdup(base);
170 if(base[strlen(base)-1]!='/' && (href==nil || href[0]!='/'))
171 result = eappend(result, "/", "");
176 result = eappend(result, "", href);
178 result = estrdup(href);
182 return estrdup("***unknown***");
187 render(URLwin *u, Bytes *t, Item *items, int curanchor)
204 for(il=items; il!=nil; il=il->next){
205 if(il->state & IFbrk)
206 renderbytes(t, "\n");
207 if(il->state & IFbrksp)
208 renderbytes(t, "\n");
213 if(it->state & IFwrap)
214 renderrunes(t, it->s);
216 emitword(t, it->s, runestrlen(it->s));
219 if(t->n>0 && t->b[t->n-1]!='\n')
220 renderbytes(t, "\n");
221 renderbytes(t, "=======\n");
228 href = fullurl(u, im->imsrc);
229 renderbytes(t, "[image %s]", href);
235 renderbytes(t, "[formfield]");
240 for(cell=tab->cells; cell!=nil; cell=cell->next){
241 render(u, t, cell->content, curanchor);
243 if(t->n>0 && t->b[t->n-1]!='\n')
244 renderbytes(t, "\n");
248 render(u, t, ifl->item, curanchor);
252 if(is->spkind != ISPnull)
256 error("unknown item tag %d\n", il->tag);
258 if(il->anchorid != 0 && il->anchorid!=curanchor){
259 for(a=u->docinfo->anchors; a!=nil; a=a->next)
260 if(aflag && a->index == il->anchorid){
261 href = fullurl(u, a->href);
262 renderbytes(t, "[%s]", href);
266 curanchor = il->anchorid;
269 if(t->n>0 && t->b[t->n-1]!='\n')
270 renderbytes(t, "\n");
278 t = emalloc(sizeof(Bytes));
280 render(u, t, u->items, 0);
283 write(u->outfd, (char*)t->b, t->n);
289 rendertext(URLwin *u, Bytes *b)
293 rurl = toStr((uchar*)u->url, strlen(u->url), UTF_8);
294 u->items = parsehtml(b->b, b->n, rurl, u->type, UTF_8, &u->docinfo);
302 freeurlwin(URLwin *u)
306 freedocinfo(u->docinfo);