]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/ip/httpd/hints.c
etheriwl: don't break controller on command flush timeout
[plan9front.git] / sys / src / cmd / ip / httpd / hints.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "httpd.h"
5 #include "httpsrv.h"
6
7 enum{ URLmax = 65536, HINTmax = 20 };
8 #define RECIPLOG2 1.44269504089
9
10 char **urlname;                         /* array of url strings    1,...,nurl */
11 static int nurl;
12 static uint urltab[URLmax];             /* hashstr(url)  1,...,nurl */
13 static int urlnext[URLmax];             /* index urltab of next url in chain */
14 static int urlhash[URLmax];             /* initially 0, meaning empty buckets */
15
16 typedef struct Hint {
17         ushort url;
18         uchar prob;
19 } Hint;
20 Hint *hints[URLmax];
21 uchar nhint[URLmax];
22
23 vlong
24 Bfilelen(void *vb)
25 {
26         Biobuf *b;
27         vlong n;
28
29         b = vb;
30         n = Bseek(b, 0L, 2);
31         Bseek(b, 0L, 0);
32         return n;
33 }
34
35 static uint 
36 hashstr(char* key)
37 {
38         /* asu works better than pjw for urls */
39         uchar *k = (unsigned char*)key;
40         uint h = 0;
41         while(*k!=0)
42                 h = 65599*h + *k++;
43         return h;
44 }
45
46 static int
47 urllookup(uint url)
48 {
49         /* returns +index into urltab, else -hash */
50         int j, hash;
51
52         hash = 1 + url%(URLmax-1);
53         j = urlhash[hash];
54         for(;;){
55                 if(j==0)
56                         return -hash;
57                 if(url==urltab[j])
58                         return j;
59                 j = urlnext[j];
60         }
61 }
62
63 int
64 Bage(Biobuf *b)
65 {
66         Dir *dir;
67         long mtime;
68
69         dir = dirfstat(Bfildes(b));
70         if(dir != nil)
71                 mtime = dir->mtime;
72         else
73                 mtime = 0;
74         free(dir);
75         return time(nil) - mtime;
76 }
77
78 void
79 urlinit(void)
80 {
81         static Biobuf *b = nil;
82         static vlong filelen = 0;
83         vlong newlen;
84         char *s, *arena;
85         int i, j, n;
86         uint url;
87         char *file;
88
89         if(filelen < 0)
90                 return;
91         file = "/sys/log/httpd/url";
92         if(b == nil){
93                 b = Bopen(file, OREAD); /* first time */
94                 if(b == nil){
95                         syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
96                         filelen = -1;
97                         return;
98                 }
99         }
100         newlen = Bfilelen(b); /* side effect: rewinds b */
101         if(newlen == filelen || Bage(b)<300)
102                 return;
103         filelen = newlen;
104         if(filelen < 0)
105                 return;
106         if(nurl){ /* free existing tables */
107                 free(urlname[0]); /* arena */
108                 memset(urlhash,0,sizeof urlhash);
109                 memset(urlnext,0,sizeof urlnext);
110                 nurl = 0;
111         }
112         if(urlname==nil)
113                 urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
114         arena = (char*)ezalloc(filelen);  /* enough for all the strcpy below */
115         i = 1;
116         while((s=Brdline(b,'\n'))!=0){
117                 /* read lines of the form:  999 /url/path */
118                 n = Blinelen(b) - 1;
119                 if(n>2 && s[n]=='\n'){
120                         s[n] = '\0';
121                 }else{
122                         sysfatal("missing fields or newline in url-db");
123                 }
124                 j = strtoul(s,&s,10);
125                 while(*s==' ')
126                         s++;
127                 if(i++!=j)
128                         sysfatal("url-db synchronization error");
129                 url = hashstr(s);
130                 j = urllookup(url);
131                 if(j>=0)
132                         sysfatal("duplicate url");
133                 j = -j;
134                 nurl++;
135                 if(nurl>=URLmax){
136                         syslog(0, HTTPLOG, "urlinit overflow at %s",s);
137                         free(urlname[0]); /* arena */
138                         memset(urlhash,0,sizeof urlhash);
139                         memset(urlnext,0,sizeof urlnext);
140                         nurl = 0;
141                         return;
142                 }
143                 urltab[nurl] = url;
144                 urlnext[nurl] = urlhash[j];
145                 urlhash[j] = nurl;
146                 strcpy(arena,s);
147                 urlname[nurl] = arena;
148                 arena += strlen(s)+1;
149         }
150         syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
151         /* b is held open, because namespace will be chopped */
152 }
153
154 void
155 statsinit(void)
156 {
157         static Biobuf *b = nil;
158         static vlong filelen = 0;
159         vlong newlen;
160         int iq, n, i, nstats = 0;
161         uchar *s, buf[3+HINTmax*3];  /* iq, n, (url,prob)... */
162         Hint *arena, *h;
163         char *file;
164         static void *oldarena = nil;
165
166         file = "/sys/log/httpd/pathstat";
167         if(b == nil){
168                 if(filelen == -1)
169                         return; /* if failed first time */
170                 b = Bopen(file, OREAD); /* first time */
171                 if(b == nil){
172                         syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
173                         filelen = -1;
174                         return;
175                 }
176         }
177         newlen = Bfilelen(b); /* side effect: rewinds b */
178         if(newlen == filelen || Bage(b)<300)
179                 return;
180         filelen = newlen;
181         if(oldarena){
182                 free(oldarena);
183                 memset(nhint,0,sizeof nhint);
184         }
185         arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
186         oldarena = arena;
187         for(;;){
188                 i = Bread(b,buf,3);
189                 if(i<3)
190                         break;
191                 nstats++;
192                 iq = buf[0];
193                 iq = (iq<<8) | buf[1];
194                 n = buf[2];
195                 h = arena;
196                 arena += n;
197                 hints[iq] = h;
198                 nhint[iq] = n;
199                 if(Bread(b,buf,3*n)!=3*n)
200                         sysfatal("stats read error");
201                 for(i=0; i<n; i++){
202                         s = &buf[3*i];
203                         h[i].url = (s[0]<<8) | s[1];
204                         h[i].prob = s[2];
205                 }
206         }
207         syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
208 }
209
210 void
211 urlcanon(char *url)
212 {
213         /* all the changes here can be implemented by rewriting in-place */
214         char *p, *q;
215
216         /* remove extraneous '/' in the middle and at the end */
217         p = url+1;  /* first char needs no change */
218         q = p;
219         while(q[0]){
220                 if(q[0]=='/' && q[-1]=='/'){
221                         q++;
222                         continue;
223                 }
224                 *p++ = *q++;
225         }
226         if(q[-1]=='/'){  /* trailing '/' */
227                 p[-1] = '\0';
228         }else{
229                 p[0] = '\0';
230         }
231
232         /* specific to the cm.bell-labs.com web site */
233         if(strncmp(url,"/cm/",4)==0){
234                 if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
235                         /* strip off /cm/cs */
236                         memmove(url,url+6,strlen(url+6)+1);
237                 else if(strncmp(url+4,"ms/what/wavelet",15)==0)
238                         /* /cm/ms/what */
239                         memmove(url,url+11,strlen(url+11)+1);
240         }
241 }
242
243 void
244 hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
245 {
246         int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
247         char *query, *sf, etag[32], *wurl;
248         Dir *dir;
249         Hint *h, *haveh;
250
251         query = hstrdup(hc, uri);
252         urlcanon(query);
253         j = urllookup(hashstr(query));
254         if(j < 0)
255                 return;
256         query = strrchr(uri,'/');
257         if(!query)
258                 return;  /* can't happen */
259         prefix = query-uri+1;  /* = strlen(dirname)+1 */
260         h = hints[j];
261         for(i=0; i<nhint[j]; i++){
262                 if(havej > 0 && havej < URLmax){ /* exclude hints client has */
263                         haveh = hints[havej];
264                         for(havei=0; havei<nhint[havej]; havei++)
265                                 if( haveh[havei].url == h[i].url)
266                                         goto continuei;
267                 }
268                 sf = urlname[h[i].url];
269                 pr = h[i].prob;
270                 if(pr<thresh)
271                         break;
272                 n = strlen(webroot) + strlen(sf) + 1;
273                 wurl = halloc(hc, n);
274                 strcpy(wurl, webroot);
275                 strcat(wurl, sf);
276                 fd = open(wurl, OREAD);
277                 if(fd<0)
278                         continue;
279                 dir = dirfstat(fd);
280                 if(dir == nil){
281                         close(fd);
282                         continue;
283                 }
284                 close(fd);
285                 snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
286                 siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
287                 free(dir);
288                 if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
289                         sf = sf+prefix;
290                 hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
291                 newhint++;
292 continuei: ;
293         }
294         if(newhint)
295                 hprint(hout, "Fresh: have/%d\r\n", j);
296 }
297