7 enum{ URLmax = 65536, HINTmax = 20 }; /* URLmax: size of the url tables below; HINTmax: sizes the per-record stats buffer (3+HINTmax*3 bytes) */
8 #define RECIPLOG2 1.44269504089 /* 1/ln(2): multiplying a natural log by this gives log base 2 */
10 char **urlname; /* url strings indexed 1,...,nurl; urlname[0] is the string arena that gets freed on reload */
12 static uint urltab[URLmax]; /* hashstr(url) for each url, entries 1,...,nurl */
13 static int urlnext[URLmax]; /* index into urltab of the next url in the same hash chain */
14 static int urlhash[URLmax]; /* chain head per hash bucket; 0 (the initial value) means the bucket is empty */
38 /* asu works better than pjw for urls */
39 uchar *k = (unsigned char*)key;
49 /* returns +index into urltab, else -hash */
52 hash = 1 + url%(URLmax-1);
69 dir = dirfstat(Bfildes(b));
75 return time(nil) - mtime;
81 static Biobuf *b = nil;
82 static vlong filelen = 0;
91 file = "/sys/log/httpd/url";
93 b = Bopen(file, OREAD); /* first time */
95 syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
100 newlen = Bfilelen(b); /* side effect: rewinds b */
101 if(newlen == filelen || Bage(b)<300)
106 if(nurl){ /* free existing tables */
107 free(urlname[0]); /* arena */
108 memset(urlhash,0,sizeof urlhash);
109 memset(urlnext,0,sizeof urlnext);
113 urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
114 arena = (char*)ezalloc(filelen); /* enough for all the strcpy below */
116 while((s=Brdline(b,'\n'))!=0){
117 /* read lines of the form: 999 /url/path */
119 if(n>2 && s[n]=='\n'){
122 sysfatal("missing fields or newline in url-db");
124 j = strtoul(s,&s,10);
128 sysfatal("url-db synchronization error");
132 sysfatal("duplicate url");
136 syslog(0, HTTPLOG, "urlinit overflow at %s",s);
137 free(urlname[0]); /* arena */
138 memset(urlhash,0,sizeof urlhash);
139 memset(urlnext,0,sizeof urlnext);
144 urlnext[nurl] = urlhash[j];
147 urlname[nurl] = arena;
148 arena += strlen(s)+1;
150 syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
151 /* b is held open, because namespace will be chopped */
157 static Biobuf *b = nil;
158 static vlong filelen = 0;
160 int iq, n, i, nstats = 0;
161 uchar *s, buf[3+HINTmax*3]; /* iq, n, (url,prob)... */
164 static void *oldarena = nil;
166 file = "/sys/log/httpd/pathstat";
169 return; /* if failed first time */
170 b = Bopen(file, OREAD); /* first time */
172 syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
177 newlen = Bfilelen(b); /* side effect: rewinds b */
178 if(newlen == filelen || Bage(b)<300)
183 memset(nhint,0,sizeof nhint);
185 arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
193 iq = (iq<<8) | buf[1];
199 if(Bread(b,buf,3*n)!=3*n)
200 sysfatal("stats read error");
203 h[i].url = (s[0]<<8) | s[1];
207 syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
213 /* all the changes here can be implemented by rewriting in-place */
216 /* remove extraneous '/' in the middle and at the end */
217 p = url+1; /* first char needs no change */
220 if(q[0]=='/' && q[-1]=='/'){
226 if(q[-1]=='/'){ /* trailing '/' */
232 /* specific to the cm.bell-labs.com web site */
233 if(strncmp(url,"/cm/",4)==0){
234 if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
235 /* strip off /cm/cs */
236 memmove(url,url+6,strlen(url+6)+1);
237 else if(strncmp(url+4,"ms/what/wavelet",15)==0)
239 memmove(url,url+11,strlen(url+11)+1);
244 hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
246 int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
247 char *query, *sf, etag[32], *wurl;
251 query = hstrdup(hc, uri);
253 j = urllookup(hashstr(query));
256 query = strrchr(uri,'/');
258 return; /* can't happen */
259 prefix = query-uri+1; /* = strlen(dirname)+1 */
261 for(i=0; i<nhint[j]; i++){
262 if(havej > 0 && havej < URLmax){ /* exclude hints client has */
263 haveh = hints[havej];
264 for(havei=0; havei<nhint[havej]; havei++)
265 if( haveh[havei].url == h[i].url)
268 sf = urlname[h[i].url];
272 n = strlen(webroot) + strlen(sf) + 1;
273 wurl = halloc(hc, n);
274 strcpy(wurl, webroot);
276 fd = open(wurl, OREAD);
285 snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
286 siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
288 if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
290 hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
295 hprint(hout, "Fresh: have/%d\r\n", j);