]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/venti/srv/fmtbloom.c
Import sources from 2011-03-30 iso image - lib
[plan9front.git] / sys / src / cmd / venti / srv / fmtbloom.c
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4
5 Bloom b;
6
7 void
8 usage(void)
9 {
10         fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
11         threadexitsall(0);
12 }
13
14 void
15 threadmain(int argc, char *argv[])
16 {
17         Part *part;
18         char *file;
19         vlong bits, size, size2;
20         int nhash;
21         vlong nblocks;
22         
23         ventifmtinstall();
24         statsinit();
25
26         size = 0;
27         nhash = 0;
28         nblocks = 0;
29         ARGBEGIN{
30         case 'n':
31                 if(nhash || nblocks)
32                         usage();
33                 nblocks = unittoull(EARGF(usage()));
34                 break;
35         case 'N':
36                 if(nhash || nblocks)
37                         usage();
38                 nhash = unittoull(EARGF(usage()));
39                 if(nhash > BloomMaxHash){
40                         fprint(2, "maximum possible is -N %d", BloomMaxHash);
41                         usage();
42                 }
43                 break;
44         case 's':
45                 size = unittoull(ARGF());
46                 if(size == ~0)
47                         usage();
48                 break;
49         default:
50                 usage();
51                 break;
52         }ARGEND
53
54         if(argc != 1)
55                 usage();
56
57         file = argv[0];
58
59         part = initpart(file, ORDWR|ODIRECT);
60         if(part == nil)
61                 sysfatal("can't open partition %s: %r", file);
62
63         if(size == 0)
64                 size = part->size;
65         
66         if(size < 1024*1024)
67                 sysfatal("bloom filter too small");
68
69         if(size > MaxBloomSize){
70                 fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
71                         size, (vlong)MaxBloomSize);
72                 size = MaxBloomSize;
73         }
74         if(size&(size-1)){
75                 for(size2=1; size2<size; size2*=2)
76                         ;
77                 size = size2/2;
78                 fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
79         }
80
81         if(nblocks){
82                 /*
83                  * no use for more than 32 bits per block
84                  * shoot for less than 64 bits per block
85                  */
86                 size2 = size;
87                 while(size2*8 >= nblocks*64)
88                         size2 >>= 1;
89                 if(size2 != size){
90                         size = size2;
91                         fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
92                                 size/1024/1024);
93                 }
94
95                 /*
96                  * optimal is to use ln 2 times as many hash functions as we have bits per blocks.  
97                  */
98                 bits = (8*size)/nblocks;
99                 nhash = bits*7/10;
100                 if(nhash > BloomMaxHash)
101                         nhash = BloomMaxHash;
102         }
103         if(!nhash)
104                 nhash = BloomMaxHash;
105         if(bloominit(&b, size, nil) < 0)
106                 sysfatal("bloominit: %r");
107         b.nhash = nhash;
108         bits = nhash*10/7;
109         nblocks = (8*size)/bits;
110         fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
111         b.data = vtmallocz(size);
112         b.part = part;
113         if(writebloom(&b) < 0)
114                 sysfatal("writing %s: %r", file);
115         threadexitsall(0);
116 }