1 # store.py - repository store handling for Mercurial
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
14 # This avoids a collision between a file named foo and a dir named
17 if not path.startswith('data/'):
20 .replace(".hg/", ".hg.hg/")
21 .replace(".i/", ".i.hg/")
22 .replace(".d/", ".d.hg/"))
25 if not path.startswith('data/'):
28 .replace(".d.hg/", ".d/")
29 .replace(".i.hg/", ".i/")
30 .replace(".hg.hg/", ".hg/"))
32 def _buildencodefun():
34 win_reserved = [ord(x) for x in '\\:*?"<>|']
35 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
36 for x in (range(32) + range(126, 256) + win_reserved):
37 cmap[chr(x)] = "~%02x" % x
38 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
39 cmap[chr(x)] = e + chr(x).lower()
41 for k, v in cmap.iteritems():
46 for l in xrange(1, 4):
55 return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
56 lambda s: decodedir("".join(list(decode(s)))))
# Build the encoder/decoder pair once, when the module is loaded;
# _buildencodefun() returns them as an (encode, decode) tuple.
encodefilename, decodefilename = _buildencodefun()
def _build_lower_encodefun():
    '''build the lowercasing filename encoder

    Returns a function that encodes a string one character at a time:

    - 'A'-'Z' are replaced by the corresponding lowercase letter
    - characters with codes 0-31 and 126-255, and the Windows-reserved
      characters (backslash, colon, asterisk, question mark, double
      quote, less-than, greater-than, pipe) are replaced by '~xx',
      where xx is the two digit hex code of the character
    - every other character is kept as is

    Upper and lower case letters produce the same output, so the
    encoding cannot be decoded back to the original string.

    The returned function raises KeyError for a character whose code
    is above 255.
    '''
    win_reserved = [ord(x) for x in '\\:*?"<>|']
    # Explicit lists: range objects cannot be concatenated with '+' on
    # Python 3, and xrange does not exist there. On Python 2 this yields
    # exactly the same sequence as before.
    escaped = list(range(32)) + list(range(126, 256)) + win_reserved
    # start from the identity mapping, then override the special cases
    cmap = dict([(chr(x), chr(x)) for x in range(127)])
    for x in escaped:
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])

# the table is built only once, when the module is loaded
lowerencode = _build_lower_encodefun()
# Base names that are checked against each path component (the part
# before the first '.') to detect Windows reserved filenames.
_windows_reserved_filenames = [
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
]
76 for n in path.split('/'):
78 base = n.split('.')[0]
79 if base and (base in _windows_reserved_filenames):
80 # encode third letter ('aux' -> 'au~78')
81 ec = "~%02x" % ord(n[2])
82 n = n[0:2] + ec + n[3:]
84 # encode last period or space ('foo...' -> 'foo..~2e')
85 n = n[:-1] + "~%02x" % ord(n[-1])
# Length threshold for encoded store paths: hybridencode() switches to
# the hashed encoding when the default-encoded path is longer than this.
MAX_PATH_LEN_IN_HGSTORE = 120
# Limit on the length of the joined, shortened directory prefixes that
# hybridencode() keeps in a hashed path.
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
92 def hybridencode(path):
93 '''encodes path with a length limit
95 Encodes all paths that begin with 'data/', according to the following.
97 Default encoding (reversible):
99 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
100 characters are encoded as '~xx', where xx is the two digit hex code
101 of the character (see encodefilename).
102 Relevant path components consisting of Windows reserved filenames are
103 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
105 Hashed encoding (not reversible):
107 If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
108 non-reversible hybrid hashing of the path is done instead.
109 This encoding uses up to DIR_PREFIX_LEN characters of all directory
110 levels of the lowerencoded path, but not more levels than can fit into
111 _MAX_SHORTENED_DIRS_LEN.
112 Then follows the filler followed by the sha digest of the full path.
113 The filler is the beginning of the basename of the lowerencoded path
114 (the basename is everything after the last path separator). The filler
115 is as long as possible, filling in characters from the basename until
116 the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
117 of the basename have been taken).
118 The extension (e.g. '.i' or '.d') is preserved.
120 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
123 if not path.startswith('data/'):
125 # escape directories ending with .i and .d
126 path = encodedir(path)
127 ndpath = path[len('data/'):]
128 res = 'data/' + auxencode(encodefilename(ndpath))
129 if len(res) > MAX_PATH_LEN_IN_HGSTORE:
130 digest = _sha(path).hexdigest()
131 aep = auxencode(lowerencode(ndpath))
132 _root, ext = os.path.splitext(aep)
133 parts = aep.split('/')
137 d = p[:DIR_PREFIX_LEN]
139 # Windows can't access dirs ending in period or space
141 t = '/'.join(sdirs) + '/' + d
142 if len(t) > _MAX_SHORTENED_DIRS_LEN:
145 dirs = '/'.join(sdirs)
148 res = 'dh/' + dirs + digest + ext
149 space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
151 filler = basename[:space_left]
152 res = 'dh/' + dirs + filler + digest + ext
157 # files in .hg/ will be created using this mode
158 mode = os.stat(path).st_mode
159 # avoid some useless chmods
160 if (0777 & ~util.umask) == (0777 & mode):
# Space-separated entry names; the store classes split this string and
# return the names (together with 'requires') as a list.
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
168 class basicstore(object):
169 '''base class for local repository stores'''
170 def __init__(self, path, opener, pathjoiner):
171 self.pathjoiner = pathjoiner
173 self.createmode = _calcmode(path)
174 op = opener(self.path)
175 op.createmode = self.createmode
176 self.opener = lambda f, *args, **kw: op(encodedir(f), *args, **kw)
179 return self.pathjoiner(self.path, encodedir(f))
181 def _walk(self, relpath, recurse):
182 '''yields (unencoded, encoded, size)'''
183 path = self.pathjoiner(self.path, relpath)
184 striplen = len(self.path) + len(os.sep)
186 if os.path.isdir(path):
190 for f, kind, st in osutil.listdir(p, stat=True):
191 fp = self.pathjoiner(p, f)
192 if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
193 n = util.pconvert(fp[striplen:])
194 l.append((decodedir(n), n, st.st_size))
195 elif kind == stat.S_IFDIR and recurse:
200 return self._walk('data', True)
203 '''yields (unencoded, encoded, size)'''
204 # yield data files first
205 for x in self.datafiles():
207 # yield manifest before changelog
208 for x in reversed(self._walk('', False)):
212 return ['requires'] + _data.split()
214 class encodedstore(basicstore):
def __init__(self, path, opener, pathjoiner):
    '''set up a store living in the 'store' subdirectory of path

    path is the parent directory, opener is called with the store
    directory and must return a callable used to open files, and
    pathjoiner joins path components. File names handed to
    self.opener are passed through encodefilename() first.
    '''
    self.pathjoiner = pathjoiner
    storepath = pathjoiner(path, 'store')
    self.path = storepath
    mode = _calcmode(storepath)
    self.createmode = mode
    rawopener = opener(storepath)
    rawopener.createmode = mode

    def encodedopener(f, *args, **kw):
        # encode the name, forward everything else untouched
        return rawopener(encodefilename(f), *args, **kw)

    self.opener = encodedopener
224 for a, b, size in self._walk('data', True):
226 a = decodefilename(a)
232 return self.pathjoiner(self.path, encodefilename(f))
235 return (['requires', '00changelog.i'] +
236 [self.pathjoiner('store', f) for f in _data.split()])
238 class fncache(object):
239 # the filename used to be partially encoded
240 # hence the encodedir/decodedir dance
241 def __init__(self, opener):
246 '''fill the entries from the fncache file'''
249 fp = self.opener('fncache', mode='r')
251 # skip nonexistent file
253 for n, line in enumerate(fp):
254 if (len(line) < 2) or (line[-1] != '\n'):
255 t = _('invalid entry in fncache, line %s') % (n + 1)
257 self.entries.add(decodedir(line[:-1]))
260 def rewrite(self, files):
261 fp = self.opener('fncache', mode='wb')
263 fp.write(encodedir(p) + '\n')
265 self.entries = set(files)
268 if self.entries is None:
270 self.opener('fncache', 'ab').write(encodedir(fn) + '\n')
272 def __contains__(self, fn):
273 if self.entries is None:
275 return fn in self.entries
278 if self.entries is None:
280 return iter(self.entries)
282 class fncachestore(basicstore):
283 def __init__(self, path, opener, pathjoiner):
284 self.pathjoiner = pathjoiner
285 self.path = self.pathjoiner(path, 'store')
286 self.createmode = _calcmode(self.path)
287 op = opener(self.path)
288 op.createmode = self.createmode
292 def fncacheopener(path, mode='r', *args, **kw):
293 if (mode not in ('r', 'rb')
294 and path.startswith('data/')
295 and path not in fnc):
297 return op(hybridencode(path), mode, *args, **kw)
298 self.opener = fncacheopener
301 return self.pathjoiner(self.path, hybridencode(f))
306 pjoin = self.pathjoiner
308 for f in self.fncache:
311 st = os.stat(pjoin(spath, ef))
312 yield f, ef, st.st_size
318 # rewrite fncache to remove nonexistent entries
319 # (may be caused by rollback / strip)
320 self.fncache.rewrite(existing)
323 d = _data + ' dh fncache'
324 return (['requires', '00changelog.i'] +
325 [self.pathjoiner('store', f) for f in d.split()])
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the repository requirements

    - 'store' and 'fncache' both in requirements: fncachestore
    - only 'store' in requirements: encodedstore
    - otherwise: basicstore

    pathjoiner falls back to os.path.join when it is not given (or is
    otherwise false).
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        storeclass = basicstore
    elif 'fncache' in requirements:
        storeclass = fncachestore
    else:
        storeclass = encodedstore
    return storeclass(path, opener, pathjoiner)