sys/lib/python/mercurial/store.py

   1 # store.py - repository store handling for Mercurial
   2 #
   3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
   4 #
   5 # This software may be used and distributed according to the terms of the
   6 # GNU General Public License version 2, incorporated herein by reference.
   7
   8 from i18n import _
   9 import osutil, util
  10 import os, stat
  11
# Hash constructor taken from util; hybridencode() calls it on the full path
# and uses the hexdigest() of the result in hashed store names.
_sha = util.sha1
  13
  14 # This avoids a collision between a file named foo and a dir named
  15 # foo.i or foo.d
  16 def encodedir(path):
  17     if not path.startswith('data/'):
  18         return path
  19     return (path
  20             .replace(".hg/", ".hg.hg/")
  21             .replace(".i/", ".i.hg/")
  22             .replace(".d/", ".d.hg/"))
  23
  24 def decodedir(path):
  25     if not path.startswith('data/'):
  26         return path
  27     return (path
  28             .replace(".d.hg/", ".d/")
  29             .replace(".i.hg/", ".i/")
  30             .replace(".hg.hg/", ".hg/"))
  31
  32 def _buildencodefun():
  33     e = '_'
  34     win_reserved = [ord(x) for x in '\\:*?"<>|']
  35     cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
  36     for x in (range(32) + range(126, 256) + win_reserved):
  37         cmap[chr(x)] = "~%02x" % x
  38     for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
  39         cmap[chr(x)] = e + chr(x).lower()
  40     dmap = {}
  41     for k, v in cmap.iteritems():
  42         dmap[v] = k
  43     def decode(s):
  44         i = 0
  45         while i < len(s):
  46             for l in xrange(1, 4):
  47                 try:
  48                     yield dmap[s[i:i+l]]
  49                     i += l
  50                     break
  51                 except KeyError:
  52                     pass
  53             else:
  54                 raise KeyError
  55     return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
  56             lambda s: decodedir("".join(list(decode(s)))))
  57
  58 encodefilename, decodefilename = _buildencodefun()
  59
  60 def _build_lower_encodefun():
  61     win_reserved = [ord(x) for x in '\\:*?"<>|']
  62     cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
  63     for x in (range(32) + range(126, 256) + win_reserved):
  64         cmap[chr(x)] = "~%02x" % x
  65     for x in range(ord("A"), ord("Z")+1):
  66         cmap[chr(x)] = chr(x).lower()
  67     return lambda s: "".join([cmap[c] for c in s])
  68
  69 lowerencode = _build_lower_encodefun()
  70
  71 _windows_reserved_filenames = '''con prn aux nul
  72     com1 com2 com3 com4 com5 com6 com7 com8 com9
  73     lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
  74 def auxencode(path):
  75     res = []
  76     for n in path.split('/'):
  77         if n:
  78             base = n.split('.')[0]
  79             if base and (base in _windows_reserved_filenames):
  80                 # encode third letter ('aux' -> 'au~78')
  81                 ec = "~%02x" % ord(n[2])
  82                 n = n[0:2] + ec + n[3:]
  83             if n[-1] in '. ':
  84                 # encode last period or space ('foo...' -> 'foo..~2e')
  85                 n = n[:-1] + "~%02x" % ord(n[-1])
  86         res.append(n)
  87     return '/'.join(res)
  88
  89 MAX_PATH_LEN_IN_HGSTORE = 120
  90 DIR_PREFIX_LEN = 8
  91 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
  92 def hybridencode(path):
  93     '''encodes path with a length limit
  94
  95     Encodes all paths that begin with 'data/', according to the following.
  96
  97     Default encoding (reversible):
  98
  99     Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
 100     characters are encoded as '~xx', where xx is the two digit hex code
 101     of the character (see encodefilename).
 102     Relevant path components consisting of Windows reserved filenames are
 103     masked by encoding the third character ('aux' -> 'au~78', see auxencode).
 104
 105     Hashed encoding (not reversible):
 106
 107     If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
 108     non-reversible hybrid hashing of the path is done instead.
 109     This encoding uses up to DIR_PREFIX_LEN characters of all directory
 110     levels of the lowerencoded path, but not more levels than can fit into
 111     _MAX_SHORTENED_DIRS_LEN.
 112     Then follows the filler followed by the sha digest of the full path.
 113     The filler is the beginning of the basename of the lowerencoded path
 114     (the basename is everything after the last path separator). The filler
 115     is as long as possible, filling in characters from the basename until
 116     the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
 117     of the basename have been taken).
 118     The extension (e.g. '.i' or '.d') is preserved.
 119
 120     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
 121     encoding was used.
 122     '''
 123     if not path.startswith('data/'):
 124         return path
 125     # escape directories ending with .i and .d
 126     path = encodedir(path)
 127     ndpath = path[len('data/'):]
 128     res = 'data/' + auxencode(encodefilename(ndpath))
 129     if len(res) > MAX_PATH_LEN_IN_HGSTORE:
 130         digest = _sha(path).hexdigest()
 131         aep = auxencode(lowerencode(ndpath))
 132         _root, ext = os.path.splitext(aep)
 133         parts = aep.split('/')
 134         basename = parts[-1]
 135         sdirs = []
 136         for p in parts[:-1]:
 137             d = p[:DIR_PREFIX_LEN]
 138             if d[-1] in '. ':
 139                 # Windows can't access dirs ending in period or space
 140                 d = d[:-1] + '_'
 141             t = '/'.join(sdirs) + '/' + d
 142             if len(t) > _MAX_SHORTENED_DIRS_LEN:
 143                 break
 144             sdirs.append(d)
 145         dirs = '/'.join(sdirs)
 146         if len(dirs) > 0:
 147             dirs += '/'
 148         res = 'dh/' + dirs + digest + ext
 149         space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
 150         if space_left > 0:
 151             filler = basename[:space_left]
 152             res = 'dh/' + dirs + filler + digest + ext
 153     return res
 154
 155 def _calcmode(path):
 156     try:
 157         # files in .hg/ will be created using this mode
 158         mode = os.stat(path).st_mode
 159             # avoid some useless chmods
 160         if (0777 & ~util.umask) == (0777 & mode):
 161             mode = None
 162     except OSError:
 163         mode = None
 164     return mode
 165
# Whitespace-separated names of the data directory and of the manifest and
# changelog .d/.i files; the copylist() methods of the store classes split
# this string to build their results.
_data = 'data 00manifest.d 00manifest.i 00changelog.d  00changelog.i'
 167
 168 class basicstore(object):
 169     '''base class for local repository stores'''
 170     def __init__(self, path, opener, pathjoiner):
 171         self.pathjoiner = pathjoiner
 172         self.path = path
 173         self.createmode = _calcmode(path)
 174         op = opener(self.path)
 175         op.createmode = self.createmode
 176         self.opener = lambda f, *args, **kw: op(encodedir(f), *args, **kw)
 177
 178     def join(self, f):
 179         return self.pathjoiner(self.path, encodedir(f))
 180
 181     def _walk(self, relpath, recurse):
 182         '''yields (unencoded, encoded, size)'''
 183         path = self.pathjoiner(self.path, relpath)
 184         striplen = len(self.path) + len(os.sep)
 185         l = []
 186         if os.path.isdir(path):
 187             visit = [path]
 188             while visit:
 189                 p = visit.pop()
 190                 for f, kind, st in osutil.listdir(p, stat=True):
 191                     fp = self.pathjoiner(p, f)
 192                     if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
 193                         n = util.pconvert(fp[striplen:])
 194                         l.append((decodedir(n), n, st.st_size))
 195                     elif kind == stat.S_IFDIR and recurse:
 196                         visit.append(fp)
 197         return sorted(l)
 198
 199     def datafiles(self):
 200         return self._walk('data', True)
 201
 202     def walk(self):
 203         '''yields (unencoded, encoded, size)'''
 204         # yield data files first
 205         for x in self.datafiles():
 206             yield x
 207         # yield manifest before changelog
 208         for x in reversed(self._walk('', False)):
 209             yield x
 210
 211     def copylist(self):
 212         return ['requires'] + _data.split()
 213
 214 class encodedstore(basicstore):
 215     def __init__(self, path, opener, pathjoiner):
 216         self.pathjoiner = pathjoiner
 217         self.path = self.pathjoiner(path, 'store')
 218         self.createmode = _calcmode(self.path)
 219         op = opener(self.path)
 220         op.createmode = self.createmode
 221         self.opener = lambda f, *args, **kw: op(encodefilename(f), *args, **kw)
 222
 223     def datafiles(self):
 224         for a, b, size in self._walk('data', True):
 225             try:
 226                 a = decodefilename(a)
 227             except KeyError:
 228                 a = None
 229             yield a, b, size
 230
 231     def join(self, f):
 232         return self.pathjoiner(self.path, encodefilename(f))
 233
 234     def copylist(self):
 235         return (['requires', '00changelog.i'] +
 236                 [self.pathjoiner('store', f) for f in _data.split()])
 237
 238 class fncache(object):
 239     # the filename used to be partially encoded
 240     # hence the encodedir/decodedir dance
 241     def __init__(self, opener):
 242         self.opener = opener
 243         self.entries = None
 244
 245     def _load(self):
 246         '''fill the entries from the fncache file'''
 247         self.entries = set()
 248         try:
 249             fp = self.opener('fncache', mode='r')
 250         except IOError:
 251             # skip nonexistent file
 252             return
 253         for n, line in enumerate(fp):
 254             if (len(line) < 2) or (line[-1] != '\n'):
 255                 t = _('invalid entry in fncache, line %s') % (n + 1)
 256                 raise util.Abort(t)
 257             self.entries.add(decodedir(line[:-1]))
 258         fp.close()
 259
 260     def rewrite(self, files):
 261         fp = self.opener('fncache', mode='wb')
 262         for p in files:
 263             fp.write(encodedir(p) + '\n')
 264         fp.close()
 265         self.entries = set(files)
 266
 267     def add(self, fn):
 268         if self.entries is None:
 269             self._load()
 270         self.opener('fncache', 'ab').write(encodedir(fn) + '\n')
 271
 272     def __contains__(self, fn):
 273         if self.entries is None:
 274             self._load()
 275         return fn in self.entries
 276
 277     def __iter__(self):
 278         if self.entries is None:
 279             self._load()
 280         return iter(self.entries)
 281
 282 class fncachestore(basicstore):
 283     def __init__(self, path, opener, pathjoiner):
 284         self.pathjoiner = pathjoiner
 285         self.path = self.pathjoiner(path, 'store')
 286         self.createmode = _calcmode(self.path)
 287         op = opener(self.path)
 288         op.createmode = self.createmode
 289         fnc = fncache(op)
 290         self.fncache = fnc
 291
 292         def fncacheopener(path, mode='r', *args, **kw):
 293             if (mode not in ('r', 'rb')
 294                 and path.startswith('data/')
 295                 and path not in fnc):
 296                     fnc.add(path)
 297             return op(hybridencode(path), mode, *args, **kw)
 298         self.opener = fncacheopener
 299
 300     def join(self, f):
 301         return self.pathjoiner(self.path, hybridencode(f))
 302
 303     def datafiles(self):
 304         rewrite = False
 305         existing = []
 306         pjoin = self.pathjoiner
 307         spath = self.path
 308         for f in self.fncache:
 309             ef = hybridencode(f)
 310             try:
 311                 st = os.stat(pjoin(spath, ef))
 312                 yield f, ef, st.st_size
 313                 existing.append(f)
 314             except OSError:
 315                 # nonexistent entry
 316                 rewrite = True
 317         if rewrite:
 318             # rewrite fncache to remove nonexistent entries
 319             # (may be caused by rollback / strip)
 320             self.fncache.rewrite(existing)
 321
 322     def copylist(self):
 323         d = _data + ' dh fncache'
 324         return (['requires', '00changelog.i'] +
 325                 [self.pathjoiner('store', f) for f in d.split()])
 326
 327 def store(requirements, path, opener, pathjoiner=None):
 328     pathjoiner = pathjoiner or os.path.join
 329     if 'store' in requirements:
 330         if 'fncache' in requirements:
 331             return fncachestore(path, opener, pathjoiner)
 332         return encodedstore(path, opener, pathjoiner)
 333     return basicstore(path, opener, pathjoiner)