sys/lib/python/mercurial/minirst.py

   1 # minirst.py - minimal reStructuredText parser
   2 #
   3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
   4 #
   5 # This software may be used and distributed according to the terms of the
   6 # GNU General Public License version 2, incorporated herein by reference.
   7
   8 """simplified reStructuredText parser.
   9
  10 This parser knows just enough about reStructuredText to parse the
  11 Mercurial docstrings.
  12
  13 It cheats in a major way: nested blocks are not really nested. They
  14 are just indented blocks that look like they are nested. This relies
  15 on the user to keep the right indentation for the blocks.
  16
  17 It only supports a small subset of reStructuredText:
  18
  19 - paragraphs
  20
  21 - definition lists (must use '  ' to indent definitions)
  22
  23 - lists (items must start with '-')
  24
  25 - field lists (colons cannot be escaped)
  26
  27 - literal blocks
  28
- option lists (supports only long options, optionally followed by a
  single argument, e.g. "--rev REV" or "--rev=REV")
  30
  31 - inline markup is not recognized at all.
  32 """
  33
  34 import re, sys, textwrap
  35
  36
  37 def findblocks(text):
  38     """Find continuous blocks of lines in text.
  39
  40     Returns a list of dictionaries representing the blocks. Each block
  41     has an 'indent' field and a 'lines' field.
  42     """
  43     blocks = [[]]
  44     lines = text.splitlines()
  45     for line in lines:
  46         if line.strip():
  47             blocks[-1].append(line)
  48         elif blocks[-1]:
  49             blocks.append([])
  50     if not blocks[-1]:
  51         del blocks[-1]
  52
  53     for i, block in enumerate(blocks):
  54         indent = min((len(l) - len(l.lstrip())) for l in block)
  55         blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
  56     return blocks
  57
  58
  59 def findliteralblocks(blocks):
  60     """Finds literal blocks and adds a 'type' field to the blocks.
  61
  62     Literal blocks are given the type 'literal', all other blocks are
  63     given type the 'paragraph'.
  64     """
  65     i = 0
  66     while i < len(blocks):
  67         # Searching for a block that looks like this:
  68         #
  69         # +------------------------------+
  70         # | paragraph                    |
  71         # | (ends with "::")             |
  72         # +------------------------------+
  73         #    +---------------------------+
  74         #    | indented literal block    |
  75         #    +---------------------------+
  76         blocks[i]['type'] = 'paragraph'
  77         if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
  78             indent = blocks[i]['indent']
  79             adjustment = blocks[i+1]['indent'] - indent
  80
  81             if blocks[i]['lines'] == ['::']:
  82                 # Expanded form: remove block
  83                 del blocks[i]
  84                 i -= 1
  85             elif blocks[i]['lines'][-1].endswith(' ::'):
  86                 # Partially minimized form: remove space and both
  87                 # colons.
  88                 blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
  89             else:
  90                 # Fully minimized form: remove just one colon.
  91                 blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]
  92
  93             # List items are formatted with a hanging indent. We must
  94             # correct for this here while we still have the original
  95             # information on the indentation of the subsequent literal
  96             # blocks available.
  97             if blocks[i]['lines'][0].startswith('- '):
  98                 indent += 2
  99                 adjustment -= 2
 100
 101             # Mark the following indented blocks.
 102             while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
 103                 blocks[i+1]['type'] = 'literal'
 104                 blocks[i+1]['indent'] -= adjustment
 105                 i += 1
 106         i += 1
 107     return blocks
 108
 109
 110 def findsections(blocks):
 111     """Finds sections.
 112
 113     The blocks must have a 'type' field, i.e., they should have been
 114     run through findliteralblocks first.
 115     """
 116     for block in blocks:
 117         # Searching for a block that looks like this:
 118         #
 119         # +------------------------------+
 120         # | Section title                |
 121         # | -------------                |
 122         # +------------------------------+
 123         if (block['type'] == 'paragraph' and
 124             len(block['lines']) == 2 and
 125             block['lines'][1] == '-' * len(block['lines'][0])):
 126             block['type'] = 'section'
 127     return blocks
 128
 129
 130 def findbulletlists(blocks):
 131     """Finds bullet lists.
 132
 133     The blocks must have a 'type' field, i.e., they should have been
 134     run through findliteralblocks first.
 135     """
 136     i = 0
 137     while i < len(blocks):
 138         # Searching for a paragraph that looks like this:
 139         #
 140         # +------+-----------------------+
 141         # | "- " | list item             |
 142         # +------| (body elements)+      |
 143         #        +-----------------------+
 144         if (blocks[i]['type'] == 'paragraph' and
 145             blocks[i]['lines'][0].startswith('- ')):
 146             items = []
 147             for line in blocks[i]['lines']:
 148                 if line.startswith('- '):
 149                     items.append(dict(type='bullet', lines=[],
 150                                       indent=blocks[i]['indent']))
 151                     line = line[2:]
 152                 items[-1]['lines'].append(line)
 153             blocks[i:i+1] = items
 154             i += len(items) - 1
 155         i += 1
 156     return blocks
 157
 158
 159 _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
 160 def findoptionlists(blocks):
 161     """Finds option lists.
 162
 163     The blocks must have a 'type' field, i.e., they should have been
 164     run through findliteralblocks first.
 165     """
 166     i = 0
 167     while i < len(blocks):
 168         # Searching for a paragraph that looks like this:
 169         #
 170         # +----------------------------+-------------+
 171         # | "--" option "  "           | description |
 172         # +-------+--------------------+             |
 173         #         | (body elements)+                 |
 174         #         +----------------------------------+
 175         if (blocks[i]['type'] == 'paragraph' and
 176             _optionre.match(blocks[i]['lines'][0])):
 177             options = []
 178             for line in blocks[i]['lines']:
 179                 m = _optionre.match(line)
 180                 if m:
 181                     option, arg, rest = m.groups()
 182                     width = len(option) + len(arg)
 183                     options.append(dict(type='option', lines=[],
 184                                         indent=blocks[i]['indent'],
 185                                         width=width))
 186                 options[-1]['lines'].append(line)
 187             blocks[i:i+1] = options
 188             i += len(options) - 1
 189         i += 1
 190     return blocks
 191
 192
 193 _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
 194 def findfieldlists(blocks):
 195     """Finds fields lists.
 196
 197     The blocks must have a 'type' field, i.e., they should have been
 198     run through findliteralblocks first.
 199     """
 200     i = 0
 201     while i < len(blocks):
 202         # Searching for a paragraph that looks like this:
 203         #
 204         #
 205         # +--------------------+----------------------+
 206         # | ":" field name ":" | field body           |
 207         # +-------+------------+                      |
 208         #         | (body elements)+                  |
 209         #         +-----------------------------------+
 210         if (blocks[i]['type'] == 'paragraph' and
 211             _fieldre.match(blocks[i]['lines'][0])):
 212             indent = blocks[i]['indent']
 213             fields = []
 214             for line in blocks[i]['lines']:
 215                 m = _fieldre.match(line)
 216                 if m:
 217                     key, spaces, rest = m.groups()
 218                     width = 2 + len(key) + len(spaces)
 219                     fields.append(dict(type='field', lines=[],
 220                                        indent=indent, width=width))
 221                     # Turn ":foo: bar" into "foo   bar".
 222                     line = '%s  %s%s' % (key, spaces, rest)
 223                 fields[-1]['lines'].append(line)
 224             blocks[i:i+1] = fields
 225             i += len(fields) - 1
 226         i += 1
 227     return blocks
 228
 229
 230 def finddefinitionlists(blocks):
 231     """Finds definition lists.
 232
 233     The blocks must have a 'type' field, i.e., they should have been
 234     run through findliteralblocks first.
 235     """
 236     i = 0
 237     while i < len(blocks):
 238         # Searching for a paragraph that looks like this:
 239         #
 240         # +----------------------------+
 241         # | term                       |
 242         # +--+-------------------------+--+
 243         #    | definition                 |
 244         #    | (body elements)+           |
 245         #    +----------------------------+
 246         if (blocks[i]['type'] == 'paragraph' and
 247             len(blocks[i]['lines']) > 1 and
 248             not blocks[i]['lines'][0].startswith('  ') and
 249             blocks[i]['lines'][1].startswith('  ')):
 250             definitions = []
 251             for line in blocks[i]['lines']:
 252                 if not line.startswith('  '):
 253                     definitions.append(dict(type='definition', lines=[],
 254                                             indent=blocks[i]['indent']))
 255                 definitions[-1]['lines'].append(line)
 256                 definitions[-1]['hang'] = len(line) - len(line.lstrip())
 257             blocks[i:i+1] = definitions
 258             i += len(definitions) - 1
 259         i += 1
 260     return blocks
 261
 262
 263 def addmargins(blocks):
 264     """Adds empty blocks for vertical spacing.
 265
 266     This groups bullets, options, and definitions together with no vertical
 267     space between them, and adds an empty block between all other blocks.
 268     """
 269     i = 1
 270     while i < len(blocks):
 271         if (blocks[i]['type'] == blocks[i-1]['type'] and
 272             blocks[i]['type'] in ('bullet', 'option', 'field', 'definition')):
 273             i += 1
 274         else:
 275             blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
 276             i += 2
 277     return blocks
 278
 279
 280 def formatblock(block, width):
 281     """Format a block according to width."""
 282     indent = ' ' * block['indent']
 283     if block['type'] == 'margin':
 284         return ''
 285     elif block['type'] == 'literal':
 286         indent += '  '
 287         return indent + ('\n' + indent).join(block['lines'])
 288     elif block['type'] == 'section':
 289         return indent + ('\n' + indent).join(block['lines'])
 290     elif block['type'] == 'definition':
 291         term = indent + block['lines'][0]
 292         defindent = indent + block['hang'] * ' '
 293         text = ' '.join(map(str.strip, block['lines'][1:]))
 294         return "%s\n%s" % (term, textwrap.fill(text, width=width,
 295                                                initial_indent=defindent,
 296                                                subsequent_indent=defindent))
 297     else:
 298         initindent = subindent = indent
 299         text = ' '.join(map(str.strip, block['lines']))
 300         if block['type'] == 'bullet':
 301             initindent = indent + '- '
 302             subindent = indent + '  '
 303         elif block['type'] in ('option', 'field'):
 304             subindent = indent + block['width'] * ' '
 305
 306         return textwrap.fill(text, width=width,
 307                              initial_indent=initindent,
 308                              subsequent_indent=subindent)
 309
 310
 311 def format(text, width):
 312     """Parse and format the text according to width."""
 313     blocks = findblocks(text)
 314     blocks = findliteralblocks(blocks)
 315     blocks = findsections(blocks)
 316     blocks = findbulletlists(blocks)
 317     blocks = findoptionlists(blocks)
 318     blocks = findfieldlists(blocks)
 319     blocks = finddefinitionlists(blocks)
 320     blocks = addmargins(blocks)
 321     return '\n'.join(formatblock(b, width) for b in blocks)
 322
 323
 324 if __name__ == "__main__":
 325     from pprint import pprint
 326
 327     def debug(func, blocks):
 328         blocks = func(blocks)
 329         print "*** after %s:" % func.__name__
 330         pprint(blocks)
 331         print
 332         return blocks
 333
 334     text = open(sys.argv[1]).read()
 335     blocks = debug(findblocks, text)
 336     blocks = debug(findliteralblocks, blocks)
 337     blocks = debug(findsections, blocks)
 338     blocks = debug(findbulletlists, blocks)
 339     blocks = debug(findoptionlists, blocks)
 340     blocks = debug(findfieldlists, blocks)
 341     blocks = debug(finddefinitionlists, blocks)
 342     blocks = debug(addmargins, blocks)
 343     print '\n'.join(formatblock(b, 30) for b in blocks)