]> git.lizzy.rs Git - plan9front.git/blob - sys/lib/python/mercurial/minirst.py
/sys/lib/dist/mkfile: test for .git directory
[plan9front.git] / sys / lib / python / mercurial / minirst.py
1 # minirst.py - minimal reStructuredText parser
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
7
8 """simplified reStructuredText parser.
9
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
12
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
16
17 It only supports a small subset of reStructuredText:
18
19 - paragraphs
20
21 - definition lists (must use '  ' to indent definitions)
22
23 - lists (items must start with '-')
24
25 - field lists (colons cannot be escaped)
26
27 - literal blocks
28
29 - option lists (supports only long options, optionally with an argument)
30
31 - inline markup is not recognized at all.
32 """
33
34 import re, sys, textwrap
35
36
def findblocks(text):
    """Split text into runs of consecutive non-blank lines.

    Returns a list of dictionaries, one per run. Each dictionary has an
    'indent' field (the smallest leading-whitespace width found among
    the lines of the run) and a 'lines' field (the lines of the run
    with that many leading characters removed).
    """
    groups = []
    current = []
    for line in text.splitlines():
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the run collected so far; further
            # blank lines in a row are simply skipped.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    result = []
    for group in groups:
        margin = min([len(l) - len(l.lstrip()) for l in group])
        result.append(dict(indent=margin,
                           lines=[l[margin:] for l in group]))
    return result
57
58
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A paragraph whose last line ends with "::" introduces the more
    deeply indented blocks that follow it, provided at least one block
    follows. The "::" marker is stripped from the paragraph (or the
    whole paragraph is dropped when it consists of "::" only), and the
    indentation of the literal blocks is reduced so that it becomes
    relative to the introducing paragraph.

    The list is modified in place and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block. After this, i points at
                # the block before the removed one, or is -1 when the
                # removed block was the first one.
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # The i >= 0 guard matters when a leading "::" block was
            # just removed: without it, blocks[-1] would silently look
            # at the *last* block of the document.
            if i >= 0 and blocks[i]['lines'][0].startswith('- '):
                indent += 2
                adjustment -= 2

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
108
109
def findsections(blocks):
    """Mark section titles.

    A paragraph made of exactly two lines, where the second line
    consists solely of '-' characters and is exactly as long as the
    first, gets its type changed to 'section'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    for candidate in blocks:
        # Looking for:
        #
        #   Section title
        #   -------------
        if candidate['type'] != 'paragraph':
            continue
        rows = candidate['lines']
        if len(rows) != 2:
            continue
        title, underline = rows
        if underline == '-' * len(title):
            candidate['type'] = 'section'
    return blocks
128
129
def findbulletlists(blocks):
    """Split bullet-list paragraphs into one 'bullet' block per item.

    A paragraph whose first line starts with "- " is replaced, in
    place, by one block of type 'bullet' for every line starting with
    "- ". The marker is stripped from the item's first line; lines
    that do not start with "- " are attached unchanged to the item
    before them. Each item inherits the paragraph's indent.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        if (current['type'] != 'paragraph' or
            not current['lines'][0].startswith('- ')):
            pos += 1
            continue

        bullets = []
        for text in current['lines']:
            if text.startswith('- '):
                bullets.append(dict(type='bullet', lines=[],
                                    indent=current['indent']))
                text = text[2:]
            bullets[-1]['lines'].append(text)

        # Swap the paragraph for its items and continue after them.
        blocks[pos:pos + 1] = bullets
        pos += len(bullets)
    return blocks
157
158
# Matches "--option", an optional argument introduced by a space or
# "=", at least two spaces, and then the description.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
def findoptionlists(blocks):
    """Split option-list paragraphs into one 'option' block per option.

    A paragraph whose first line matches _optionre is replaced, in
    place, by one block of type 'option' for every matching line.
    Non-matching lines are attached to the option before them. Each
    option block records a 'width': the length of the option together
    with its argument and the spaces that follow, i.e. the column at
    which the description starts.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        if (current['type'] == 'paragraph' and
            _optionre.match(current['lines'][0])):
            entries = []
            for text in current['lines']:
                found = _optionre.match(text)
                if found:
                    column = len(found.group(1)) + len(found.group(2))
                    entries.append(dict(type='option', lines=[],
                                        indent=current['indent'],
                                        width=column))
                # Option lines are stored verbatim, marker included.
                entries[-1]['lines'].append(text)
            blocks[pos:pos + 1] = entries
            pos += len(entries)
        else:
            pos += 1
    return blocks
191
192
# Matches ":name:", one or more spaces, and then the field body. The
# name may not start with a colon or a space, and may not end with a
# space.
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
def findfieldlists(blocks):
    """Split field-list paragraphs into one 'field' block per field.

    A paragraph whose first line matches _fieldre is replaced, in
    place, by one block of type 'field' for every matching line.
    Non-matching lines are attached unchanged to the field before
    them. In a matching line the two colons around the name are
    replaced by two spaces after the name, so the body stays in the
    same column; that column is recorded as the block's 'width'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        if (current['type'] == 'paragraph' and
            _fieldre.match(current['lines'][0])):
            margin = current['indent']
            entries = []
            for text in current['lines']:
                found = _fieldre.match(text)
                if found:
                    name, gap, body = found.groups()
                    entries.append(dict(type='field', lines=[],
                                        indent=margin,
                                        width=2 + len(name) + len(gap)))
                    # Turn ":foo: bar" into "foo   bar".
                    text = '%s  %s%s' % (name, gap, body)
                entries[-1]['lines'].append(text)
            blocks[pos:pos + 1] = entries
            pos += len(entries)
        else:
            pos += 1
    return blocks
228
229
def finddefinitionlists(blocks):
    """Split definition-list paragraphs into 'definition' blocks.

    A paragraph with more than one line, whose first line does not
    start with two spaces while its second line does, is replaced, in
    place, by one block of type 'definition' for every line that does
    not start with two spaces (a term). The indented lines after a
    term are attached to it. Each block gets a 'hang' field holding
    the leading-whitespace width of the last line attached to it.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        rows = current['lines']
        looks_like_list = (current['type'] == 'paragraph' and
                           len(rows) > 1 and
                           not rows[0].startswith('  ') and
                           rows[1].startswith('  '))
        if not looks_like_list:
            pos += 1
            continue

        entries = []
        for text in rows:
            if not text.startswith('  '):
                # An unindented line opens a new term.
                entries.append(dict(type='definition', lines=[],
                                    indent=current['indent']))
            entry = entries[-1]
            entry['lines'].append(text)
            # Overwritten on every line: the last line wins.
            entry['hang'] = len(text) - len(text.lstrip())
        blocks[pos:pos + 1] = entries
        pos += len(entries)
    return blocks
261
262
def addmargins(blocks):
    """Insert empty 'margin' blocks for vertical spacing.

    Consecutive blocks of the same type are kept together without a
    margin when that type is 'bullet', 'option', 'field' or
    'definition'. Between every other pair of neighbouring blocks a
    block of type 'margin' is inserted. The list is modified in place
    and also returned.
    """
    grouped = ('bullet', 'option', 'field', 'definition')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        if kind in grouped and kind == blocks[pos - 1]['type']:
            pos += 1
            continue
        blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
        # Step over both the new margin and the block it precedes.
        pos += 2
    return blocks
278
279
def formatblock(block, width):
    """Render a single block as text, wrapped to width where it applies.

    Margins render as an empty string. Literal blocks and sections are
    emitted line by line without wrapping; literal blocks get two
    extra spaces of indentation. Everything else has its lines joined
    into one paragraph and re-wrapped with textwrap.fill.
    """
    pad = ' ' * block['indent']
    kind = block['type']

    if kind == 'margin':
        return ''

    if kind in ('literal', 'section'):
        if kind == 'literal':
            pad += '  '
        return pad + ('\n' + pad).join(block['lines'])

    if kind == 'definition':
        # The term stays on its own line; the definition is wrapped
        # underneath it at the recorded hanging indent.
        heading = pad + block['lines'][0]
        hang = pad + ' ' * block['hang']
        body = ' '.join([str.strip(l) for l in block['lines'][1:]])
        wrapped = textwrap.fill(body, width=width,
                                initial_indent=hang,
                                subsequent_indent=hang)
        return "%s\n%s" % (heading, wrapped)

    first = rest = pad
    body = ' '.join([str.strip(l) for l in block['lines']])
    if kind == 'bullet':
        first, rest = pad + '- ', pad + '  '
    elif kind in ('option', 'field'):
        # Continuation lines line up with the start of the description.
        rest = pad + ' ' * block['width']

    return textwrap.fill(body, width=width,
                         initial_indent=first,
                         subsequent_indent=rest)
309
310
def format(text, width):
    """Parse text and return it re-formatted for the given width.

    The text is split into blocks, each classification pass is applied
    in turn, margins are added, and the formatted blocks are joined
    with newlines.
    """
    passes = (
        findliteralblocks,
        findsections,
        findbulletlists,
        findoptionlists,
        findfieldlists,
        finddefinitionlists,
        addmargins,
    )
    blocks = findblocks(text)
    for step in passes:
        blocks = step(blocks)
    return '\n'.join([formatblock(b, width) for b in blocks])
322
323
if __name__ == "__main__":
    # Debugging aid: parse the file named by the first command line
    # argument, dump the block list after every pass, and finally
    # print the result formatted for a width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply one pass, pretty-print its result, and return it."""
        blocks = func(blocks)
        # Single-argument print(...) produces the same output whether
        # print is a statement or a function.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    # Close the input file explicitly instead of leaving the handle to
    # the garbage collector.
    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()

    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(findbulletlists, blocks)
    blocks = debug(findoptionlists, blocks)
    blocks = debug(findfieldlists, blocks)
    blocks = debug(finddefinitionlists, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))