1 # minirst.py - minimal reStructuredText parser
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
8 """simplified reStructuredText parser.
10 This parser knows just enough about reStructuredText to parse the
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
17 It only supports a small subset of reStructuredText:
21 - definition lists (must use ' ' to indent definitions)
23 - lists (items must start with '-')
25 - field lists (colons cannot be escaped)
29 - option lists (supports only long options without arguments)
31 - inline markup is not recognized at all.
34 import re, sys, textwrap
def findblocks(text):
    """Find continuous blocks of lines in text.

    A block is a run of consecutive non-blank lines; one or more blank
    (empty or whitespace-only) lines separate blocks.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width found
    among its lines) and a 'lines' field (the lines with that common
    indentation removed). Empty or all-blank input gives an empty list.
    """
    blocks = [[]]
    for line in text.splitlines():
        if line.strip():
            blocks[-1].append(line)
        elif blocks[-1]:
            # First blank line after some content: open a new block.
            # Further blank lines are ignored because the new block is
            # still empty.
            blocks.append([])
    if not blocks[-1]:
        # Trailing blank lines (or empty input) leave an empty block
        # behind; drop it so min() below never sees an empty sequence.
        del blocks[-1]

    for i, block in enumerate(blocks):
        indent = min([len(l) - len(l.lstrip()) for l in block])
        blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
    return blocks
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A paragraph whose last line ends with "::" introduces the more
    deeply indented blocks that follow it:

    +------------------------------+
    | paragraph                    |
    | (ends with "::")             |
    +------------------------------+
       +---------------------------+
       | indented literal block    |
       +---------------------------+

    The list is modified in place (a paragraph consisting only of "::"
    is removed) and returned.
    """
    i = 0
    while i < len(blocks):
        blocks[i]['type'] = 'paragraph'
        lines = blocks[i]['lines']
        if lines[-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            # How far the literal text is shifted relative to the
            # introducing paragraph; removed again below.
            adjustment = blocks[i + 1]['indent'] - indent

            if lines == ['::']:
                # Expanded form: remove block. After the decrement,
                # blocks[i + 1] is the first candidate literal block.
                # blocks[i] is now an unrelated block (or, for i == -1,
                # the last one), so it must not be inspected for a
                # list marker.
                del blocks[i]
                i -= 1
            else:
                if lines[-1].endswith(' ::'):
                    # Partially minimized form: remove space and both
                    # colons.
                    lines[-1] = lines[-1][:-3]
                else:
                    # Fully minimized form: remove just one colon.
                    lines[-1] = lines[-1][:-1]

                # List items are formatted with a hanging indent. We
                # must correct for this here while we still have the
                # original information on the indentation of the
                # subsequent literal blocks available.
                if lines[0].startswith('- '):
                    indent += 2
                    adjustment -= 2

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line consists of dashes and is
    exactly as long as the first line is retyped as 'section':

    +------------------------------+
    | Section title                |
    | -------------                |
    +------------------------------+

    The blocks are modified in place and the same list is returned.
    """
    for block in blocks:
        lines = block['lines']
        if (block['type'] == 'paragraph' and
            len(lines) == 2 and
            lines[1] == '-' * len(lines[0])):
            block['type'] = 'section'
    return blocks
def findbulletlists(blocks):
    """Finds bullet lists.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A paragraph whose first line starts with "- " is split into one
    'bullet' block per item:

    +------+-----------------------+
    | "- " | list item             |
    +------| (body elements)+      |
           +-----------------------+

    Every line starting with "- " opens a new item (the marker itself
    is removed); other lines are appended unchanged to the current
    item. Each item inherits the indent of the paragraph. The list is
    modified in place and returned.
    """
    i = 0
    while i < len(blocks):
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('- ')):
            items = []
            for line in blocks[i]['lines']:
                if line.startswith('- '):
                    items.append(dict(type='bullet', lines=[],
                                      indent=blocks[i]['indent']))
                    # Drop the marker; formatblock adds it back.
                    line = line[2:]
                items[-1]['lines'].append(line)
            blocks[i:i + 1] = items
            # Skip over the items just inserted.
            i += len(items) - 1
        i += 1
    return blocks
# Matches "--option", an optional " ARG" or "=ARG", the separating
# spaces, and the description.
# NOTE(review): reStructuredText option lists require at least two
# spaces between the option and its description; this pattern accepts
# a single space -- confirm that this is intended.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
def findoptionlists(blocks):
    """Finds option lists.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A paragraph whose first line matches _optionre is split into one
    'option' block per matching line:

    +----------------------------+-------------+
    | "--" option " "            | description |
    +-------+--------------------+             |
            | (body elements)+                 |
            +----------------------------------+

    Non-matching lines are appended to the current option. Lines are
    stored unchanged; each block gets a 'width' field holding the
    length of the option, its argument and the separating spaces, i.e.
    the column where the description starts. The list is modified in
    place and returned.
    """
    i = 0
    while i < len(blocks):
        if (blocks[i]['type'] == 'paragraph' and
            _optionre.match(blocks[i]['lines'][0])):
            options = []
            for line in blocks[i]['lines']:
                m = _optionre.match(line)
                if m:
                    option, arg, rest = m.groups()
                    width = len(option) + len(arg)
                    options.append(dict(type='option', lines=[],
                                        indent=blocks[i]['indent'],
                                        width=width))
                options[-1]['lines'].append(line)
            blocks[i:i + 1] = options
            # Skip over the options just inserted.
            i += len(options) - 1
        i += 1
    return blocks
# Matches ":name:", the spaces after it, and the field body. The name
# may not contain colons, start with a colon or space, or end with a
# space.
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
def findfieldlists(blocks):
    """Finds fields lists.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A paragraph whose first line matches _fieldre is split into one
    'field' block per matching line:

    +--------------------+----------------------+
    | ":" field name ":" | field body           |
    +-------+------------+                      |
            | (body elements)+                  |
            +-----------------------------------+

    Non-matching lines are appended unchanged to the current field.
    Each block gets a 'width' field: the length of ":name:" plus the
    spaces after it, i.e. the column where the field body starts. The
    list is modified in place and returned.
    """
    i = 0
    while i < len(blocks):
        if (blocks[i]['type'] == 'paragraph' and
            _fieldre.match(blocks[i]['lines'][0])):
            indent = blocks[i]['indent']
            fields = []
            for line in blocks[i]['lines']:
                m = _fieldre.match(line)
                if m:
                    key, spaces, rest = m.groups()
                    width = 2 + len(key) + len(spaces)
                    fields.append(dict(type='field', lines=[],
                                       indent=indent, width=width))
                    # Drop the two colons but pad the name back out to
                    # 'width' so the body keeps its column and lines up
                    # with the continuation indent used when formatting.
                    line = key.ljust(width) + rest
                fields[-1]['lines'].append(line)
            blocks[i:i + 1] = fields
            # Skip over the fields just inserted.
            i += len(fields) - 1
        i += 1
    return blocks
def finddefinitionlists(blocks):
    """Finds definition lists.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A paragraph with an unindented first line followed by an indented
    second line is split into one 'definition' block per term:

    +----------------------------+
    | term                       |
    +--+-------------------------+--+
       | definition                 |
       | (body elements)+           |
       +----------------------------+

    Every unindented line opens a new definition; indented lines are
    appended to the current one. Each block gets a 'hang' field holding
    the leading-whitespace width of its last line. The list is modified
    in place and returned.
    """
    i = 0
    while i < len(blocks):
        lines = blocks[i]['lines']
        if (blocks[i]['type'] == 'paragraph' and
            len(lines) > 1 and
            not lines[0].startswith(' ') and
            lines[1].startswith(' ')):
            definitions = []
            for line in lines:
                if not line.startswith(' '):
                    definitions.append(dict(type='definition', lines=[],
                                            indent=blocks[i]['indent']))
                definitions[-1]['lines'].append(line)
                # Overwritten on every line, so the last line wins.
                definitions[-1]['hang'] = len(line) - len(line.lstrip())
            blocks[i:i + 1] = definitions
            # Skip over the definitions just inserted.
            i += len(definitions) - 1
        i += 1
    return blocks
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, fields, and definitions together
    with no vertical space between adjacent blocks of the same type,
    and adds an empty 'margin' block between all other blocks.

    The list is modified in place and returned.
    """
    i = 1
    while i < len(blocks):
        if (blocks[i]['type'] == blocks[i - 1]['type'] and
            blocks[i]['type'] in ('bullet', 'option', 'field', 'definition')):
            i += 1
        else:
            blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
            # Step over both the new margin and the block after it.
            i += 2
    return blocks
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields;
    'definition' blocks also need 'hang', and 'option' and 'field'
    blocks need 'width'. Returns the formatted text as one string
    without a trailing newline.

    Margins become an empty string. Literal blocks and sections are
    emitted line by line without re-wrapping. Everything else is joined
    into a single paragraph and wrapped to width with textwrap.
    """
    indent = ' ' * block['indent']
    if block['type'] == 'margin':
        return ''
    elif block['type'] == 'literal':
        # Set literal text off from the surrounding text by two extra
        # columns.
        indent += ' ' * 2
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'section':
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'definition':
        # The term stays on its own line; the definition text is
        # wrapped below it at the recorded hanging indent.
        term = indent + block['lines'][0]
        defindent = indent + block['hang'] * ' '
        text = ' '.join([l.strip() for l in block['lines'][1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))
    else:
        initindent = subindent = indent
        text = ' '.join([l.strip() for l in block['lines']])
        if block['type'] == 'bullet':
            marker = '- '
            initindent = indent + marker
            # Continuation lines line up with the text after the marker.
            subindent = indent + ' ' * len(marker)
        elif block['type'] in ('option', 'field'):
            # Continuation lines line up with the description/body.
            subindent = indent + block['width'] * ' '

        return textwrap.fill(text, width=width,
                             initial_indent=initindent,
                             subsequent_indent=subindent)
def format(text, width):
    """Parse and format the text according to width.

    The text is split into blocks, each classification pass is applied
    in turn, margins are added, and the formatted blocks are joined
    with newlines.
    """
    # Order matters: every later pass relies on the 'type' field that
    # findliteralblocks assigns.
    passes = (findliteralblocks,
              findsections,
              findbulletlists,
              findoptionlists,
              findfieldlists,
              finddefinitionlists,
              addmargins)
    parsed = findblocks(text)
    for step in passes:
        parsed = step(parsed)
    rendered = [formatblock(piece, width) for piece in parsed]
    return '\n'.join(rendered)
if __name__ == "__main__":
    # Debugging aid: parse the file named on the command line, dump the
    # block list after every pass, then print the result formatted to a
    # width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply one pass, dump its result, and return the new blocks."""
        blocks = func(blocks)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(findbulletlists, blocks)
    blocks = debug(findoptionlists, blocks)
    blocks = debug(findfieldlists, blocks)
    blocks = debug(finddefinitionlists, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join([formatblock(b, 30) for b in blocks]))