Please file new bugs on Launchpad: Invirt or XVM (if you're not sure which, just pick one)

Context Navigation

source: trunk/packages/pyyaml/lib/yaml/emitter.py @ 1274

Last change on this file since 1274 was 898, checked in by hartmans, 16 years ago
Add pyyaml and libyaml packages backported from lenny. There is discussion about how these should go in the repository; these are added in this form in order to make forward progress.
File size: 43.3 KB

Line
1
2	# Emitter expects events obeying the following grammar:
3	# stream ::= STREAM-START document* STREAM-END
4	# document ::= DOCUMENT-START node DOCUMENT-END
5	# node ::= SCALAR | sequence | mapping
6	# sequence ::= SEQUENCE-START node* SEQUENCE-END
7	# mapping ::= MAPPING-START (node node)* MAPPING-END
8
9	__all__ = ['Emitter', 'EmitterError']
10
11	from error import YAMLError
12	from events import *
13
14	import re
15
class EmitterError(YAMLError):
    """Error raised by the emitter for unexpected events or invalid values."""
18
class ScalarAnalysis(object):
    """Plain record holding a scalar and the flags computed about it.

    The flags say whether the scalar is empty, whether it is multiline, and
    which output styles (flow plain, block plain, single quoted, double
    quoted, block) are allowed for it.
    """

    def __init__(self, scalar, empty, multiline,
                 allow_flow_plain, allow_block_plain,
                 allow_single_quoted, allow_double_quoted,
                 allow_block):
        # The scalar itself and its basic shape.
        self.scalar, self.empty, self.multiline = scalar, empty, multiline
        # Plain styles.
        self.allow_flow_plain, self.allow_block_plain = (
            allow_flow_plain, allow_block_plain)
        # Quoted styles.
        self.allow_single_quoted, self.allow_double_quoted = (
            allow_single_quoted, allow_double_quoted)
        # Block styles.
        self.allow_block = allow_block
32
33	class Emitter(object):
34
# Built-in mapping from tag prefix to the handle used to abbreviate it.
DEFAULT_TAG_PREFIXES = {
    u'!': u'!',
    u'tag:yaml.org,2002:': u'!!',
}
39
def __init__(self, stream, canonical=None, indent=None, width=None,
        allow_unicode=None, line_break=None):
    """Initialise the emitter state for writing to *stream*.

    stream        -- object with a `write` method and possibly `flush`.
    canonical     -- stored as-is and consulted by the state handlers.
    indent        -- used as the indentation step only if 1 < indent < 10,
                     otherwise the step is 2.
    width         -- used as the preferred line width only if it exceeds
                     twice the indentation step, otherwise 80.
    allow_unicode -- stored as-is and consulted by the scalar writers.
    line_break    -- used only if it is '\\r', '\\n' or '\\r\\n',
                     otherwise '\\n'.
    """
    # Output target; the encoding can be overridden by STREAM-START.
    self.stream = stream
    self.encoding = None

    # The emitter is a state machine; `states` is the stack of handlers
    # to return to once a nested structure is finished.
    self.state = self.expect_stream_start
    self.states = []

    # Event queue and the event currently being processed.
    self.event = None
    self.events = []

    # Current indentation level and the stack of enclosing levels.
    self.indent = None
    self.indents = []

    # Flow nesting depth.
    self.flow_level = 0

    # Context flags of the node being emitted.
    self.root_context = False
    self.sequence_context = False
    self.mapping_context = False
    self.simple_key_context = False

    # Facts about the last emitted character: position, whether it was
    # whitespace, and whether it was an indention character
    # (indentation space, '-', '?', or ':').
    self.line = 0
    self.column = 0
    self.whitespace = True
    self.indention = True

    # Formatting details; out-of-range requests fall back to defaults.
    self.canonical = canonical
    self.allow_unicode = allow_unicode
    if indent and 1 < indent < 10:
        self.best_indent = indent
    else:
        self.best_indent = 2
    if width and width > self.best_indent*2:
        self.best_width = width
    else:
        self.best_width = 80
    if line_break in (u'\r', u'\n', u'\r\n'):
        self.best_line_break = line_break
    else:
        self.best_line_break = u'\n'

    # Tag prefixes (set when a document starts).
    self.tag_prefixes = None

    # Prepared anchor and tag.
    self.prepared_anchor = None
    self.prepared_tag = None

    # Scalar analysis and style.
    self.analysis = None
    self.style = None
104
def emit(self, event):
    """Queue *event*, then process queued events until more are needed.

    Each processed event is popped from the front of the queue, exposed as
    `self.event` while the current state handler runs, and cleared after.
    """
    self.events.append(event)
    while True:
        if self.need_more_events():
            break
        self.event = self.events.pop(0)
        self.state()
        self.event = None
111
112	# In some cases, we wait for a few next events before emitting.
113
def need_more_events(self):
    """Return True if the queue is too short to process its first event.

    An empty queue always needs more.  A document start is checked with a
    look-ahead of 1, a sequence start with 2 and a mapping start with 3;
    any other event can be processed immediately.
    """
    if not self.events:
        return True
    head = self.events[0]
    lookahead = (
        (DocumentStartEvent, 1),
        (SequenceStartEvent, 2),
        (MappingStartEvent, 3),
    )
    for event_type, count in lookahead:
        if isinstance(head, event_type):
            return self.need_events(count)
    return False
126
def need_events(self, count):
    """Return True if fewer than *count* events follow the first one.

    The queued events after the first are scanned while tracking nesting
    depth.  As soon as the depth drops below zero (an end event closes the
    construct opened by the first event, or a stream end is seen) no more
    events are needed and False is returned.
    """
    depth = 0
    for queued in self.events[1:]:
        if isinstance(queued, (DocumentStartEvent, CollectionStartEvent)):
            depth += 1
        elif isinstance(queued, (DocumentEndEvent, CollectionEndEvent)):
            depth -= 1
        elif isinstance(queued, StreamEndEvent):
            depth = -1
        if depth < 0:
            return False
    return len(self.events) <= count
139
def increase_indent(self, flow=False, indentless=False):
    """Push the current indent on the stack and compute the new one.

    With no current indent the new indent is `best_indent` in flow context
    and 0 otherwise.  Otherwise the indent grows by `best_indent` unless
    *indentless* is true, in which case it stays unchanged.
    """
    previous = self.indent
    self.indents.append(previous)
    if previous is None:
        if flow:
            self.indent = self.best_indent
        else:
            self.indent = 0
        return
    if not indentless:
        self.indent = previous + self.best_indent
149
150	# States.
151
152	# Stream handlers.
153
def expect_stream_start(self):
    """Handle the first event, which must be a StreamStartEvent.

    Adopts the event's encoding when it has one, writes the stream start
    and moves on to expecting the first document.

    Raises EmitterError for any other event.
    """
    event = self.event
    if not isinstance(event, StreamStartEvent):
        raise EmitterError("expected StreamStartEvent, but got %s"
                % self.event)
    if event.encoding:
        self.encoding = event.encoding
    self.write_stream_start()
    self.state = self.expect_first_document_start
163
def expect_nothing(self):
    """Terminal state: any event arriving here raises EmitterError."""
    message = "expected nothing, but got %s" % self.event
    raise EmitterError(message)
166
167	# Document handlers.
168
def expect_first_document_start(self):
    """Handle a document start, flagging it as the first in the stream."""
    outcome = self.expect_document_start(first=True)
    return outcome
171
def expect_document_start(self, first=False):
    """Handle a DocumentStartEvent or the StreamEndEvent.

    For a document start: writes the %YAML directive if a version is given,
    resets the tag prefixes to the defaults, writes one %TAG directive per
    custom handle (in sorted handle order), and writes the '---' marker
    unless the document may start implicitly.  A document is implicit only
    when it is the first one, is not explicit, the emitter is not
    canonical, no version or tags are given and the document is not empty.

    For a stream end: writes the stream end and expects nothing more.

    Raises EmitterError for any other event.
    """
    event = self.event
    if isinstance(event, DocumentStartEvent):
        if event.version:
            self.write_version_directive(self.prepare_version(event.version))
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if event.tags:
            handles = list(event.tags.keys())
            handles.sort()
            for handle in handles:
                prefix = event.tags[handle]
                self.tag_prefixes[prefix] = handle
                handle_text = self.prepare_tag_handle(handle)
                prefix_text = self.prepare_tag_prefix(prefix)
                self.write_tag_directive(handle_text, prefix_text)
        # The marker is required as soon as any condition for an implicit
        # start fails.
        needs_marker = (not first or event.explicit or self.canonical
                or event.version or event.tags
                or self.check_empty_document())
        if needs_marker:
            self.write_indent()
            self.write_indicator(u'---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
        return
    if isinstance(event, StreamEndEvent):
        self.write_stream_end()
        self.state = self.expect_nothing
        return
    raise EmitterError("expected DocumentStartEvent, but got %s"
            % self.event)
202
def expect_document_end(self):
    """Handle a DocumentEndEvent.

    Writes an indent, the '...' marker when the event is explicit, flushes
    the stream and goes back to expecting a document start.

    Raises EmitterError for any other event.
    """
    if not isinstance(self.event, DocumentEndEvent):
        raise EmitterError("expected DocumentEndEvent, but got %s"
                % self.event)
    self.write_indent()
    if self.event.explicit:
        self.write_indicator(u'...', True)
        self.write_indent()
    self.flush_stream()
    self.state = self.expect_document_start
214
def expect_document_root(self):
    """Emit the root node; the document end handler runs afterwards."""
    after_root = self.expect_document_end
    self.states.append(after_root)
    self.expect_node(root=True)
218
219	# Node handlers.
220
def expect_node(self, root=False, sequence=False, mapping=False,
        simple_key=False):
    """Record the node context and dispatch on the current event.

    Aliases go to `expect_alias`.  Scalars and collection starts get their
    anchor and tag processed first; scalars then go to `expect_scalar`,
    while sequences and mappings use the flow handlers when already inside
    a flow collection, in canonical mode, when the event asks for flow
    style, or when the collection is empty, and the block handlers
    otherwise.

    Raises EmitterError for any other event.
    """
    self.root_context = root
    self.sequence_context = sequence
    self.mapping_context = mapping
    self.simple_key_context = simple_key

    if isinstance(self.event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)

    self.process_anchor(u'&')
    self.process_tag()
    if isinstance(self.event, ScalarEvent):
        self.expect_scalar()
    elif isinstance(self.event, SequenceStartEvent):
        use_flow = (self.flow_level or self.canonical
                or self.event.flow_style or self.check_empty_sequence())
        if use_flow:
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
    elif isinstance(self.event, MappingStartEvent):
        use_flow = (self.flow_level or self.canonical
                or self.event.flow_style or self.check_empty_mapping())
        if use_flow:
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
248
def expect_alias(self):
    """Write an alias ('*anchor') and return to the previous state.

    Raises EmitterError when the alias event carries no anchor.
    """
    anchor = self.event.anchor
    if anchor is None:
        raise EmitterError("anchor is not specified for alias")
    self.process_anchor(u'*')
    self.state = self.states.pop()
254
def expect_scalar(self):
    """Write the current scalar with a temporarily increased indent."""
    self.increase_indent(flow=True)
    self.process_scalar()
    # Restore the indent and the handler that were active before the scalar.
    restored_indent = self.indents.pop()
    self.indent = restored_indent
    next_state = self.states.pop()
    self.state = next_state
260
261	# Flow sequence handlers.
262
def expect_flow_sequence(self):
    """Open a flow sequence: write '[', go one flow level deeper, indent."""
    self.write_indicator(u'[', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_sequence_item
268
def expect_first_flow_sequence_item(self):
    """Handle the first item of a flow sequence, or its immediate end.

    On SequenceEndEvent the sequence is empty: restore indent and flow
    level, write ']' and return to the previous state.  Otherwise emit the
    item, breaking the line first in canonical mode or when the current
    column is past the preferred width.
    """
    if not isinstance(self.event, SequenceEndEvent):
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.states.append(self.expect_flow_sequence_item)
        self.expect_node(sequence=True)
        return
    self.indent = self.indents.pop()
    self.flow_level -= 1
    self.write_indicator(u']', False)
    self.state = self.states.pop()
280
def expect_flow_sequence_item(self):
    """Handle a non-first item of a flow sequence, or the sequence end.

    On SequenceEndEvent: restore indent and flow level, write a trailing
    ',' and an indent in canonical mode, write ']' and return to the
    previous state.  Otherwise write the ',' separator and emit the item,
    breaking the line first in canonical mode or when the current column
    is past the preferred width.
    """
    if not isinstance(self.event, SequenceEndEvent):
        self.write_indicator(u',', False)
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.states.append(self.expect_flow_sequence_item)
        self.expect_node(sequence=True)
        return
    self.indent = self.indents.pop()
    self.flow_level -= 1
    if self.canonical:
        self.write_indicator(u',', False)
        self.write_indent()
    self.write_indicator(u']', False)
    self.state = self.states.pop()
296
297	# Flow mapping handlers.
298
def expect_flow_mapping(self):
    """Open a flow mapping: write '{', go one flow level deeper, indent."""
    self.write_indicator(u'{', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_mapping_key
304
def expect_first_flow_mapping_key(self):
    """Handle the first key of a flow mapping, or its immediate end.

    On MappingEndEvent the mapping is empty: restore indent and flow
    level, write '}' and return to the previous state.  Otherwise emit the
    key: as a simple key when not canonical and `check_simple_key()`
    allows it, else as an explicit '?' key.
    """
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if not self.canonical and self.check_simple_key():
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    self.write_indicator(u'?', True)
    self.states.append(self.expect_flow_mapping_value)
    self.expect_node(mapping=True)
321
def expect_flow_mapping_key(self):
    """Handle a non-first key of a flow mapping, or the mapping end.

    On MappingEndEvent: restore indent and flow level, write a trailing
    ',' and an indent in canonical mode, write '}' and return to the
    previous state.  Otherwise write the ',' separator and emit the key:
    as a simple key when not canonical and `check_simple_key()` allows
    it, else as an explicit '?' key.
    """
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        if self.canonical:
            self.write_indicator(u',', False)
            self.write_indent()
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    self.write_indicator(u',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if not self.canonical and self.check_simple_key():
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    self.write_indicator(u'?', True)
    self.states.append(self.expect_flow_mapping_value)
    self.expect_node(mapping=True)
342
def expect_flow_mapping_simple_value(self):
    """Write ':' right after a simple key and emit the value node."""
    self.write_indicator(u':', False)
    next_handler = self.expect_flow_mapping_key
    self.states.append(next_handler)
    self.expect_node(mapping=True)
347
def expect_flow_mapping_value(self):
    """Write ':' after an explicit ('?') key and emit the value node.

    A line break is written first in canonical mode or when the current
    column is past the preferred width.
    """
    needs_break = self.canonical or self.column > self.best_width
    if needs_break:
        self.write_indent()
    self.write_indicator(u':', True)
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)
354
355	# Block sequence handlers.
356
def expect_block_sequence(self):
    """Open a block sequence.

    The indent is left unchanged ("indentless") when the sequence sits in
    a mapping context and the last written character was not an indention
    character.
    """
    keep_indent = (self.mapping_context and not self.indention)
    self.increase_indent(flow=False, indentless=keep_indent)
    self.state = self.expect_first_block_sequence_item
361
def expect_first_block_sequence_item(self):
    """Handle the first item of a block sequence."""
    outcome = self.expect_block_sequence_item(first=True)
    return outcome
364
def expect_block_sequence_item(self, first=False):
    """Handle an item of a block sequence, or the sequence end.

    A SequenceEndEvent (never checked for the first item) restores the
    indent and returns to the previous state.  Any other event is emitted
    as an item on a fresh indented line after a '-' indicator.
    """
    finished = not first and isinstance(self.event, SequenceEndEvent)
    if finished:
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    self.write_indicator(u'-', True, indention=True)
    self.states.append(self.expect_block_sequence_item)
    self.expect_node(sequence=True)
374
375	# Block mapping handlers.
376
def expect_block_mapping(self):
    """Open a block mapping: increase the indent and expect its first key."""
    self.increase_indent(flow=False)
    first_key_handler = self.expect_first_block_mapping_key
    self.state = first_key_handler
380
def expect_first_block_mapping_key(self):
    """Handle the first key of a block mapping."""
    outcome = self.expect_block_mapping_key(first=True)
    return outcome
383
def expect_block_mapping_key(self, first=False):
    """Handle a key of a block mapping, or the mapping end.

    A MappingEndEvent (never checked for the first key) restores the
    indent and returns to the previous state.  Otherwise the key is
    emitted on a fresh indented line: as a simple key when
    `check_simple_key()` allows it, else as an explicit '?' key.
    """
    finished = not first and isinstance(self.event, MappingEndEvent)
    if finished:
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    if self.check_simple_key():
        self.states.append(self.expect_block_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    self.write_indicator(u'?', True, indention=True)
    self.states.append(self.expect_block_mapping_value)
    self.expect_node(mapping=True)
397
def expect_block_mapping_simple_value(self):
    """Write ':' right after a simple key and emit the value node."""
    self.write_indicator(u':', False)
    next_handler = self.expect_block_mapping_key
    self.states.append(next_handler)
    self.expect_node(mapping=True)
402
def expect_block_mapping_value(self):
    """Write ':' on a fresh indented line after an explicit key, then the value."""
    self.write_indent()
    self.write_indicator(u':', True, indention=True)
    next_handler = self.expect_block_mapping_key
    self.states.append(next_handler)
    self.expect_node(mapping=True)
408
409	# Checkers.
410
def check_empty_sequence(self):
    """Tell whether the current event opens a sequence that ends at once.

    The result is truthy only when the current event is a
    SequenceStartEvent and the next queued event is a SequenceEndEvent.
    When the queue is empty the (falsy) empty queue itself is returned.
    """
    if not isinstance(self.event, SequenceStartEvent):
        return False
    pending = self.events
    if not pending:
        return pending
    return isinstance(pending[0], SequenceEndEvent)
414
def check_empty_mapping(self):
    """Tell whether the current event opens a mapping that ends at once.

    The result is truthy only when the current event is a
    MappingStartEvent and the next queued event is a MappingEndEvent.
    When the queue is empty the (falsy) empty queue itself is returned.
    """
    if not isinstance(self.event, MappingStartEvent):
        return False
    pending = self.events
    if not pending:
        return pending
    return isinstance(pending[0], MappingEndEvent)
418
def check_empty_document(self):
    """Tell whether the document being started holds only an empty scalar.

    True only when the current event is a DocumentStartEvent and the next
    queued event is a scalar with no anchor, no tag, a truthy `implicit`
    and an empty value.
    """
    if not (isinstance(self.event, DocumentStartEvent) and self.events):
        return False
    upcoming = self.events[0]
    return (isinstance(upcoming, ScalarEvent) and upcoming.anchor is None
            and upcoming.tag is None and upcoming.implicit
            and upcoming.value == u'')
425
def check_simple_key(self):
    """Decide whether the current event can be written as a simple key.

    The prepared anchor, the prepared tag and the scalar text are computed
    (and cached on the emitter) as needed and their lengths added up; the
    total must stay below 128.  Additionally the event must be an alias,
    a non-empty single-line scalar, or an empty sequence or mapping.
    """
    event = self.event
    length = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        length += len(self.prepared_anchor)
    if isinstance(event, (ScalarEvent, CollectionStartEvent)) \
            and event.tag is not None:
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        length += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        length += len(self.analysis.scalar)

    if not length < 128:
        return False
    if isinstance(event, AliasEvent):
        return True
    if isinstance(event, ScalarEvent) \
            and not self.analysis.empty and not self.analysis.multiline:
        return True
    return self.check_empty_sequence() or self.check_empty_mapping()
445
446	# Anchor, Tag, and Scalar processors.
447
def process_anchor(self, indicator):
    """Write the current event's anchor, prefixed with *indicator*.

    Nothing is written when the event has no anchor.  A previously
    prepared anchor is reused; in every case the prepared anchor is
    cleared afterwards.
    """
    anchor = self.event.anchor
    if anchor is None:
        self.prepared_anchor = None
        return
    if self.prepared_anchor is None:
        self.prepared_anchor = self.prepare_anchor(anchor)
    prepared = self.prepared_anchor
    if prepared:
        self.write_indicator(indicator+prepared, True)
    self.prepared_anchor = None
457
def process_tag(self):
    """Write the tag of the current scalar or collection start, if needed.

    For scalars the style is chosen first.  Outside canonical mode (or
    when no tag is given) the tag is omitted if the event's `implicit`
    flag for the chosen style is set: `implicit[0]` for the plain style,
    `implicit[1]` for every other style.  A scalar without a tag whose
    `implicit[0]` is set is written with the '!' tag.  For collections
    the tag is omitted under the same condition when `implicit` is true.

    Raises EmitterError when a tag has to be written but none is given.
    """
    event = self.event
    tag = event.tag
    if isinstance(event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        if not self.canonical or tag is None:
            if self.style == '':
                implied = event.implicit[0]
            else:
                implied = event.implicit[1]
            if implied:
                self.prepared_tag = None
                return
        if event.implicit[0] and tag is None:
            tag = u'!'
            self.prepared_tag = None
    elif (not self.canonical or tag is None) and event.implicit:
        self.prepared_tag = None
        return
    if tag is None:
        raise EmitterError("tag is not specified")
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(tag)
    if self.prepared_tag:
        self.write_indicator(self.prepared_tag, True)
    self.prepared_tag = None
482
def choose_scalar_style(self):
    """Pick the output style for the current scalar event.

    Returns '"' (double quoted), '\\'' (single quoted), '' (plain), or the
    requested block style '|' / '>'.  The scalar is analysed first if no
    analysis is cached.  Preference order:

    1. double quoted when requested or in canonical mode;
    2. plain when no style is requested, `implicit[0]` is set and the
       analysis allows a plain scalar in the current (flow/block) context;
    3. the requested block style when outside flow context, not a simple
       key, and the analysis allows block scalars;
    4. single quoted when no style or single quotes are requested and the
       analysis allows it;
    5. double quoted as the fallback.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    requested = self.event.style
    if requested == '"' or self.canonical:
        return '"'
    if not requested and self.event.implicit[0]:
        if (not (self.simple_key_context and
                (self.analysis.empty or self.analysis.multiline))
            and (self.flow_level and self.analysis.allow_flow_plain
                or (not self.flow_level and self.analysis.allow_block_plain))):
            return ''
    # Compare against the two block indicators exactly; the previous
    # substring test against a literal containing a stray backslash also
    # accepted a backslash as a block style.
    if requested in ('|', '>'):
        if (not self.flow_level and not self.simple_key_context
                and self.analysis.allow_block):
            return requested
    if not requested or requested == '\'':
        if (self.analysis.allow_single_quoted and
                not (self.simple_key_context and self.analysis.multiline)):
            return '\''
    return '"'
503
def process_scalar(self):
    """Write the current scalar using its chosen style.

    The analysis and the style are computed if not already cached, the
    matching writer is called ('"' double quoted, '\\'' single quoted,
    '>' folded, '|' literal, anything else plain), and both caches are
    cleared afterwards.  Quoted and plain scalars may be split across
    lines unless the scalar is a simple key.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    split = (not self.simple_key_context)
    text = self.analysis.scalar
    style = self.style
    if style == '"':
        self.write_double_quoted(text, split)
    elif style == '\'':
        self.write_single_quoted(text, split)
    elif style == '>':
        self.write_folded(text)
    elif style == '|':
        # Must be the bare '|' indicator: a literal with a stray backslash
        # never matches and the scalar would fall through to plain style.
        self.write_literal(text)
    else:
        self.write_plain(text, split)
    self.analysis = None
    self.style = None
525
526	# Analyzers.
527
def prepare_version(self, version):
    """Return the text of a (major, minor) version pair, e.g. u'1.1'.

    Raises EmitterError when the major version is not 1.
    """
    major, minor = version
    if major == 1:
        return u'%d.%d' % (major, minor)
    raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
533
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    The handle must be non-empty, start and end with '!', and contain
    only ASCII letters, digits, '-' and '_' in between.

    Raises EmitterError otherwise.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if not (handle[0] == u'!' and handle[-1] == u'!'):
        raise EmitterError("tag handle must start and end with '!': %r"
                % (handle.encode('utf-8')))
    for ch in handle[1:-1]:
        is_word_char = (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z'
                or u'a' <= ch <= 'z' or ch in u'-_')
        if not is_word_char:
            raise EmitterError("invalid character %r in the tag handle: %r"
                    % (ch.encode('utf-8'), handle.encode('utf-8')))
    return handle
546
def prepare_tag_prefix(self, prefix):
    """Return *prefix* with unsafe characters percent-escaped.

    ASCII letters, digits and the characters -;/?!:@&=+$,_.~*'()[] are
    copied as they are; every other character is replaced by the %XX
    escapes of its UTF-8 bytes.  A leading '!' is always kept.

    Raises EmitterError when the prefix is empty.
    """
    if not prefix:
        raise EmitterError("tag prefix must not be empty")
    pieces = []
    run_start = pos = 0
    if prefix[0] == u'!':
        pos = 1
    while pos < len(prefix):
        ch = prefix[pos]
        is_safe = (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z'
                or u'a' <= ch <= 'z'
                or ch in u'-;/?!:@&=+$,_.~*\'()[]')
        if is_safe:
            pos += 1
            continue
        # Flush the pending run of safe characters, then escape this one.
        if run_start < pos:
            pieces.append(prefix[run_start:pos])
        run_start = pos = pos+1
        for byte in ch.encode('utf-8'):
            pieces.append(u'%%%02X' % ord(byte))
    if run_start < pos:
        pieces.append(prefix[run_start:pos])
    return u''.join(pieces)
569
def prepare_tag(self, tag):
    """Return the text used to write *tag*.

    The tag '!' is returned as it is.  Otherwise the registered prefixes
    are tried in sorted order and the last one matching wins, so that the
    choice is deterministic and a longer prefix beats a shorter one it
    extends (the result used to depend on dict iteration order).  A prefix
    matches when the tag starts with it and is either longer than it or
    the prefix is '!'.  The rest of the tag is percent-escaped and
    appended to the handle; without a matching prefix the escaped tag is
    written verbatim as '!<...>'.

    Raises EmitterError when the tag is empty.
    """
    if not tag:
        raise EmitterError("tag must not be empty")
    if tag == u'!':
        return tag
    handle = None
    suffix = tag
    prefixes = list(self.tag_prefixes.keys())
    prefixes.sort()
    for prefix in prefixes:
        if tag.startswith(prefix) \
                and (prefix == u'!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix):]
    chunks = []
    start = end = 0
    while end < len(suffix):
        ch = suffix[end]
        # '!' is only allowed unescaped when the handle is not the
        # primary '!' handle.
        if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
                or ch in u'-;/?:@&=+$,_.~*\'()[]' \
                or (ch == u'!' and handle != u'!'):
            end += 1
        else:
            if start < end:
                chunks.append(suffix[start:end])
            start = end = end+1
            data = ch.encode('utf-8')
            for ch in data:
                chunks.append(u'%%%02X' % ord(ch))
    if start < end:
        chunks.append(suffix[start:end])
    suffix_text = u''.join(chunks)
    if handle:
        return u'%s%s' % (handle, suffix_text)
    else:
        return u'!<%s>' % suffix_text
604
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    The anchor must be non-empty and contain only ASCII letters, digits,
    '-' and '_'.

    Raises EmitterError otherwise.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    for ch in anchor:
        is_word_char = (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z'
                or u'a' <= ch <= 'z' or ch in u'-_')
        if not is_word_char:
            raise EmitterError("invalid character %r in the anchor: %r"
                    % (ch.encode('utf-8'), anchor.encode('utf-8')))
    return anchor
614
def analyze_scalar(self, scalar):
    """Inspect *scalar* and return a ScalarAnalysis for it.

    The scalar is scanned once, character by character, recording
    indicator characters, line breaks, characters that need escaping and
    how spaces and line breaks are combined.  From these facts the set of
    allowed output styles is derived.  `self.allow_unicode` decides
    whether non-ASCII printable characters count as special.
    """

    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Whitespaces.
    inline_spaces = False          # non-space space+ non-space
    inline_breaks = False          # non-space break+ non-space
    leading_spaces = False         # ^ space+ (non-space | $)
    leading_breaks = False         # ^ break+ (non-space | $)
    trailing_spaces = False        # (^ | non-space) space+ $
    trailing_breaks = False        # (^ | non-space) break+ $
    inline_breaks_spaces = False   # non-space break+ space+ non-space
    mixed_breaks_spaces = False    # anything else

    # Check document indicators.
    if scalar.startswith(u'---') or scalar.startswith(u'...'):
        block_indicators = True
        flow_indicators = True

    # First character or preceded by a whitespace.
    preceded_by_space = True

    # Last character or followed by a whitespace.
    followed_by_space = (len(scalar) == 1 or
            scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')

    # The current series of whitespaces contain plain spaces.
    spaces = False

    # The current series of whitespaces contain line breaks.
    breaks = False

    # The current series of whitespaces contain a space followed by a
    # break.
    mixed = False

    # The current series of whitespaces start at the beginning of the
    # scalar.
    leading = False

    index = 0
    while index < len(scalar):
        ch = scalar[index]

        # Check for indicators.

        if index == 0:
            # Leading indicators are special characters.  (A backslash is
            # not an indicator, so it must not appear in this set.)
            if ch in u'#,[]{}&*!|>\'\"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in u'?:':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'-' and followed_by_space:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in u',?[]{}':
                flow_indicators = True
            if ch == u':':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'#' and preceded_by_space:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.

        if ch in u'\n\x85\u2028\u2029':
            line_breaks = True
        if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
            if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
                    or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
                # Printable non-ASCII: special only if unicode output is
                # not allowed.
                if not self.allow_unicode:
                    special_characters = True
            else:
                special_characters = True

        # Spaces, line breaks, and how they are mixed. State machine.

        # Start or continue series of whitespaces.
        if ch in u' \n\x85\u2028\u2029':
            if spaces and breaks:
                if ch != u' ':      # break+ (space+ break+)    => mixed
                    mixed = True
            elif spaces:
                if ch != u' ':      # (space+ break+)   => mixed
                    breaks = True
                    mixed = True
            elif breaks:
                if ch == u' ':      # break+ space+
                    spaces = True
            else:
                leading = (index == 0)
                if ch == u' ':      # space+
                    spaces = True
                else:               # break+
                    breaks = True

        # Series of whitespaces ended with a non-space.
        elif spaces or breaks:
            if leading:
                if spaces and breaks:
                    mixed_breaks_spaces = True
                elif spaces:
                    leading_spaces = True
                elif breaks:
                    leading_breaks = True
            else:
                if mixed:
                    mixed_breaks_spaces = True
                elif spaces and breaks:
                    inline_breaks_spaces = True
                elif spaces:
                    inline_spaces = True
                elif breaks:
                    inline_breaks = True
            spaces = breaks = mixed = leading = False

        # Series of whitespaces reach the end.
        if (spaces or breaks) and (index == len(scalar)-1):
            if spaces and breaks:
                mixed_breaks_spaces = True
            elif spaces:
                trailing_spaces = True
                if leading:
                    leading_spaces = True
            elif breaks:
                trailing_breaks = True
                if leading:
                    leading_breaks = True
            spaces = breaks = mixed = leading = False

        # Prepare for the next character.
        index += 1
        preceded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
        followed_by_space = (index+1 >= len(scalar) or
                scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespace are bad for plain scalars. We also
    # do not want to mess with leading whitespaces for block scalars.
    if leading_spaces or leading_breaks or trailing_spaces:
        allow_flow_plain = allow_block_plain = allow_block = False

    # Trailing breaks are fine for block scalars, but unacceptable for
    # plain scalars.
    if trailing_breaks:
        allow_flow_plain = allow_block_plain = False

    # The combination of (space+ break+) is only acceptable for block
    # scalars.
    if inline_breaks_spaces:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Mixed spaces and breaks, as well as special character are only
    # allowed for double quoted scalars.
    if mixed_breaks_spaces or special_characters:
        allow_flow_plain = allow_block_plain =  \
        allow_single_quoted = allow_block = False

    # We don't emit multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
817
818	# Writers.
819
def flush_stream(self):
    """Flush the output stream if it provides a `flush` method."""
    stream = self.stream
    if not hasattr(stream, 'flush'):
        return
    stream.flush()
823
def write_stream_start(self):
    """Write a byte order mark when the stream encoding is a UTF-16 one.

    The mark is the character U+FEFF encoded with the stream encoding.
    (Encoding the two characters U+00FF U+00FE, as was done before,
    produces four bytes that do not form a byte order mark.)
    """
    # Write BOM if needed.
    if self.encoding and self.encoding.startswith('utf-16'):
        self.stream.write(u'\uFEFF'.encode(self.encoding))
828
def write_stream_end(self):
    """Finish the stream; all that is done is flushing the output."""
    flush = self.flush_stream
    flush()
831
def write_indicator(self, indicator, need_whitespace,
        whitespace=False, indention=False):
    """Write *indicator*, preceded by a space when one is required.

    A separating space is added only if *need_whitespace* is true and the
    last written character was not whitespace.  Afterwards the whitespace
    flag is set to *whitespace*, the indention flag stays true only if it
    was true and *indention* is true, and the column is advanced.
    """
    needs_separator = need_whitespace and not self.whitespace
    if needs_separator:
        data = u' '+indicator
    else:
        data = indicator
    self.whitespace = whitespace
    self.indention = self.indention and indention
    self.column += len(data)
    encoding = self.encoding
    if encoding:
        data = data.encode(encoding)
    self.stream.write(data)
844
def write_indent(self):
    """Move the output to the current indentation column.

    A line break is written first when the last character was not an
    indention character, when the column is already past the indent, or
    when it is exactly at the indent but the last character was not
    whitespace.  The line is then padded with spaces up to the indent
    (a missing indent counts as 0).
    """
    target = self.indent or 0
    must_break = (not self.indention or self.column > target
            or (self.column == target and not self.whitespace))
    if must_break:
        self.write_line_break()
    if self.column >= target:
        return
    self.whitespace = True
    padding = u' '*(target-self.column)
    self.column = target
    if self.encoding:
        padding = padding.encode(self.encoding)
    self.stream.write(padding)
857
def write_line_break(self, data=None):
    """Write a line break and reset the position bookkeeping.

    *data* is the break to write; `best_line_break` is used when it is
    None.  The line counter is advanced, the column reset to 0 and both
    the whitespace and indention flags are set.
    """
    line_break = data
    if line_break is None:
        line_break = self.best_line_break
    self.whitespace = self.indention = True
    self.line += 1
    self.column = 0
    if self.encoding:
        line_break = line_break.encode(self.encoding)
    self.stream.write(line_break)
868
def write_version_directive(self, version_text):
    """Write a '%YAML <version>' directive followed by a line break."""
    directive = u'%%YAML %s' % version_text
    encoding = self.encoding
    if encoding:
        directive = directive.encode(encoding)
    self.stream.write(directive)
    self.write_line_break()
875
def write_tag_directive(self, handle_text, prefix_text):
    """Write a '%TAG <handle> <prefix>' directive followed by a line break."""
    directive = u'%%TAG %s %s' % (handle_text, prefix_text)
    encoding = self.encoding
    if encoding:
        directive = directive.encode(encoding)
    self.stream.write(directive)
    self.write_line_break()
882
883	# Scalar streams.
884
885	def write_single_quoted(self, text, split=True):
886	self.write_indicator(u'\'', True)
887	spaces = False
888	breaks = False
889	start = end = 0
890	while end <= len(text):
891	ch = None
892	if end < len(text):
893	ch = text[end]
894	if spaces:
895	if ch is None or ch != u' ':
896	if start+1 == end and self.column > self.best_width and split \
897	and start != 0 and end != len(text):
898	self.write_indent()
899	else:
900	data = text[start:end]
901	self.column += len(data)
902	if self.encoding:
903	data = data.encode(self.encoding)
904	self.stream.write(data)
905	start = end
906	elif breaks:
907	if ch is None or ch not in u'\n\x85\u2028\u2029':
908	if text[start] == u'\n':
909	self.write_line_break()
910	for br in text[start:end]:
911	if br == u'\n':
912	self.write_line_break()
913	else:
914	self.write_line_break(br)
915	self.write_indent()
916	start = end
917	else:
918	if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
919	if start < end:
920	data = text[start:end]
921	self.column += len(data)
922	if self.encoding:
923	data = data.encode(self.encoding)
924	self.stream.write(data)
925	start = end
926	if ch == u'\'':
927	data = u'\'\''
928	self.column += 2
929	if self.encoding:
930	data = data.encode(self.encoding)
931	self.stream.write(data)
932	start = end + 1
933	if ch is not None:
934	spaces = (ch == u' ')
935	breaks = (ch in u'\n\x85\u2028\u2029')
936	end += 1
937	self.write_indicator(u'\'', False)
938
# Characters that have a short backslash escape in double-quoted scalars,
# mapped to the letter written after the backslash.
ESCAPE_REPLACEMENTS = {
    u'\x00':    u'0',   # null
    u'\a':      u'a',   # bell
    u'\b':      u'b',   # backspace
    u'\t':      u't',   # horizontal tab
    u'\n':      u'n',   # line feed
    u'\v':      u'v',   # vertical tab
    u'\f':      u'f',   # form feed
    u'\r':      u'r',   # carriage return
    u'\x1B':    u'e',   # escape
    u'"':       u'"',   # double quote
    u'\\':      u'\\',  # backslash
    u'\x85':    u'N',   # next line
    u'\xA0':    u'_',   # non-breaking space
    u'\u2028':  u'L',   # line separator
    u'\u2029':  u'P',   # paragraph separator
}
956
def write_double_quoted(self, text, split=True):
    """Write `text` to the stream as a double-quoted scalar.

    Runs of characters that may appear verbatim are written in one piece;
    every other character is written as a backslash escape, using
    ESCAPE_REPLACEMENTS when it has an entry and a numeric \\x, \\u or \\U
    escape otherwise.  When `split` is true and the line would exceed
    `self.best_width`, the line is ended with a trailing backslash and
    continued after `write_indent()`.
    """
    def put(chunk):
        # The column advances by the length of the unencoded text.
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    length = len(text)
    self.write_indicator(u'"', True)
    run_start = 0
    # One extra iteration (char is None) flushes the final verbatim run.
    for pos in range(length + 1):
        char = text[pos] if pos < length else None

        # Decide whether `char` may be copied verbatim.
        if char is None:
            verbatim = False
        elif char in u'"\\\x85\u2028\u2029\uFEFF':
            verbatim = False
        elif u'\x20' <= char <= u'\x7E':
            verbatim = True
        else:
            verbatim = (self.allow_unicode
                    and (u'\xA0' <= char <= u'\uD7FF'
                        or u'\uE000' <= char <= u'\uFFFD'))

        if not verbatim:
            # Flush the pending verbatim run, then escape this character.
            if run_start < pos:
                put(text[run_start:pos])
                run_start = pos
            if char is not None:
                if char in self.ESCAPE_REPLACEMENTS:
                    escaped = u'\\' + self.ESCAPE_REPLACEMENTS[char]
                elif char <= u'\xFF':
                    escaped = u'\\x%02X' % ord(char)
                elif char <= u'\uFFFF':
                    escaped = u'\\u%04X' % ord(char)
                else:
                    escaped = u'\\U%08X' % ord(char)
                put(escaped)
                run_start = pos + 1

        # A line may be split at a space or right after an escape, but
        # never at the first or the last character of the text.
        at_split_point = (0 < pos < length - 1
                and (char == u' ' or run_start >= pos))
        if at_split_point \
                and self.column + (pos - run_start) > self.best_width \
                and split:
            pending = text[run_start:pos] + u'\\'
            if run_start < pos:
                run_start = pos
            put(pending)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            # A space that opens the continuation line gets a backslash
            # written in front of it.
            if text[run_start] == u' ':
                put(u'\\')

    self.write_indicator(u'"', False)
1010
def determine_chomp(self, text):
    """Return the chomping indicator for a block scalar holding `text`.

    The result depends only on the last two characters of `text`:
    u'+' when both are line breaks, u'' when only the last one is,
    and u'-' when the text does not end with a line break.
    """
    line_breaks = u'\n\x85\u2028\u2029'
    # Left-pad with spaces so that both inspected positions always exist.
    before_last, last = (u'  ' + text)[-2:]
    if last not in line_breaks:
        return u'-'
    if before_last in line_breaks:
        return u'+'
    return u''
1022
def write_folded(self, text):
    """Write `text` to the stream as a folded block scalar ('>').

    The header is '>' followed by the indicator from determine_chomp().
    The text is then scanned one character at a time and flushed in runs
    of line breaks, runs of spaces and runs of other characters.  A single
    space between two words is replaced by write_indent() once the current
    column has passed `self.best_width`.
    """
    chomp = self.determine_chomp(text)
    self.write_indicator(u'>'+chomp, True)
    self.write_indent()
    leading_space = False
    spaces = False
    breaks = False
    start = end = 0
    # The extra iteration with ch = None flushes whatever run is pending.
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # An extra break is written when the run starts with
                # u'\n' and neither the line before nor the line after
                # it begins with a space.
                if not leading_space and ch is not None and ch != u' '  \
                        and text[start] == u'\n':
                    self.write_line_break()
                leading_space = (ch == u' ')
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        elif spaces:
            if ch != u' ':
                # Only a single space between words may be folded.
                if start+1 == end and self.column > self.best_width:
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                # Count the written characters; without this the column
                # never grows and the width check above can never fire.
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
            spaces = (ch == u' ')
        end += 1
1073
def write_literal(self, text):
    """Write `text` to the stream as a literal block scalar ('|').

    The header is '|' followed by the indicator from determine_chomp().
    Each line of `text` is written unchanged; every line break in the
    text is reproduced through write_line_break(), and write_indent() is
    called before each following line.
    """
    chomp = self.determine_chomp(text)
    # The indicator is the bare pipe character, with no backslash.
    self.write_indicator(u'|'+chomp, True)
    self.write_indent()
    breaks = False
    start = end = 0
    # The extra iteration with ch = None flushes whatever run is pending.
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        else:
            if ch is None or ch in u'\n\x85\u2028\u2029':
                data = text[start:end]
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
1106
def write_plain(self, text, split=True):
    """Write `text` to the stream as a plain (unquoted) scalar.

    Nothing is written for empty text.  A separating space is written
    first unless `self.whitespace` is already set.  The text is then
    scanned one character at a time and flushed in runs of spaces, runs
    of line breaks and runs of other characters.  When `split` is true, a
    single space between two words is replaced by write_indent() once the
    current column has passed `self.best_width`.
    """
    if not text:
        return
    if not self.whitespace:
        data = u' '
        self.column += len(data)
        if self.encoding:
            data = data.encode(self.encoding)
        self.stream.write(data)
    # Scalar content follows, so the emitter is no longer positioned
    # right after whitespace or indentation.
    self.whitespace = False
    self.indention = False
    spaces = False
    breaks = False
    start = end = 0
    # The extra iteration with ch = None flushes whatever run is pending.
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch != u' ':
                # Only a single space between words may be folded.
                if start+1 == end and self.column > self.best_width and split:
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            # ch is None at the end of the text; test for it first because
            # `None in u'...'` raises TypeError.
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                # Re-indent only when more text follows the breaks.
                if ch is not None:
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
1163

Note: See TracBrowser for help on using the repository browser.

Download in other formats: