Please file new bugs on Launchpad: Invirt or XVM (if you're not sure which, just pick one)

Context Navigation

source: trunk/packages/pyyaml/lib/yaml/emitter.py @ 1937

Last change on this file since 1937 was 898, checked in by hartmans, 16 years ago
Add pyyaml and libyaml packages backported from lenny. There is discussion about how these should go in the repository; these are added in this form in order to make forward progress.
File size: 43.3 KB

Rev	Line
[898]	1
	2	# Emitter expects events obeying the following grammar:
	3	# stream ::= STREAM-START document* STREAM-END
	4	# document ::= DOCUMENT-START node DOCUMENT-END
	5	# node ::= SCALAR | sequence | mapping
	6	# sequence ::= SEQUENCE-START node* SEQUENCE-END
	7	# mapping ::= MAPPING-START (node node)* MAPPING-END
	8
	9	__all__ = ['Emitter', 'EmitterError']
	10
	11	from error import YAMLError
	12	from events import *
	13
	14	import re
	15
class EmitterError(YAMLError):
    """Error raised by the emitter for unexpected events or invalid input."""
    pass
	18
	19	class ScalarAnalysis(object):
	20	def __init__(self, scalar, empty, multiline,
	21	allow_flow_plain, allow_block_plain,
	22	allow_single_quoted, allow_double_quoted,
	23	allow_block):
	24	self.scalar = scalar
	25	self.empty = empty
	26	self.multiline = multiline
	27	self.allow_flow_plain = allow_flow_plain
	28	self.allow_block_plain = allow_block_plain
	29	self.allow_single_quoted = allow_single_quoted
	30	self.allow_double_quoted = allow_double_quoted
	31	self.allow_block = allow_block
	32
	33	class Emitter(object):
	34
    # Maps a tag prefix to the handle that abbreviates it.  A copy of this
    # mapping becomes `tag_prefixes` at the start of every document.
    DEFAULT_TAG_PREFIXES = {
        u'!' : u'!',
        u'tag:yaml.org,2002:' : u'!!',
    }
	39
	40	def __init__(self, stream, canonical=None, indent=None, width=None,
	41	allow_unicode=None, line_break=None):
	42
	43	# The stream should have the methods `write` and possibly `flush`.
	44	self.stream = stream
	45
	46	# Encoding can be overriden by STREAM-START.
	47	self.encoding = None
	48
	49	# Emitter is a state machine with a stack of states to handle nested
	50	# structures.
	51	self.states = []
	52	self.state = self.expect_stream_start
	53
	54	# Current event and the event queue.
	55	self.events = []
	56	self.event = None
	57
	58	# The current indentation level and the stack of previous indents.
	59	self.indents = []
	60	self.indent = None
	61
	62	# Flow level.
	63	self.flow_level = 0
	64
	65	# Contexts.
	66	self.root_context = False
	67	self.sequence_context = False
	68	self.mapping_context = False
	69	self.simple_key_context = False
	70
	71	# Characteristics of the last emitted character:
	72	# - current position.
	73	# - is it a whitespace?
	74	# - is it an indention character
	75	# (indentation space, '-', '?', or ':')?
	76	self.line = 0
	77	self.column = 0
	78	self.whitespace = True
	79	self.indention = True
	80
	81	# Formatting details.
	82	self.canonical = canonical
	83	self.allow_unicode = allow_unicode
	84	self.best_indent = 2
	85	if indent and 1 < indent < 10:
	86	self.best_indent = indent
	87	self.best_width = 80
	88	if width and width > self.best_indent*2:
	89	self.best_width = width
	90	self.best_line_break = u'\n'
	91	if line_break in [u'\r', u'\n', u'\r\n']:
	92	self.best_line_break = line_break
	93
	94	# Tag prefixes.
	95	self.tag_prefixes = None
	96
	97	# Prepared anchor and tag.
	98	self.prepared_anchor = None
	99	self.prepared_tag = None
	100
	101	# Scalar analysis and style.
	102	self.analysis = None
	103	self.style = None
	104
	105	def emit(self, event):
	106	self.events.append(event)
	107	while not self.need_more_events():
	108	self.event = self.events.pop(0)
	109	self.state()
	110	self.event = None
	111
	112	# In some cases, we wait for a few next events before emitting.
	113
	114	def need_more_events(self):
	115	if not self.events:
	116	return True
	117	event = self.events[0]
	118	if isinstance(event, DocumentStartEvent):
	119	return self.need_events(1)
	120	elif isinstance(event, SequenceStartEvent):
	121	return self.need_events(2)
	122	elif isinstance(event, MappingStartEvent):
	123	return self.need_events(3)
	124	else:
	125	return False
	126
	127	def need_events(self, count):
	128	level = 0
	129	for event in self.events[1:]:
	130	if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
	131	level += 1
	132	elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
	133	level -= 1
	134	elif isinstance(event, StreamEndEvent):
	135	level = -1
	136	if level < 0:
	137	return False
	138	return (len(self.events) < count+1)
	139
	140	def increase_indent(self, flow=False, indentless=False):
	141	self.indents.append(self.indent)
	142	if self.indent is None:
	143	if flow:
	144	self.indent = self.best_indent
	145	else:
	146	self.indent = 0
	147	elif not indentless:
	148	self.indent += self.best_indent
	149
	150	# States.
	151
	152	# Stream handlers.
	153
	154	def expect_stream_start(self):
	155	if isinstance(self.event, StreamStartEvent):
	156	if self.event.encoding:
	157	self.encoding = self.event.encoding
	158	self.write_stream_start()
	159	self.state = self.expect_first_document_start
	160	else:
	161	raise EmitterError("expected StreamStartEvent, but got %s"
	162	% self.event)
	163
	164	def expect_nothing(self):
	165	raise EmitterError("expected nothing, but got %s" % self.event)
	166
	167	# Document handlers.
	168
	169	def expect_first_document_start(self):
	170	return self.expect_document_start(first=True)
	171
	172	def expect_document_start(self, first=False):
	173	if isinstance(self.event, DocumentStartEvent):
	174	if self.event.version:
	175	version_text = self.prepare_version(self.event.version)
	176	self.write_version_directive(version_text)
	177	self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
	178	if self.event.tags:
	179	handles = self.event.tags.keys()
	180	handles.sort()
	181	for handle in handles:
	182	prefix = self.event.tags[handle]
	183	self.tag_prefixes[prefix] = handle
	184	handle_text = self.prepare_tag_handle(handle)
	185	prefix_text = self.prepare_tag_prefix(prefix)
	186	self.write_tag_directive(handle_text, prefix_text)
	187	implicit = (first and not self.event.explicit and not self.canonical
	188	and not self.event.version and not self.event.tags
	189	and not self.check_empty_document())
	190	if not implicit:
	191	self.write_indent()
	192	self.write_indicator(u'---', True)
	193	if self.canonical:
	194	self.write_indent()
	195	self.state = self.expect_document_root
	196	elif isinstance(self.event, StreamEndEvent):
	197	self.write_stream_end()
	198	self.state = self.expect_nothing
	199	else:
	200	raise EmitterError("expected DocumentStartEvent, but got %s"
	201	% self.event)
	202
	203	def expect_document_end(self):
	204	if isinstance(self.event, DocumentEndEvent):
	205	self.write_indent()
	206	if self.event.explicit:
	207	self.write_indicator(u'...', True)
	208	self.write_indent()
	209	self.flush_stream()
	210	self.state = self.expect_document_start
	211	else:
	212	raise EmitterError("expected DocumentEndEvent, but got %s"
	213	% self.event)
	214
	215	def expect_document_root(self):
	216	self.states.append(self.expect_document_end)
	217	self.expect_node(root=True)
	218
	219	# Node handlers.
	220
	221	def expect_node(self, root=False, sequence=False, mapping=False,
	222	simple_key=False):
	223	self.root_context = root
	224	self.sequence_context = sequence
	225	self.mapping_context = mapping
	226	self.simple_key_context = simple_key
	227	if isinstance(self.event, AliasEvent):
	228	self.expect_alias()
	229	elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
	230	self.process_anchor(u'&')
	231	self.process_tag()
	232	if isinstance(self.event, ScalarEvent):
	233	self.expect_scalar()
	234	elif isinstance(self.event, SequenceStartEvent):
	235	if self.flow_level or self.canonical or self.event.flow_style \
	236	or self.check_empty_sequence():
	237	self.expect_flow_sequence()
	238	else:
	239	self.expect_block_sequence()
	240	elif isinstance(self.event, MappingStartEvent):
	241	if self.flow_level or self.canonical or self.event.flow_style \
	242	or self.check_empty_mapping():
	243	self.expect_flow_mapping()
	244	else:
	245	self.expect_block_mapping()
	246	else:
	247	raise EmitterError("expected NodeEvent, but got %s" % self.event)
	248
	249	def expect_alias(self):
	250	if self.event.anchor is None:
	251	raise EmitterError("anchor is not specified for alias")
	252	self.process_anchor(u'*')
	253	self.state = self.states.pop()
	254
	255	def expect_scalar(self):
	256	self.increase_indent(flow=True)
	257	self.process_scalar()
	258	self.indent = self.indents.pop()
	259	self.state = self.states.pop()
	260
	261	# Flow sequence handlers.
	262
	263	def expect_flow_sequence(self):
	264	self.write_indicator(u'[', True, whitespace=True)
	265	self.flow_level += 1
	266	self.increase_indent(flow=True)
	267	self.state = self.expect_first_flow_sequence_item
	268
	269	def expect_first_flow_sequence_item(self):
	270	if isinstance(self.event, SequenceEndEvent):
	271	self.indent = self.indents.pop()
	272	self.flow_level -= 1
	273	self.write_indicator(u']', False)
	274	self.state = self.states.pop()
	275	else:
	276	if self.canonical or self.column > self.best_width:
	277	self.write_indent()
	278	self.states.append(self.expect_flow_sequence_item)
	279	self.expect_node(sequence=True)
	280
	281	def expect_flow_sequence_item(self):
	282	if isinstance(self.event, SequenceEndEvent):
	283	self.indent = self.indents.pop()
	284	self.flow_level -= 1
	285	if self.canonical:
	286	self.write_indicator(u',', False)
	287	self.write_indent()
	288	self.write_indicator(u']', False)
	289	self.state = self.states.pop()
	290	else:
	291	self.write_indicator(u',', False)
	292	if self.canonical or self.column > self.best_width:
	293	self.write_indent()
	294	self.states.append(self.expect_flow_sequence_item)
	295	self.expect_node(sequence=True)
	296
	297	# Flow mapping handlers.
	298
	299	def expect_flow_mapping(self):
	300	self.write_indicator(u'{', True, whitespace=True)
	301	self.flow_level += 1
	302	self.increase_indent(flow=True)
	303	self.state = self.expect_first_flow_mapping_key
	304
	305	def expect_first_flow_mapping_key(self):
	306	if isinstance(self.event, MappingEndEvent):
	307	self.indent = self.indents.pop()
	308	self.flow_level -= 1
	309	self.write_indicator(u'}', False)
	310	self.state = self.states.pop()
	311	else:
	312	if self.canonical or self.column > self.best_width:
	313	self.write_indent()
	314	if not self.canonical and self.check_simple_key():
	315	self.states.append(self.expect_flow_mapping_simple_value)
	316	self.expect_node(mapping=True, simple_key=True)
	317	else:
	318	self.write_indicator(u'?', True)
	319	self.states.append(self.expect_flow_mapping_value)
	320	self.expect_node(mapping=True)
	321
	322	def expect_flow_mapping_key(self):
	323	if isinstance(self.event, MappingEndEvent):
	324	self.indent = self.indents.pop()
	325	self.flow_level -= 1
	326	if self.canonical:
	327	self.write_indicator(u',', False)
	328	self.write_indent()
	329	self.write_indicator(u'}', False)
	330	self.state = self.states.pop()
	331	else:
	332	self.write_indicator(u',', False)
	333	if self.canonical or self.column > self.best_width:
	334	self.write_indent()
	335	if not self.canonical and self.check_simple_key():
	336	self.states.append(self.expect_flow_mapping_simple_value)
	337	self.expect_node(mapping=True, simple_key=True)
	338	else:
	339	self.write_indicator(u'?', True)
	340	self.states.append(self.expect_flow_mapping_value)
	341	self.expect_node(mapping=True)
	342
	343	def expect_flow_mapping_simple_value(self):
	344	self.write_indicator(u':', False)
	345	self.states.append(self.expect_flow_mapping_key)
	346	self.expect_node(mapping=True)
	347
	348	def expect_flow_mapping_value(self):
	349	if self.canonical or self.column > self.best_width:
	350	self.write_indent()
	351	self.write_indicator(u':', True)
	352	self.states.append(self.expect_flow_mapping_key)
	353	self.expect_node(mapping=True)
	354
	355	# Block sequence handlers.
	356
	357	def expect_block_sequence(self):
	358	indentless = (self.mapping_context and not self.indention)
	359	self.increase_indent(flow=False, indentless=indentless)
	360	self.state = self.expect_first_block_sequence_item
	361
	362	def expect_first_block_sequence_item(self):
	363	return self.expect_block_sequence_item(first=True)
	364
	365	def expect_block_sequence_item(self, first=False):
	366	if not first and isinstance(self.event, SequenceEndEvent):
	367	self.indent = self.indents.pop()
	368	self.state = self.states.pop()
	369	else:
	370	self.write_indent()
	371	self.write_indicator(u'-', True, indention=True)
	372	self.states.append(self.expect_block_sequence_item)
	373	self.expect_node(sequence=True)
	374
	375	# Block mapping handlers.
	376
	377	def expect_block_mapping(self):
	378	self.increase_indent(flow=False)
	379	self.state = self.expect_first_block_mapping_key
	380
	381	def expect_first_block_mapping_key(self):
	382	return self.expect_block_mapping_key(first=True)
	383
	384	def expect_block_mapping_key(self, first=False):
	385	if not first and isinstance(self.event, MappingEndEvent):
	386	self.indent = self.indents.pop()
	387	self.state = self.states.pop()
	388	else:
	389	self.write_indent()
	390	if self.check_simple_key():
	391	self.states.append(self.expect_block_mapping_simple_value)
	392	self.expect_node(mapping=True, simple_key=True)
	393	else:
	394	self.write_indicator(u'?', True, indention=True)
	395	self.states.append(self.expect_block_mapping_value)
	396	self.expect_node(mapping=True)
	397
	398	def expect_block_mapping_simple_value(self):
	399	self.write_indicator(u':', False)
	400	self.states.append(self.expect_block_mapping_key)
	401	self.expect_node(mapping=True)
	402
	403	def expect_block_mapping_value(self):
	404	self.write_indent()
	405	self.write_indicator(u':', True, indention=True)
	406	self.states.append(self.expect_block_mapping_key)
	407	self.expect_node(mapping=True)
	408
	409	# Checkers.
	410
	411	def check_empty_sequence(self):
	412	return (isinstance(self.event, SequenceStartEvent) and self.events
	413	and isinstance(self.events[0], SequenceEndEvent))
	414
	415	def check_empty_mapping(self):
	416	return (isinstance(self.event, MappingStartEvent) and self.events
	417	and isinstance(self.events[0], MappingEndEvent))
	418
	419	def check_empty_document(self):
	420	if not isinstance(self.event, DocumentStartEvent) or not self.events:
	421	return False
	422	event = self.events[0]
	423	return (isinstance(event, ScalarEvent) and event.anchor is None
	424	and event.tag is None and event.implicit and event.value == u'')
	425
	426	def check_simple_key(self):
	427	length = 0
	428	if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
	429	if self.prepared_anchor is None:
	430	self.prepared_anchor = self.prepare_anchor(self.event.anchor)
	431	length += len(self.prepared_anchor)
	432	if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \
	433	and self.event.tag is not None:
	434	if self.prepared_tag is None:
	435	self.prepared_tag = self.prepare_tag(self.event.tag)
	436	length += len(self.prepared_tag)
	437	if isinstance(self.event, ScalarEvent):
	438	if self.analysis is None:
	439	self.analysis = self.analyze_scalar(self.event.value)
	440	length += len(self.analysis.scalar)
	441	return (length < 128 and (isinstance(self.event, AliasEvent)
	442	or (isinstance(self.event, ScalarEvent)
	443	and not self.analysis.empty and not self.analysis.multiline)
	444	or self.check_empty_sequence() or self.check_empty_mapping()))
	445
	446	# Anchor, Tag, and Scalar processors.
	447
	448	def process_anchor(self, indicator):
	449	if self.event.anchor is None:
	450	self.prepared_anchor = None
	451	return
	452	if self.prepared_anchor is None:
	453	self.prepared_anchor = self.prepare_anchor(self.event.anchor)
	454	if self.prepared_anchor:
	455	self.write_indicator(indicator+self.prepared_anchor, True)
	456	self.prepared_anchor = None
	457
	458	def process_tag(self):
	459	tag = self.event.tag
	460	if isinstance(self.event, ScalarEvent):
	461	if self.style is None:
	462	self.style = self.choose_scalar_style()
	463	if ((not self.canonical or tag is None) and
	464	((self.style == '' and self.event.implicit[0])
	465	or (self.style != '' and self.event.implicit[1]))):
	466	self.prepared_tag = None
	467	return
	468	if self.event.implicit[0] and tag is None:
	469	tag = u'!'
	470	self.prepared_tag = None
	471	else:
	472	if (not self.canonical or tag is None) and self.event.implicit:
	473	self.prepared_tag = None
	474	return
	475	if tag is None:
	476	raise EmitterError("tag is not specified")
	477	if self.prepared_tag is None:
	478	self.prepared_tag = self.prepare_tag(tag)
	479	if self.prepared_tag:
	480	self.write_indicator(self.prepared_tag, True)
	481	self.prepared_tag = None
	482
	483	def choose_scalar_style(self):
	484	if self.analysis is None:
	485	self.analysis = self.analyze_scalar(self.event.value)
	486	if self.event.style == '"' or self.canonical:
	487	return '"'
	488	if not self.event.style and self.event.implicit[0]:
	489	if (not (self.simple_key_context and
	490	(self.analysis.empty or self.analysis.multiline))
	491	and (self.flow_level and self.analysis.allow_flow_plain
	492	or (not self.flow_level and self.analysis.allow_block_plain))):
	493	return ''
	494	if self.event.style and self.event.style in '\|>':
	495	if (not self.flow_level and not self.simple_key_context
	496	and self.analysis.allow_block):
	497	return self.event.style
	498	if not self.event.style or self.event.style == '\'':
	499	if (self.analysis.allow_single_quoted and
	500	not (self.simple_key_context and self.analysis.multiline)):
	501	return '\''
	502	return '"'
	503
	504	def process_scalar(self):
	505	if self.analysis is None:
	506	self.analysis = self.analyze_scalar(self.event.value)
	507	if self.style is None:
	508	self.style = self.choose_scalar_style()
	509	split = (not self.simple_key_context)
	510	#if self.analysis.multiline and split \
	511	# and (not self.style or self.style in '\'\"'):
	512	# self.write_indent()
	513	if self.style == '"':
	514	self.write_double_quoted(self.analysis.scalar, split)
	515	elif self.style == '\'':
	516	self.write_single_quoted(self.analysis.scalar, split)
	517	elif self.style == '>':
	518	self.write_folded(self.analysis.scalar)
	519	elif self.style == '\|':
	520	self.write_literal(self.analysis.scalar)
	521	else:
	522	self.write_plain(self.analysis.scalar, split)
	523	self.analysis = None
	524	self.style = None
	525
	526	# Analyzers.
	527
	528	def prepare_version(self, version):
	529	major, minor = version
	530	if major != 1:
	531	raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
	532	return u'%d.%d' % (major, minor)
	533
	534	def prepare_tag_handle(self, handle):
	535	if not handle:
	536	raise EmitterError("tag handle must not be empty")
	537	if handle[0] != u'!' or handle[-1] != u'!':
	538	raise EmitterError("tag handle must start and end with '!': %r"
	539	% (handle.encode('utf-8')))
	540	for ch in handle[1:-1]:
	541	if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
	542	or ch in u'-_'):
	543	raise EmitterError("invalid character %r in the tag handle: %r"
	544	% (ch.encode('utf-8'), handle.encode('utf-8')))
	545	return handle
	546
	547	def prepare_tag_prefix(self, prefix):
	548	if not prefix:
	549	raise EmitterError("tag prefix must not be empty")
	550	chunks = []
	551	start = end = 0
	552	if prefix[0] == u'!':
	553	end = 1
	554	while end < len(prefix):
	555	ch = prefix[end]
	556	if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
	557	or ch in u'-;/?!:@&=+$,_.~*\'()[]':
	558	end += 1
	559	else:
	560	if start < end:
	561	chunks.append(prefix[start:end])
	562	start = end = end+1
	563	data = ch.encode('utf-8')
	564	for ch in data:
	565	chunks.append(u'%%%02X' % ord(ch))
	566	if start < end:
	567	chunks.append(prefix[start:end])
	568	return u''.join(chunks)
	569
	570	def prepare_tag(self, tag):
	571	if not tag:
	572	raise EmitterError("tag must not be empty")
	573	if tag == u'!':
	574	return tag
	575	handle = None
	576	suffix = tag
	577	for prefix in self.tag_prefixes:
	578	if tag.startswith(prefix) \
	579	and (prefix == u'!' or len(prefix) < len(tag)):
	580	handle = self.tag_prefixes[prefix]
	581	suffix = tag[len(prefix):]
	582	chunks = []
	583	start = end = 0
	584	while end < len(suffix):
	585	ch = suffix[end]
	586	if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
	587	or ch in u'-;/?:@&=+$,_.~*\'()[]' \
	588	or (ch == u'!' and handle != u'!'):
	589	end += 1
	590	else:
	591	if start < end:
	592	chunks.append(suffix[start:end])
	593	start = end = end+1
	594	data = ch.encode('utf-8')
	595	for ch in data:
	596	chunks.append(u'%%%02X' % ord(ch))
	597	if start < end:
	598	chunks.append(suffix[start:end])
	599	suffix_text = u''.join(chunks)
	600	if handle:
	601	return u'%s%s' % (handle, suffix_text)
	602	else:
	603	return u'!<%s>' % suffix_text
	604
	605	def prepare_anchor(self, anchor):
	606	if not anchor:
	607	raise EmitterError("anchor must not be empty")
	608	for ch in anchor:
	609	if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
	610	or ch in u'-_'):
	611	raise EmitterError("invalid character %r in the anchor: %r"
	612	% (ch.encode('utf-8'), anchor.encode('utf-8')))
	613	return anchor
	614
	615	def analyze_scalar(self, scalar):
	616
	617	# Empty scalar is a special case.
	618	if not scalar:
	619	return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
	620	allow_flow_plain=False, allow_block_plain=True,
	621	allow_single_quoted=True, allow_double_quoted=True,
	622	allow_block=False)
	623
	624	# Indicators and special characters.
	625	block_indicators = False
	626	flow_indicators = False
	627	line_breaks = False
	628	special_characters = False
	629
	630	# Whitespaces.
	631	inline_spaces = False # non-space space+ non-space
	632	inline_breaks = False # non-space break+ non-space
	633	leading_spaces = False # ^ space+ (non-space \| $)
	634	leading_breaks = False # ^ break+ (non-space \| $)
	635	trailing_spaces = False # (^ \| non-space) space+ $
	636	trailing_breaks = False # (^ \| non-space) break+ $
	637	inline_breaks_spaces = False # non-space break+ space+ non-space
	638	mixed_breaks_spaces = False # anything else
	639
	640	# Check document indicators.
	641	if scalar.startswith(u'---') or scalar.startswith(u'...'):
	642	block_indicators = True
	643	flow_indicators = True
	644
	645	# First character or preceded by a whitespace.
	646	preceeded_by_space = True
	647
	648	# Last character or followed by a whitespace.
	649	followed_by_space = (len(scalar) == 1 or
	650	scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
	651
	652	# The current series of whitespaces contain plain spaces.
	653	spaces = False
	654
	655	# The current series of whitespaces contain line breaks.
	656	breaks = False
	657
	658	# The current series of whitespaces contain a space followed by a
	659	# break.
	660	mixed = False
	661
	662	# The current series of whitespaces start at the beginning of the
	663	# scalar.
	664	leading = False
	665
	666	index = 0
	667	while index < len(scalar):
	668	ch = scalar[index]
	669
	670	# Check for indicators.
	671
	672	if index == 0:
	673	# Leading indicators are special characters.
	674	if ch in u'#,[]{}&*!\|>\'\"%@`':
	675	flow_indicators = True
	676	block_indicators = True
	677	if ch in u'?:':
	678	flow_indicators = True
	679	if followed_by_space:
	680	block_indicators = True
	681	if ch == u'-' and followed_by_space:
	682	flow_indicators = True
	683	block_indicators = True
	684	else:
	685	# Some indicators cannot appear within a scalar as well.
	686	if ch in u',?[]{}':
	687	flow_indicators = True
	688	if ch == u':':
	689	flow_indicators = True
	690	if followed_by_space:
	691	block_indicators = True
	692	if ch == u'#' and preceeded_by_space:
	693	flow_indicators = True
	694	block_indicators = True
	695
	696	# Check for line breaks, special, and unicode characters.
	697
	698	if ch in u'\n\x85\u2028\u2029':
	699	line_breaks = True
	700	if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
	701	if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
	702	or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
	703	unicode_characters = True
	704	if not self.allow_unicode:
	705	special_characters = True
	706	else:
	707	special_characters = True
	708
	709	# Spaces, line breaks, and how they are mixed. State machine.
	710
	711	# Start or continue series of whitespaces.
	712	if ch in u' \n\x85\u2028\u2029':
	713	if spaces and breaks:
	714	if ch != u' ': # break+ (space+ break+) => mixed
	715	mixed = True
	716	elif spaces:
	717	if ch != u' ': # (space+ break+) => mixed
	718	breaks = True
	719	mixed = True
	720	elif breaks:
	721	if ch == u' ': # break+ space+
	722	spaces = True
	723	else:
	724	leading = (index == 0)
	725	if ch == u' ': # space+
	726	spaces = True
	727	else: # break+
	728	breaks = True
	729
	730	# Series of whitespaces ended with a non-space.
	731	elif spaces or breaks:
	732	if leading:
	733	if spaces and breaks:
	734	mixed_breaks_spaces = True
	735	elif spaces:
	736	leading_spaces = True
	737	elif breaks:
	738	leading_breaks = True
	739	else:
	740	if mixed:
	741	mixed_breaks_spaces = True
	742	elif spaces and breaks:
	743	inline_breaks_spaces = True
	744	elif spaces:
	745	inline_spaces = True
	746	elif breaks:
	747	inline_breaks = True
	748	spaces = breaks = mixed = leading = False
	749
	750	# Series of whitespaces reach the end.
	751	if (spaces or breaks) and (index == len(scalar)-1):
	752	if spaces and breaks:
	753	mixed_breaks_spaces = True
	754	elif spaces:
	755	trailing_spaces = True
	756	if leading:
	757	leading_spaces = True
	758	elif breaks:
	759	trailing_breaks = True
	760	if leading:
	761	leading_breaks = True
	762	spaces = breaks = mixed = leading = False
	763
	764	# Prepare for the next character.
	765	index += 1
	766	preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
	767	followed_by_space = (index+1 >= len(scalar) or
	768	scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
	769
	770	# Let's decide what styles are allowed.
	771	allow_flow_plain = True
	772	allow_block_plain = True
	773	allow_single_quoted = True
	774	allow_double_quoted = True
	775	allow_block = True
	776
	777	# Leading and trailing whitespace are bad for plain scalars. We also
	778	# do not want to mess with leading whitespaces for block scalars.
	779	if leading_spaces or leading_breaks or trailing_spaces:
	780	allow_flow_plain = allow_block_plain = allow_block = False
	781
	782	# Trailing breaks are fine for block scalars, but unacceptable for
	783	# plain scalars.
	784	if trailing_breaks:
	785	allow_flow_plain = allow_block_plain = False
	786
	787	# The combination of (space+ break+) is only acceptable for block
	788	# scalars.
	789	if inline_breaks_spaces:
	790	allow_flow_plain = allow_block_plain = allow_single_quoted = False
	791
	792	# Mixed spaces and breaks, as well as special character are only
	793	# allowed for double quoted scalars.
	794	if mixed_breaks_spaces or special_characters:
	795	allow_flow_plain = allow_block_plain = \
	796	allow_single_quoted = allow_block = False
	797
	798	# We don't emit multiline plain scalars.
	799	if line_breaks:
	800	allow_flow_plain = allow_block_plain = False
	801
	802	# Flow indicators are forbidden for flow plain scalars.
	803	if flow_indicators:
	804	allow_flow_plain = False
	805
	806	# Block indicators are forbidden for block plain scalars.
	807	if block_indicators:
	808	allow_block_plain = False
	809
	810	return ScalarAnalysis(scalar=scalar,
	811	empty=False, multiline=line_breaks,
	812	allow_flow_plain=allow_flow_plain,
	813	allow_block_plain=allow_block_plain,
	814	allow_single_quoted=allow_single_quoted,
	815	allow_double_quoted=allow_double_quoted,
	816	allow_block=allow_block)
	817
	818	# Writers.
	819
	820	def flush_stream(self):
	821	if hasattr(self.stream, 'flush'):
	822	self.stream.flush()
	823
	824	def write_stream_start(self):
	825	# Write BOM if needed.
	826	if self.encoding and self.encoding.startswith('utf-16'):
	827	self.stream.write(u'\xFF\xFE'.encode(self.encoding))
	828
	829	def write_stream_end(self):
	830	self.flush_stream()
	831
	832	def write_indicator(self, indicator, need_whitespace,
	833	whitespace=False, indention=False):
	834	if self.whitespace or not need_whitespace:
	835	data = indicator
	836	else:
	837	data = u' '+indicator
	838	self.whitespace = whitespace
	839	self.indention = self.indention and indention
	840	self.column += len(data)
	841	if self.encoding:
	842	data = data.encode(self.encoding)
	843	self.stream.write(data)
	844
	845	def write_indent(self):
	846	indent = self.indent or 0
	847	if not self.indention or self.column > indent \
	848	or (self.column == indent and not self.whitespace):
	849	self.write_line_break()
	850	if self.column < indent:
	851	self.whitespace = True
	852	data = u' '*(indent-self.column)
	853	self.column = indent
	854	if self.encoding:
	855	data = data.encode(self.encoding)
	856	self.stream.write(data)
	857
	858	def write_line_break(self, data=None):
	859	if data is None:
	860	data = self.best_line_break
	861	self.whitespace = True
	862	self.indention = True
	863	self.line += 1
	864	self.column = 0
	865	if self.encoding:
	866	data = data.encode(self.encoding)
	867	self.stream.write(data)
	868
	869	def write_version_directive(self, version_text):
	870	data = u'%%YAML %s' % version_text
	871	if self.encoding:
	872	data = data.encode(self.encoding)
	873	self.stream.write(data)
	874	self.write_line_break()
	875
	876	def write_tag_directive(self, handle_text, prefix_text):
	877	data = u'%%TAG %s %s' % (handle_text, prefix_text)
	878	if self.encoding:
	879	data = data.encode(self.encoding)
	880	self.stream.write(data)
	881	self.write_line_break()
	882
	883	# Scalar streams.
	884
    def write_single_quoted(self, text, split=True):
        """Write `text` as a single-quoted scalar.

        The text is scanned one character at a time, with one extra
        iteration (`ch` is None) to flush whatever run is pending at the
        end.  `spaces` and `breaks` record the kind of the previous
        character, so each run of spaces, line breaks or ordinary
        characters is written as a unit.  A quote character inside the
        text is written doubled.  When `split` is true, a single space
        inside the text may be replaced by a line break plus indentation
        once the column exceeds `best_width`.
        """
        self.write_indicator(u'\'', True)
        spaces = False
        breaks = False
        # text[start:end] is the run that has not been written yet.
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if spaces:
                # A run of spaces ends here.
                if ch is None or ch != u' ':
                    # Fold only at a single space that is neither the
                    # first nor the last character of the text.
                    if start+1 == end and self.column > self.best_width and split   \
                            and start != 0 and end != len(text):
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    start = end
            elif breaks:
                # A run of line breaks ends here.
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    # One extra break is written when the run starts with
                    # a plain line feed.
                    if text[start] == u'\n':
                        self.write_line_break()
                    for br in text[start:end]:
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    self.write_indent()
                    start = end
            else:
                # A run of ordinary characters ends at a space, a break, a
                # quote, or the end of the text.
                if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                    if start < end:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                        start = end
            if ch == u'\'':
                # A quote is escaped by doubling it.
                data = u'\'\''
                self.column += 2
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end + 1
            if ch is not None:
                spaces = (ch == u' ')
                breaks = (ch in u'\n\x85\u2028\u2029')
            end += 1
        self.write_indicator(u'\'', False)
	938
    # Maps a character to the letter that follows the backslash in its
    # short escape sequence; used by write_double_quoted.
    ESCAPE_REPLACEMENTS = {
        u'\0':      u'0',
        u'\x07':    u'a',
        u'\x08':    u'b',
        u'\x09':    u't',
        u'\x0A':    u'n',
        u'\x0B':    u'v',
        u'\x0C':    u'f',
        u'\x0D':    u'r',
        u'\x1B':    u'e',
        u'\"':      u'\"',
        u'\\':      u'\\',
        u'\x85':    u'N',
        u'\xA0':    u'_',
        u'\u2028':  u'L',
        u'\u2029':  u'P',
    }
	956
    def write_double_quoted(self, text, split=True):
        """Write `text` as a double-quoted scalar, escaping where needed.

        Printable ASCII (and, when `allow_unicode` is set, the accepted
        non-ASCII ranges) is copied verbatim.  Every other character is
        written as a short escape from ESCAPE_REPLACEMENTS or as a
        `\\xXX`, `\\uXXXX` or `\\UXXXXXXXX` escape.  When `split` is true
        and the line grows past `best_width`, the line is continued with a
        trailing backslash followed by a line break and indentation.
        """
        self.write_indicator(u'"', True)
        # text[start:end] is the verbatim run that has not been written yet.
        start = end = 0
        while end <= len(text):
            # One extra iteration with ch None flushes the final run.
            ch = None
            if end < len(text):
                ch = text[end]
            if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                    or not (u'\x20' <= ch <= u'\x7E'
                        or (self.allow_unicode
                            and (u'\xA0' <= ch <= u'\uD7FF'
                                or u'\uE000' <= ch <= u'\uFFFD'))):
                # Flush the pending verbatim run before the escape.
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
                if ch is not None:
                    # Prefer the short escape; otherwise use the shortest
                    # numeric form that fits the code point.
                    if ch in self.ESCAPE_REPLACEMENTS:
                        data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                    elif ch <= u'\xFF':
                        data = u'\\x%02X' % ord(ch)
                    elif ch <= u'\uFFFF':
                        data = u'\\u%04X' % ord(ch)
                    else:
                        data = u'\\U%08X' % ord(ch)
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end+1
            # Fold the line at a space, or right after an escape, but never
            # at the first or last character of the text.
            if 0 < end < len(text)-1 and (ch == u' ' or start >= end)   \
                    and self.column+(end-start) > self.best_width and split:
                data = text[start:end]+u'\\'
                if start < end:
                    start = end
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                # A backslash is written before a space that begins the
                # continuation line.
                if text[start] == u' ':
                    data = u'\\'
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
            end += 1
        self.write_indicator(u'"', False)
	1010
	1011	def determine_chomp(self, text):
	1012	tail = text[-2:]
	1013	while len(tail) < 2:
	1014	tail = u' '+tail
	1015	if tail[-1] in u'\n\x85\u2028\u2029':
	1016	if tail[-2] in u'\n\x85\u2028\u2029':
	1017	return u'+'
	1018	else:
	1019	return u''
	1020	else:
	1021	return u'-'
	1022
	1023	def write_folded(self, text):
	1024	chomp = self.determine_chomp(text)
	1025	self.write_indicator(u'>'+chomp, True)
	1026	self.write_indent()
	1027	leading_space = False
	1028	spaces = False
	1029	breaks = False
	1030	start = end = 0
	1031	while end <= len(text):
	1032	ch = None
	1033	if end < len(text):
	1034	ch = text[end]
	1035	if breaks:
	1036	if ch is None or ch not in u'\n\x85\u2028\u2029':
	1037	if not leading_space and ch is not None and ch != u' ' \
	1038	and text[start] == u'\n':
	1039	self.write_line_break()
	1040	leading_space = (ch == u' ')
	1041	for br in text[start:end]:
	1042	if br == u'\n':
	1043	self.write_line_break()
	1044	else:
	1045	self.write_line_break(br)
	1046	if ch is not None:
	1047	self.write_indent()
	1048	start = end
	1049	elif spaces:
	1050	if ch != u' ':
	1051	if start+1 == end and self.column > self.best_width:
	1052	self.write_indent()
	1053	else:
	1054	data = text[start:end]
	1055	self.column += len(data)
	1056	if self.encoding:
	1057	data = data.encode(self.encoding)
	1058	self.stream.write(data)
	1059	start = end
	1060	else:
	1061	if ch is None or ch in u' \n\x85\u2028\u2029':
	1062	data = text[start:end]
	1063	if self.encoding:
	1064	data = data.encode(self.encoding)
	1065	self.stream.write(data)
	1066	if ch is None:
	1067	self.write_line_break()
	1068	start = end
	1069	if ch is not None:
	1070	breaks = (ch in u'\n\x85\u2028\u2029')
	1071	spaces = (ch == u' ')
	1072	end += 1
	1073
	1074	def write_literal(self, text):
	1075	chomp = self.determine_chomp(text)
	1076	self.write_indicator(u'\|'+chomp, True)
	1077	self.write_indent()
	1078	breaks = False
	1079	start = end = 0
	1080	while end <= len(text):
	1081	ch = None
	1082	if end < len(text):
	1083	ch = text[end]
	1084	if breaks:
	1085	if ch is None or ch not in u'\n\x85\u2028\u2029':
	1086	for br in text[start:end]:
	1087	if br == u'\n':
	1088	self.write_line_break()
	1089	else:
	1090	self.write_line_break(br)
	1091	if ch is not None:
	1092	self.write_indent()
	1093	start = end
	1094	else:
	1095	if ch is None or ch in u'\n\x85\u2028\u2029':
	1096	data = text[start:end]
	1097	if self.encoding:
	1098	data = data.encode(self.encoding)
	1099	self.stream.write(data)
	1100	if ch is None:
	1101	self.write_line_break()
	1102	start = end
	1103	if ch is not None:
	1104	breaks = (ch in u'\n\x85\u2028\u2029')
	1105	end += 1
	1106
	1107	def write_plain(self, text, split=True):
	1108	if not text:
	1109	return
	1110	if not self.whitespace:
	1111	data = u' '
	1112	self.column += len(data)
	1113	if self.encoding:
	1114	data = data.encode(self.encoding)
	1115	self.stream.write(data)
	1116	self.writespace = False
	1117	self.indention = False
	1118	spaces = False
	1119	breaks = False
	1120	start = end = 0
	1121	while end <= len(text):
	1122	ch = None
	1123	if end < len(text):
	1124	ch = text[end]
	1125	if spaces:
	1126	if ch != u' ':
	1127	if start+1 == end and self.column > self.best_width and split:
	1128	self.write_indent()
	1129	self.writespace = False
	1130	self.indention = False
	1131	else:
	1132	data = text[start:end]
	1133	self.column += len(data)
	1134	if self.encoding:
	1135	data = data.encode(self.encoding)
	1136	self.stream.write(data)
	1137	start = end
	1138	elif breaks:
	1139	if ch not in u'\n\x85\u2028\u2029':
	1140	if text[start] == u'\n':
	1141	self.write_line_break()
	1142	for br in text[start:end]:
	1143	if br == u'\n':
	1144	self.write_line_break()
	1145	else:
	1146	self.write_line_break(br)
	1147	self.write_indent()
	1148	self.whitespace = False
	1149	self.indention = False
	1150	start = end
	1151	else:
	1152	if ch is None or ch in u' \n\x85\u2028\u2029':
	1153	data = text[start:end]
	1154	self.column += len(data)
	1155	if self.encoding:
	1156	data = data.encode(self.encoding)
	1157	self.stream.write(data)
	1158	start = end
	1159	if ch is not None:
	1160	spaces = (ch == u' ')
	1161	breaks = (ch in u'\n\x85\u2028\u2029')
	1162	end += 1
	1163

Note: See TracBrowser for help on using the repository browser.

Download in other formats: