| [898] | 1 |  | 
|---|
|  | 2 | #include "yaml_private.h" | 
|---|
|  | 3 |  | 
|---|
|  | 4 | /* | 
|---|
|  | 5 | * Declarations. | 
|---|
|  | 6 | */ | 
|---|
|  | 7 |  | 
|---|
|  | 8 | static int | 
|---|
|  | 9 | yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, | 
|---|
|  | 10 | size_t offset, int value); | 
|---|
|  | 11 |  | 
|---|
|  | 12 | static int | 
|---|
|  | 13 | yaml_parser_update_raw_buffer(yaml_parser_t *parser); | 
|---|
|  | 14 |  | 
|---|
|  | 15 | static int | 
|---|
|  | 16 | yaml_parser_determine_encoding(yaml_parser_t *parser); | 
|---|
|  | 17 |  | 
|---|
|  | 18 | YAML_DECLARE(int) | 
|---|
|  | 19 | yaml_parser_update_buffer(yaml_parser_t *parser, size_t length); | 
|---|
|  | 20 |  | 
|---|
|  | 21 | /* | 
|---|
|  | 22 | * Set the reader error and return 0. | 
|---|
|  | 23 | */ | 
|---|
|  | 24 |  | 
|---|
|  | 25 | static int | 
|---|
|  | 26 | yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, | 
|---|
|  | 27 | size_t offset, int value) | 
|---|
|  | 28 | { | 
|---|
|  | 29 | parser->error = YAML_READER_ERROR; | 
|---|
|  | 30 | parser->problem = problem; | 
|---|
|  | 31 | parser->problem_offset = offset; | 
|---|
|  | 32 | parser->problem_value = value; | 
|---|
|  | 33 |  | 
|---|
|  | 34 | return 0; | 
|---|
|  | 35 | } | 
|---|
|  | 36 |  | 
|---|
|  | 37 | /* | 
|---|
|  | 38 | * Byte order marks. | 
|---|
|  | 39 | */ | 
|---|
|  | 40 |  | 
|---|
|  | 41 | #define BOM_UTF8    "\xef\xbb\xbf" | 
|---|
|  | 42 | #define BOM_UTF16LE "\xff\xfe" | 
|---|
|  | 43 | #define BOM_UTF16BE "\xfe\xff" | 
|---|
|  | 44 |  | 
|---|
|  | 45 | /* | 
|---|
|  | 46 | * Determine the input stream encoding by checking the BOM symbol. If no BOM is | 
|---|
|  | 47 | * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. | 
|---|
|  | 48 | */ | 
|---|
|  | 49 |  | 
|---|
|  | 50 | static int | 
|---|
|  | 51 | yaml_parser_determine_encoding(yaml_parser_t *parser) | 
|---|
|  | 52 | { | 
|---|
|  | 53 | /* Ensure that we had enough bytes in the raw buffer. */ | 
|---|
|  | 54 |  | 
|---|
|  | 55 | while (!parser->eof | 
|---|
|  | 56 | && parser->raw_buffer.last - parser->raw_buffer.pointer < 3) { | 
|---|
|  | 57 | if (!yaml_parser_update_raw_buffer(parser)) { | 
|---|
|  | 58 | return 0; | 
|---|
|  | 59 | } | 
|---|
|  | 60 | } | 
|---|
|  | 61 |  | 
|---|
|  | 62 | /* Determine the encoding. */ | 
|---|
|  | 63 |  | 
|---|
|  | 64 | if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2 | 
|---|
|  | 65 | && !memcmp(parser->raw_buffer.pointer, BOM_UTF16LE, 2)) { | 
|---|
|  | 66 | parser->encoding = YAML_UTF16LE_ENCODING; | 
|---|
|  | 67 | parser->raw_buffer.pointer += 2; | 
|---|
|  | 68 | parser->offset += 2; | 
|---|
|  | 69 | } | 
|---|
|  | 70 | else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2 | 
|---|
|  | 71 | && !memcmp(parser->raw_buffer.pointer, BOM_UTF16BE, 2)) { | 
|---|
|  | 72 | parser->encoding = YAML_UTF16BE_ENCODING; | 
|---|
|  | 73 | parser->raw_buffer.pointer += 2; | 
|---|
|  | 74 | parser->offset += 2; | 
|---|
|  | 75 | } | 
|---|
|  | 76 | else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 3 | 
|---|
|  | 77 | && !memcmp(parser->raw_buffer.pointer, BOM_UTF8, 3)) { | 
|---|
|  | 78 | parser->encoding = YAML_UTF8_ENCODING; | 
|---|
|  | 79 | parser->raw_buffer.pointer += 3; | 
|---|
|  | 80 | parser->offset += 3; | 
|---|
|  | 81 | } | 
|---|
|  | 82 | else { | 
|---|
|  | 83 | parser->encoding = YAML_UTF8_ENCODING; | 
|---|
|  | 84 | } | 
|---|
|  | 85 |  | 
|---|
|  | 86 | return 1; | 
|---|
|  | 87 | } | 
|---|
|  | 88 |  | 
|---|
|  | 89 | /* | 
|---|
|  | 90 | * Update the raw buffer. | 
|---|
|  | 91 | */ | 
|---|
|  | 92 |  | 
|---|
|  | 93 | static int | 
|---|
|  | 94 | yaml_parser_update_raw_buffer(yaml_parser_t *parser) | 
|---|
|  | 95 | { | 
|---|
|  | 96 | size_t size_read = 0; | 
|---|
|  | 97 |  | 
|---|
|  | 98 | /* Return if the raw buffer is full. */ | 
|---|
|  | 99 |  | 
|---|
|  | 100 | if (parser->raw_buffer.start == parser->raw_buffer.pointer | 
|---|
|  | 101 | && parser->raw_buffer.last == parser->raw_buffer.end) | 
|---|
|  | 102 | return 1; | 
|---|
|  | 103 |  | 
|---|
|  | 104 | /* Return on EOF. */ | 
|---|
|  | 105 |  | 
|---|
|  | 106 | if (parser->eof) return 1; | 
|---|
|  | 107 |  | 
|---|
|  | 108 | /* Move the remaining bytes in the raw buffer to the beginning. */ | 
|---|
|  | 109 |  | 
|---|
|  | 110 | if (parser->raw_buffer.start < parser->raw_buffer.pointer | 
|---|
|  | 111 | && parser->raw_buffer.pointer < parser->raw_buffer.last) { | 
|---|
|  | 112 | memmove(parser->raw_buffer.start, parser->raw_buffer.pointer, | 
|---|
|  | 113 | parser->raw_buffer.last - parser->raw_buffer.pointer); | 
|---|
|  | 114 | } | 
|---|
|  | 115 | parser->raw_buffer.last -= | 
|---|
|  | 116 | parser->raw_buffer.pointer - parser->raw_buffer.start; | 
|---|
|  | 117 | parser->raw_buffer.pointer = parser->raw_buffer.start; | 
|---|
|  | 118 |  | 
|---|
|  | 119 | /* Call the read handler to fill the buffer. */ | 
|---|
|  | 120 |  | 
|---|
|  | 121 | if (!parser->read_handler(parser->read_handler_data, parser->raw_buffer.last, | 
|---|
|  | 122 | parser->raw_buffer.end - parser->raw_buffer.last, &size_read)) { | 
|---|
|  | 123 | return yaml_parser_set_reader_error(parser, "Input error", | 
|---|
|  | 124 | parser->offset, -1); | 
|---|
|  | 125 | } | 
|---|
|  | 126 | parser->raw_buffer.last += size_read; | 
|---|
|  | 127 | if (!size_read) { | 
|---|
|  | 128 | parser->eof = 1; | 
|---|
|  | 129 | } | 
|---|
|  | 130 |  | 
|---|
|  | 131 | return 1; | 
|---|
|  | 132 | } | 
|---|
|  | 133 |  | 
|---|
|  | 134 | /* | 
|---|
|  | 135 | * Ensure that the buffer contains at least `length` characters. | 
|---|
|  | 136 | * Return 1 on success, 0 on failure. | 
|---|
|  | 137 | * | 
|---|
|  | 138 | * The length is supposed to be significantly less that the buffer size. | 
|---|
|  | 139 | */ | 
|---|
|  | 140 |  | 
|---|
|  | 141 | YAML_DECLARE(int) | 
|---|
|  | 142 | yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) | 
|---|
|  | 143 | { | 
|---|
|  | 144 | assert(parser->read_handler);   /* Read handler must be set. */ | 
|---|
|  | 145 |  | 
|---|
|  | 146 | /* If the EOF flag is set and the raw buffer is empty, do nothing. */ | 
|---|
|  | 147 |  | 
|---|
|  | 148 | if (parser->eof && parser->raw_buffer.pointer == parser->raw_buffer.last) | 
|---|
|  | 149 | return 1; | 
|---|
|  | 150 |  | 
|---|
|  | 151 | /* Return if the buffer contains enough characters. */ | 
|---|
|  | 152 |  | 
|---|
|  | 153 | if (parser->unread >= length) | 
|---|
|  | 154 | return 1; | 
|---|
|  | 155 |  | 
|---|
|  | 156 | /* Determine the input encoding if it is not known yet. */ | 
|---|
|  | 157 |  | 
|---|
|  | 158 | if (!parser->encoding) { | 
|---|
|  | 159 | if (!yaml_parser_determine_encoding(parser)) | 
|---|
|  | 160 | return 0; | 
|---|
|  | 161 | } | 
|---|
|  | 162 |  | 
|---|
|  | 163 | /* Move the unread characters to the beginning of the buffer. */ | 
|---|
|  | 164 |  | 
|---|
|  | 165 | if (parser->buffer.start < parser->buffer.pointer | 
|---|
|  | 166 | && parser->buffer.pointer < parser->buffer.last) { | 
|---|
|  | 167 | size_t size = parser->buffer.last - parser->buffer.pointer; | 
|---|
|  | 168 | memmove(parser->buffer.start, parser->buffer.pointer, size); | 
|---|
|  | 169 | parser->buffer.pointer = parser->buffer.start; | 
|---|
|  | 170 | parser->buffer.last = parser->buffer.start + size; | 
|---|
|  | 171 | } | 
|---|
|  | 172 | else if (parser->buffer.pointer == parser->buffer.last) { | 
|---|
|  | 173 | parser->buffer.pointer = parser->buffer.start; | 
|---|
|  | 174 | parser->buffer.last = parser->buffer.start; | 
|---|
|  | 175 | } | 
|---|
|  | 176 |  | 
|---|
|  | 177 | /* Fill the buffer until it has enough characters. */ | 
|---|
|  | 178 |  | 
|---|
|  | 179 | while (parser->unread < length) | 
|---|
|  | 180 | { | 
|---|
|  | 181 | /* Fill the raw buffer. */ | 
|---|
|  | 182 |  | 
|---|
|  | 183 | if (!yaml_parser_update_raw_buffer(parser)) return 0; | 
|---|
|  | 184 |  | 
|---|
|  | 185 | /* Decode the raw buffer. */ | 
|---|
|  | 186 |  | 
|---|
|  | 187 | while (parser->raw_buffer.pointer != parser->raw_buffer.last) | 
|---|
|  | 188 | { | 
|---|
|  | 189 | unsigned int value = 0, value2 = 0; | 
|---|
|  | 190 | int incomplete = 0; | 
|---|
|  | 191 | unsigned char octet; | 
|---|
|  | 192 | unsigned int width = 0; | 
|---|
|  | 193 | int low, high; | 
|---|
|  | 194 | size_t k; | 
|---|
|  | 195 | size_t raw_unread = parser->raw_buffer.last - parser->raw_buffer.pointer; | 
|---|
|  | 196 |  | 
|---|
|  | 197 | /* Decode the next character. */ | 
|---|
|  | 198 |  | 
|---|
|  | 199 | switch (parser->encoding) | 
|---|
|  | 200 | { | 
|---|
|  | 201 | case YAML_UTF8_ENCODING: | 
|---|
|  | 202 |  | 
|---|
|  | 203 | /* | 
|---|
|  | 204 | * Decode a UTF-8 character.  Check RFC 3629 | 
|---|
|  | 205 | * (http://www.ietf.org/rfc/rfc3629.txt) for more details. | 
|---|
|  | 206 | * | 
|---|
|  | 207 | * The following table (taken from the RFC) is used for | 
|---|
|  | 208 | * decoding. | 
|---|
|  | 209 | * | 
|---|
|  | 210 | *    Char. number range |        UTF-8 octet sequence | 
|---|
|  | 211 | *      (hexadecimal)    |              (binary) | 
|---|
|  | 212 | *   --------------------+------------------------------------ | 
|---|
|  | 213 | *   0000 0000-0000 007F | 0xxxxxxx | 
|---|
|  | 214 | *   0000 0080-0000 07FF | 110xxxxx 10xxxxxx | 
|---|
|  | 215 | *   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx | 
|---|
|  | 216 | *   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | 
|---|
|  | 217 | * | 
|---|
|  | 218 | * Additionally, the characters in the range 0xD800-0xDFFF | 
|---|
|  | 219 | * are prohibited as they are reserved for use with UTF-16 | 
|---|
|  | 220 | * surrogate pairs. | 
|---|
|  | 221 | */ | 
|---|
|  | 222 |  | 
|---|
|  | 223 | /* Determine the length of the UTF-8 sequence. */ | 
|---|
|  | 224 |  | 
|---|
|  | 225 | octet = parser->raw_buffer.pointer[0]; | 
|---|
|  | 226 | width = (octet & 0x80) == 0x00 ? 1 : | 
|---|
|  | 227 | (octet & 0xE0) == 0xC0 ? 2 : | 
|---|
|  | 228 | (octet & 0xF0) == 0xE0 ? 3 : | 
|---|
|  | 229 | (octet & 0xF8) == 0xF0 ? 4 : 0; | 
|---|
|  | 230 |  | 
|---|
|  | 231 | /* Check if the leading octet is valid. */ | 
|---|
|  | 232 |  | 
|---|
|  | 233 | if (!width) | 
|---|
|  | 234 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 235 | "Invalid leading UTF-8 octet", | 
|---|
|  | 236 | parser->offset, octet); | 
|---|
|  | 237 |  | 
|---|
|  | 238 | /* Check if the raw buffer contains an incomplete character. */ | 
|---|
|  | 239 |  | 
|---|
|  | 240 | if (width > raw_unread) { | 
|---|
|  | 241 | if (parser->eof) { | 
|---|
|  | 242 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 243 | "Incomplete UTF-8 octet sequence", | 
|---|
|  | 244 | parser->offset, -1); | 
|---|
|  | 245 | } | 
|---|
|  | 246 | incomplete = 1; | 
|---|
|  | 247 | break; | 
|---|
|  | 248 | } | 
|---|
|  | 249 |  | 
|---|
|  | 250 | /* Decode the leading octet. */ | 
|---|
|  | 251 |  | 
|---|
|  | 252 | value = (octet & 0x80) == 0x00 ? octet & 0x7F : | 
|---|
|  | 253 | (octet & 0xE0) == 0xC0 ? octet & 0x1F : | 
|---|
|  | 254 | (octet & 0xF0) == 0xE0 ? octet & 0x0F : | 
|---|
|  | 255 | (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; | 
|---|
|  | 256 |  | 
|---|
|  | 257 | /* Check and decode the trailing octets. */ | 
|---|
|  | 258 |  | 
|---|
|  | 259 | for (k = 1; k < width; k ++) | 
|---|
|  | 260 | { | 
|---|
|  | 261 | octet = parser->raw_buffer.pointer[k]; | 
|---|
|  | 262 |  | 
|---|
|  | 263 | /* Check if the octet is valid. */ | 
|---|
|  | 264 |  | 
|---|
|  | 265 | if ((octet & 0xC0) != 0x80) | 
|---|
|  | 266 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 267 | "Invalid trailing UTF-8 octet", | 
|---|
|  | 268 | parser->offset+k, octet); | 
|---|
|  | 269 |  | 
|---|
|  | 270 | /* Decode the octet. */ | 
|---|
|  | 271 |  | 
|---|
|  | 272 | value = (value << 6) + (octet & 0x3F); | 
|---|
|  | 273 | } | 
|---|
|  | 274 |  | 
|---|
|  | 275 | /* Check the length of the sequence against the value. */ | 
|---|
|  | 276 |  | 
|---|
|  | 277 | if (!((width == 1) || | 
|---|
|  | 278 | (width == 2 && value >= 0x80) || | 
|---|
|  | 279 | (width == 3 && value >= 0x800) || | 
|---|
|  | 280 | (width == 4 && value >= 0x10000))) | 
|---|
|  | 281 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 282 | "Invalid length of a UTF-8 sequence", | 
|---|
|  | 283 | parser->offset, -1); | 
|---|
|  | 284 |  | 
|---|
|  | 285 | /* Check the range of the value. */ | 
|---|
|  | 286 |  | 
|---|
|  | 287 | if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) | 
|---|
|  | 288 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 289 | "Invalid Unicode character", | 
|---|
|  | 290 | parser->offset, value); | 
|---|
|  | 291 |  | 
|---|
|  | 292 | break; | 
|---|
|  | 293 |  | 
|---|
|  | 294 | case YAML_UTF16LE_ENCODING: | 
|---|
|  | 295 | case YAML_UTF16BE_ENCODING: | 
|---|
|  | 296 |  | 
|---|
|  | 297 | low = (parser->encoding == YAML_UTF16LE_ENCODING ? 0 : 1); | 
|---|
|  | 298 | high = (parser->encoding == YAML_UTF16LE_ENCODING ? 1 : 0); | 
|---|
|  | 299 |  | 
|---|
|  | 300 | /* | 
|---|
|  | 301 | * The UTF-16 encoding is not as simple as one might | 
|---|
|  | 302 | * naively think.  Check RFC 2781 | 
|---|
|  | 303 | * (http://www.ietf.org/rfc/rfc2781.txt). | 
|---|
|  | 304 | * | 
|---|
|  | 305 | * Normally, two subsequent bytes describe a Unicode | 
|---|
|  | 306 | * character.  However a special technique (called a | 
|---|
|  | 307 | * surrogate pair) is used for specifying character | 
|---|
|  | 308 | * values larger than 0xFFFF. | 
|---|
|  | 309 | * | 
|---|
|  | 310 | * A surrogate pair consists of two pseudo-characters: | 
|---|
|  | 311 | *      high surrogate area (0xD800-0xDBFF) | 
|---|
|  | 312 | *      low surrogate area (0xDC00-0xDFFF) | 
|---|
|  | 313 | * | 
|---|
|  | 314 | * The following formulas are used for decoding | 
|---|
|  | 315 | * and encoding characters using surrogate pairs: | 
|---|
|  | 316 | * | 
|---|
|  | 317 | *  U  = U' + 0x10000   (0x01 00 00 <= U <= 0x10 FF FF) | 
|---|
|  | 318 | *  U' = yyyyyyyyyyxxxxxxxxxx   (0 <= U' <= 0x0F FF FF) | 
|---|
|  | 319 | *  W1 = 110110yyyyyyyyyy | 
|---|
|  | 320 | *  W2 = 110111xxxxxxxxxx | 
|---|
|  | 321 | * | 
|---|
|  | 322 | * where U is the character value, W1 is the high surrogate | 
|---|
|  | 323 | * area, W2 is the low surrogate area. | 
|---|
|  | 324 | */ | 
|---|
|  | 325 |  | 
|---|
|  | 326 | /* Check for incomplete UTF-16 character. */ | 
|---|
|  | 327 |  | 
|---|
|  | 328 | if (raw_unread < 2) { | 
|---|
|  | 329 | if (parser->eof) { | 
|---|
|  | 330 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 331 | "Incomplete UTF-16 character", | 
|---|
|  | 332 | parser->offset, -1); | 
|---|
|  | 333 | } | 
|---|
|  | 334 | incomplete = 1; | 
|---|
|  | 335 | break; | 
|---|
|  | 336 | } | 
|---|
|  | 337 |  | 
|---|
|  | 338 | /* Get the character. */ | 
|---|
|  | 339 |  | 
|---|
|  | 340 | value = parser->raw_buffer.pointer[low] | 
|---|
|  | 341 | + (parser->raw_buffer.pointer[high] << 8); | 
|---|
|  | 342 |  | 
|---|
|  | 343 | /* Check for unexpected low surrogate area. */ | 
|---|
|  | 344 |  | 
|---|
|  | 345 | if ((value & 0xFC00) == 0xDC00) | 
|---|
|  | 346 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 347 | "Unexpected low surrogate area", | 
|---|
|  | 348 | parser->offset, value); | 
|---|
|  | 349 |  | 
|---|
|  | 350 | /* Check for a high surrogate area. */ | 
|---|
|  | 351 |  | 
|---|
|  | 352 | if ((value & 0xFC00) == 0xD800) { | 
|---|
|  | 353 |  | 
|---|
|  | 354 | width = 4; | 
|---|
|  | 355 |  | 
|---|
|  | 356 | /* Check for incomplete surrogate pair. */ | 
|---|
|  | 357 |  | 
|---|
|  | 358 | if (raw_unread < 4) { | 
|---|
|  | 359 | if (parser->eof) { | 
|---|
|  | 360 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 361 | "Incomplete UTF-16 surrogate pair", | 
|---|
|  | 362 | parser->offset, -1); | 
|---|
|  | 363 | } | 
|---|
|  | 364 | incomplete = 1; | 
|---|
|  | 365 | break; | 
|---|
|  | 366 | } | 
|---|
|  | 367 |  | 
|---|
|  | 368 | /* Get the next character. */ | 
|---|
|  | 369 |  | 
|---|
|  | 370 | value2 = parser->raw_buffer.pointer[low+2] | 
|---|
|  | 371 | + (parser->raw_buffer.pointer[high+2] << 8); | 
|---|
|  | 372 |  | 
|---|
|  | 373 | /* Check for a low surrogate area. */ | 
|---|
|  | 374 |  | 
|---|
|  | 375 | if ((value2 & 0xFC00) != 0xDC00) | 
|---|
|  | 376 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 377 | "Expected low surrogate area", | 
|---|
|  | 378 | parser->offset+2, value2); | 
|---|
|  | 379 |  | 
|---|
|  | 380 | /* Generate the value of the surrogate pair. */ | 
|---|
|  | 381 |  | 
|---|
|  | 382 | value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF); | 
|---|
|  | 383 | } | 
|---|
|  | 384 |  | 
|---|
|  | 385 | else { | 
|---|
|  | 386 | width = 2; | 
|---|
|  | 387 | } | 
|---|
|  | 388 |  | 
|---|
|  | 389 | break; | 
|---|
|  | 390 |  | 
|---|
|  | 391 | default: | 
|---|
|  | 392 | assert(1);      /* Impossible. */ | 
|---|
|  | 393 | } | 
|---|
|  | 394 |  | 
|---|
|  | 395 | /* Check if the raw buffer contains enough bytes to form a character. */ | 
|---|
|  | 396 |  | 
|---|
|  | 397 | if (incomplete) break; | 
|---|
|  | 398 |  | 
|---|
|  | 399 | /* | 
|---|
|  | 400 | * Check if the character is in the allowed range: | 
|---|
|  | 401 | *      #x9 | #xA | #xD | [#x20-#x7E]               (8 bit) | 
|---|
|  | 402 | *      | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD]    (16 bit) | 
|---|
|  | 403 | *      | [#x10000-#x10FFFF]                        (32 bit) | 
|---|
|  | 404 | */ | 
|---|
|  | 405 |  | 
|---|
|  | 406 | if (! (value == 0x09 || value == 0x0A || value == 0x0D | 
|---|
|  | 407 | || (value >= 0x20 && value <= 0x7E) | 
|---|
|  | 408 | || (value == 0x85) || (value >= 0xA0 && value <= 0xD7FF) | 
|---|
|  | 409 | || (value >= 0xE000 && value <= 0xFFFD) | 
|---|
|  | 410 | || (value >= 0x10000 && value <= 0x10FFFF))) | 
|---|
|  | 411 | return yaml_parser_set_reader_error(parser, | 
|---|
|  | 412 | "Control characters are not allowed", | 
|---|
|  | 413 | parser->offset, value); | 
|---|
|  | 414 |  | 
|---|
|  | 415 | /* Move the raw pointers. */ | 
|---|
|  | 416 |  | 
|---|
|  | 417 | parser->raw_buffer.pointer += width; | 
|---|
|  | 418 | parser->offset += width; | 
|---|
|  | 419 |  | 
|---|
|  | 420 | /* Finally put the character into the buffer. */ | 
|---|
|  | 421 |  | 
|---|
|  | 422 | /* 0000 0000-0000 007F -> 0xxxxxxx */ | 
|---|
|  | 423 | if (value <= 0x7F) { | 
|---|
|  | 424 | *(parser->buffer.last++) = value; | 
|---|
|  | 425 | } | 
|---|
|  | 426 | /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ | 
|---|
|  | 427 | else if (value <= 0x7FF) { | 
|---|
|  | 428 | *(parser->buffer.last++) = 0xC0 + (value >> 6); | 
|---|
|  | 429 | *(parser->buffer.last++) = 0x80 + (value & 0x3F); | 
|---|
|  | 430 | } | 
|---|
|  | 431 | /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ | 
|---|
|  | 432 | else if (value <= 0xFFFF) { | 
|---|
|  | 433 | *(parser->buffer.last++) = 0xE0 + (value >> 12); | 
|---|
|  | 434 | *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F); | 
|---|
|  | 435 | *(parser->buffer.last++) = 0x80 + (value & 0x3F); | 
|---|
|  | 436 | } | 
|---|
|  | 437 | /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | 
|---|
|  | 438 | else { | 
|---|
|  | 439 | *(parser->buffer.last++) = 0xF0 + (value >> 18); | 
|---|
|  | 440 | *(parser->buffer.last++) = 0x80 + ((value >> 12) & 0x3F); | 
|---|
|  | 441 | *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F); | 
|---|
|  | 442 | *(parser->buffer.last++) = 0x80 + (value & 0x3F); | 
|---|
|  | 443 | } | 
|---|
|  | 444 |  | 
|---|
|  | 445 | parser->unread ++; | 
|---|
|  | 446 | } | 
|---|
|  | 447 |  | 
|---|
|  | 448 | /* On EOF, put NUL into the buffer and return. */ | 
|---|
|  | 449 |  | 
|---|
|  | 450 | if (parser->eof) { | 
|---|
|  | 451 | *(parser->buffer.last++) = '\0'; | 
|---|
|  | 452 | parser->unread ++; | 
|---|
|  | 453 | return 1; | 
|---|
|  | 454 | } | 
|---|
|  | 455 |  | 
|---|
|  | 456 | } | 
|---|
|  | 457 |  | 
|---|
|  | 458 | return 1; | 
|---|
|  | 459 | } | 
|---|
|  | 460 |  | 
|---|