Actual source code: scanner.c

  2: /*
  3:  * Introduction
  4:  * ************
  5:  *
  6:  * The following notes assume that you are familiar with the YAML specification
  7:  * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
  8:  * some cases we are less restrictive than it requires.
  9:  *
 10:  * The process of transforming a YAML stream into a sequence of events is
 11:  * divided into two steps: Scanning and Parsing.
 12:  *
 13:  * The Scanner transforms the input stream into a sequence of tokens, while the
 14:  * Parser transforms the sequence of tokens produced by the Scanner into a
 15:  * sequence of parsing events.
 16:  *
 17:  * The Scanner is rather clever and complicated. The Parser, on the contrary,
 18:  * is a straightforward implementation of a recursive-descent parser (or,
 19:  * LL(1) parser, as it is usually called).
 20:  *
 21:  * Actually there are two issues of Scanning that might be called "clever", the
 22:  * rest is quite straightforward.  The issues are "block collection start" and
 23:  * "simple keys".  Both issues are explained below in detail.
 24:  *
 25:  * Here the Scanning step is explained and implemented.  We start with the list
 26:  * of all the tokens produced by the Scanner together with short descriptions.
 27:  *
 28:  * Now, tokens:
 29:  *
 30:  *      STREAM-START(encoding)          # The stream start.
 31:  *      STREAM-END                      # The stream end.
 32:  *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
 33:  *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
 34:  *      DOCUMENT-START                  # '---'
 35:  *      DOCUMENT-END                    # '...'
 36:  *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
 37:  *      BLOCK-MAPPING-START             # sequence or a block mapping.
 38:  *      BLOCK-END                       # Indentation decrease.
 39:  *      FLOW-SEQUENCE-START             # '['
 40:  *      FLOW-SEQUENCE-END               # ']'
 41:  *      FLOW-MAPPING-START              # '{'
 42:  *      FLOW-MAPPING-END                # '}'
 43:  *      BLOCK-ENTRY                     # '-'
 44:  *      FLOW-ENTRY                      # ','
 45:  *      KEY                             # '?' or nothing (simple keys).
 46:  *      VALUE                           # ':'
 47:  *      ALIAS(anchor)                   # '*anchor'
 48:  *      ANCHOR(anchor)                  # '&anchor'
 49:  *      TAG(handle,suffix)              # '!handle!suffix'
 50:  *      SCALAR(value,style)             # A scalar.
 51:  *
 52:  * The following two tokens are "virtual" tokens denoting the beginning and the
 53:  * end of the stream:
 54:  *
 55:  *      STREAM-START(encoding)
 56:  *      STREAM-END
 57:  *
 58:  * We pass the information about the input stream encoding with the
 59:  * STREAM-START token.
 60:  *
 61:  * The next two tokens are responsible for directives:
 62:  *
 63:  *      VERSION-DIRECTIVE(major,minor)
 64:  *      TAG-DIRECTIVE(handle,prefix)
 65:  *
 66:  * Example:
 67:  *
 68:  *      %YAML   1.1
 69:  *      %TAG    !   !foo
 70:  *      %TAG    !yaml!  tag:yaml.org,2002:
 71:  *      ---
 72:  *
 73:  * The corresponding sequence of tokens:
 74:  *
 75:  *      STREAM-START(utf-8)
 76:  *      VERSION-DIRECTIVE(1,1)
 77:  *      TAG-DIRECTIVE("!","!foo")
 78:  *      TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:")
 79:  *      DOCUMENT-START
 80:  *      STREAM-END
 81:  *
 82:  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
 83:  * line.
 84:  *
 85:  * The document start and end indicators are represented by:
 86:  *
 87:  *      DOCUMENT-START
 88:  *      DOCUMENT-END
 89:  *
 90:  * Note that if a YAML stream contains an implicit document (without '---'
 91:  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
 92:  * produced.
 93:  *
 94:  * In the following examples, we present whole documents together with the
 95:  * produced tokens.
 96:  *
 97:  *      1. An implicit document:
 98:  *
 99:  *          'a scalar'
100:  *
101:  *      Tokens:
102:  *
103:  *          STREAM-START(utf-8)
104:  *          SCALAR("a scalar",single-quoted)
105:  *          STREAM-END
106:  *
107:  *      2. An explicit document:
108:  *
109:  *          ---
110:  *          'a scalar'
111:  *          ...
112:  *
113:  *      Tokens:
114:  *
115:  *          STREAM-START(utf-8)
116:  *          DOCUMENT-START
117:  *          SCALAR("a scalar",single-quoted)
118:  *          DOCUMENT-END
119:  *          STREAM-END
120:  *
121:  *      3. Several documents in a stream:
122:  *
123:  *          'a scalar'
124:  *          ---
125:  *          'another scalar'
126:  *          ---
127:  *          'yet another scalar'
128:  *
129:  *      Tokens:
130:  *
131:  *          STREAM-START(utf-8)
132:  *          SCALAR("a scalar",single-quoted)
133:  *          DOCUMENT-START
134:  *          SCALAR("another scalar",single-quoted)
135:  *          DOCUMENT-START
136:  *          SCALAR("yet another scalar",single-quoted)
137:  *          STREAM-END
138:  *
139:  * We have already introduced the SCALAR token above.  The following tokens are
140:  * used to describe aliases, anchors, tags, and scalars:
141:  *
142:  *      ALIAS(anchor)
143:  *      ANCHOR(anchor)
144:  *      TAG(handle,suffix)
145:  *      SCALAR(value,style)
146:  *
147:  * The following series of examples illustrate the usage of these tokens:
148:  *
149:  *      1. A recursive sequence:
150:  *
151:  *          &A [ *A ]
152:  *
153:  *      Tokens:
154:  *
155:  *          STREAM-START(utf-8)
156:  *          ANCHOR("A")
157:  *          FLOW-SEQUENCE-START
158:  *          ALIAS("A")
159:  *          FLOW-SEQUENCE-END
160:  *          STREAM-END
161:  *
162:  *      2. A tagged scalar:
163:  *
164:  *          !!float "3.14"  # A good approximation.
165:  *
166:  *      Tokens:
167:  *
168:  *          STREAM-START(utf-8)
169:  *          TAG("!!","float")
170:  *          SCALAR("3.14",double-quoted)
171:  *          STREAM-END
172:  *
173:  *      3. Various scalar styles:
174:  *
175:  *          --- # Implicit empty plain scalars do not produce tokens.
176:  *          --- a plain scalar
177:  *          --- 'a single-quoted scalar'
178:  *          --- "a double-quoted scalar"
179:  *          --- |-
180:  *            a literal scalar
181:  *          --- >-
182:  *            a folded
183:  *            scalar
184:  *
185:  *      Tokens:
186:  *
187:  *          STREAM-START(utf-8)
188:  *          DOCUMENT-START
189:  *          DOCUMENT-START
190:  *          SCALAR("a plain scalar",plain)
191:  *          DOCUMENT-START
192:  *          SCALAR("a single-quoted scalar",single-quoted)
193:  *          DOCUMENT-START
194:  *          SCALAR("a double-quoted scalar",double-quoted)
195:  *          DOCUMENT-START
196:  *          SCALAR("a literal scalar",literal)
197:  *          DOCUMENT-START
198:  *          SCALAR("a folded scalar",folded)
199:  *          STREAM-END
200:  *
201:  * Now it's time to review collection-related tokens. We will start with
202:  * flow collections:
203:  *
204:  *      FLOW-SEQUENCE-START
205:  *      FLOW-SEQUENCE-END
206:  *      FLOW-MAPPING-START
207:  *      FLOW-MAPPING-END
208:  *      FLOW-ENTRY
209:  *      KEY
210:  *      VALUE
211:  *
212:  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213:  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214:  * correspondingly.  FLOW-ENTRY represents the ',' indicator.  Finally the
215:  * indicators '?' and ':', which are used for denoting mapping keys and values,
216:  * are represented by the KEY and VALUE tokens.
217:  *
218:  * The following examples show flow collections:
219:  *
220:  *      1. A flow sequence:
221:  *
222:  *          [item 1, item 2, item 3]
223:  *
224:  *      Tokens:
225:  *
226:  *          STREAM-START(utf-8)
227:  *          FLOW-SEQUENCE-START
228:  *          SCALAR("item 1",plain)
229:  *          FLOW-ENTRY
230:  *          SCALAR("item 2",plain)
231:  *          FLOW-ENTRY
232:  *          SCALAR("item 3",plain)
233:  *          FLOW-SEQUENCE-END
234:  *          STREAM-END
235:  *
236:  *      2. A flow mapping:
237:  *
238:  *          {
239:  *              a simple key: a value,  # Note that the KEY token is produced.
240:  *              ? a complex key: another value,
241:  *          }
242:  *
243:  *      Tokens:
244:  *
245:  *          STREAM-START(utf-8)
246:  *          FLOW-MAPPING-START
247:  *          KEY
248:  *          SCALAR("a simple key",plain)
249:  *          VALUE
250:  *          SCALAR("a value",plain)
251:  *          FLOW-ENTRY
252:  *          KEY
253:  *          SCALAR("a complex key",plain)
254:  *          VALUE
255:  *          SCALAR("another value",plain)
256:  *          FLOW-ENTRY
257:  *          FLOW-MAPPING-END
258:  *          STREAM-END
259:  *
260:  * A simple key is a key which is not denoted by the '?' indicator.  Note that
261:  * the Scanner still produces the KEY token whenever it encounters a simple key.
262:  *
263:  * For scanning block collections, the following tokens are used (note that we
264:  * repeat KEY and VALUE here):
265:  *
266:  *      BLOCK-SEQUENCE-START
267:  *      BLOCK-MAPPING-START
268:  *      BLOCK-END
269:  *      BLOCK-ENTRY
270:  *      KEY
271:  *      VALUE
272:  *
273:  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274:  * increase that precedes a block collection (cf. the INDENT token in Python).
275:  * The token BLOCK-END denotes indentation decrease that ends a block collection
276:  * (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
277:  * that make detection of these tokens more complex.
278:  *
279:  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280:  * '-', '?', and ':' correspondingly.
281:  *
282:  * The following examples show how the tokens BLOCK-SEQUENCE-START,
283:  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284:  *
285:  *      1. Block sequences:
286:  *
287:  *          - item 1
288:  *          - item 2
289:  *          -
290:  *            - item 3.1
291:  *            - item 3.2
292:  *          -
293:  *            key 1: value 1
294:  *            key 2: value 2
295:  *
296:  *      Tokens:
297:  *
298:  *          STREAM-START(utf-8)
299:  *          BLOCK-SEQUENCE-START
300:  *          BLOCK-ENTRY
301:  *          SCALAR("item 1",plain)
302:  *          BLOCK-ENTRY
303:  *          SCALAR("item 2",plain)
304:  *          BLOCK-ENTRY
305:  *          BLOCK-SEQUENCE-START
306:  *          BLOCK-ENTRY
307:  *          SCALAR("item 3.1",plain)
308:  *          BLOCK-ENTRY
309:  *          SCALAR("item 3.2",plain)
310:  *          BLOCK-END
311:  *          BLOCK-ENTRY
312:  *          BLOCK-MAPPING-START
313:  *          KEY
314:  *          SCALAR("key 1",plain)
315:  *          VALUE
316:  *          SCALAR("value 1",plain)
317:  *          KEY
318:  *          SCALAR("key 2",plain)
319:  *          VALUE
320:  *          SCALAR("value 2",plain)
321:  *          BLOCK-END
322:  *          BLOCK-END
323:  *          STREAM-END
324:  *
325:  *      2. Block mappings:
326:  *
327:  *          a simple key: a value   # The KEY token is produced here.
328:  *          ? a complex key
329:  *          : another value
330:  *          a mapping:
331:  *            key 1: value 1
332:  *            key 2: value 2
333:  *          a sequence:
334:  *            - item 1
335:  *            - item 2
336:  *
337:  *      Tokens:
338:  *
339:  *          STREAM-START(utf-8)
340:  *          BLOCK-MAPPING-START
341:  *          KEY
342:  *          SCALAR("a simple key",plain)
343:  *          VALUE
344:  *          SCALAR("a value",plain)
345:  *          KEY
346:  *          SCALAR("a complex key",plain)
347:  *          VALUE
348:  *          SCALAR("another value",plain)
349:  *          KEY
350:  *          SCALAR("a mapping",plain)
351:  *          VALUE
352:  *          BLOCK-MAPPING-START
353:  *          KEY
354:  *          SCALAR("key 1",plain)
355:  *          VALUE
356:  *          SCALAR("value 1",plain)
357:  *          KEY
358:  *          SCALAR("key 2",plain)
359:  *          VALUE
360:  *          SCALAR("value 2",plain)
361:  *          BLOCK-END
362:  *          KEY
363:  *          SCALAR("a sequence",plain)
364:  *          VALUE
365:  *          BLOCK-SEQUENCE-START
366:  *          BLOCK-ENTRY
367:  *          SCALAR("item 1",plain)
368:  *          BLOCK-ENTRY
369:  *          SCALAR("item 2",plain)
370:  *          BLOCK-END
371:  *          BLOCK-END
372:  *          STREAM-END
373:  *
374:  * YAML does not always require a new block collection to start on a new
375:  * line.  If the current line contains only '-', '?', and ':' indicators, a new
376:  * block collection may start at the current line.  The following examples
377:  * illustrate this case:
378:  *
379:  *      1. Collections in a sequence:
380:  *
381:  *          - - item 1
382:  *            - item 2
383:  *          - key 1: value 1
384:  *            key 2: value 2
385:  *          - ? complex key
386:  *            : complex value
387:  *
388:  *      Tokens:
389:  *
390:  *          STREAM-START(utf-8)
391:  *          BLOCK-SEQUENCE-START
392:  *          BLOCK-ENTRY
393:  *          BLOCK-SEQUENCE-START
394:  *          BLOCK-ENTRY
395:  *          SCALAR("item 1",plain)
396:  *          BLOCK-ENTRY
397:  *          SCALAR("item 2",plain)
398:  *          BLOCK-END
399:  *          BLOCK-ENTRY
400:  *          BLOCK-MAPPING-START
401:  *          KEY
402:  *          SCALAR("key 1",plain)
403:  *          VALUE
404:  *          SCALAR("value 1",plain)
405:  *          KEY
406:  *          SCALAR("key 2",plain)
407:  *          VALUE
408:  *          SCALAR("value 2",plain)
409:  *          BLOCK-END
410:  *          BLOCK-ENTRY
411:  *          BLOCK-MAPPING-START
412:  *          KEY
413:  *          SCALAR("complex key",plain)
414:  *          VALUE
415:  *          SCALAR("complex value",plain)
416:  *          BLOCK-END
417:  *          BLOCK-END
418:  *          STREAM-END
419:  *
420:  *      2. Collections in a mapping:
421:  *
422:  *          ? a sequence
423:  *          : - item 1
424:  *            - item 2
425:  *          ? a mapping
426:  *          : key 1: value 1
427:  *            key 2: value 2
428:  *
429:  *      Tokens:
430:  *
431:  *          STREAM-START(utf-8)
432:  *          BLOCK-MAPPING-START
433:  *          KEY
434:  *          SCALAR("a sequence",plain)
435:  *          VALUE
436:  *          BLOCK-SEQUENCE-START
437:  *          BLOCK-ENTRY
438:  *          SCALAR("item 1",plain)
439:  *          BLOCK-ENTRY
440:  *          SCALAR("item 2",plain)
441:  *          BLOCK-END
442:  *          KEY
443:  *          SCALAR("a mapping",plain)
444:  *          VALUE
445:  *          BLOCK-MAPPING-START
446:  *          KEY
447:  *          SCALAR("key 1",plain)
448:  *          VALUE
449:  *          SCALAR("value 1",plain)
450:  *          KEY
451:  *          SCALAR("key 2",plain)
452:  *          VALUE
453:  *          SCALAR("value 2",plain)
454:  *          BLOCK-END
455:  *          BLOCK-END
456:  *          STREAM-END
457:  *
458:  * YAML also permits non-indented sequences if they are included into a block
459:  * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
460:  *
461:  *      key:
462:  *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
463:  *      - item 2
464:  *
465:  * Tokens:
466:  *
467:  *      STREAM-START(utf-8)
468:  *      BLOCK-MAPPING-START
469:  *      KEY
470:  *      SCALAR("key",plain)
471:  *      VALUE
472:  *      BLOCK-ENTRY
473:  *      SCALAR("item 1",plain)
474:  *      BLOCK-ENTRY
475:  *      SCALAR("item 2",plain)
476:  *      BLOCK-END
477:  */

479: #include "yaml_private.h"

/*
 * Ensure that the buffer contains the required number of characters.
 *
 * Evaluates to 1 right away when `parser->unread` already covers `length`;
 * otherwise evaluates to the result of yaml_parser_update_buffer(), i.e.
 * 1 on success, 0 on failure (reader error or memory error).
 *
 * Both arguments are parenthesized in the expansion, so any pointer-valued
 * expression (for instance `&object`) may be passed as `parser`.  Each
 * argument may be evaluated more than once: do not pass expressions with
 * side effects.
 */

#define CACHE(parser,length)                                                    \
    ((parser)->unread >= (length)                                               \
        ? 1                                                                     \
        : yaml_parser_update_buffer((parser), (length)))

/*
 * Advance the buffer pointer past the current character.
 *
 * The character counts as one position in `mark.index` and `mark.column`
 * and one unit of `unread`, while the buffer pointer moves by the byte
 * width of the character (WIDTH).
 *
 * `parser` is parenthesized in the expansion so that any pointer-valued
 * expression may be passed; it is evaluated several times, so it must not
 * have side effects.
 */

#define SKIP(parser)                                                            \
     ((parser)->mark.index ++,                                                  \
      (parser)->mark.column ++,                                                 \
      (parser)->unread --,                                                      \
      (parser)->buffer.pointer += WIDTH((parser)->buffer))

/*
 * Advance the buffer pointer past the line break at the current position.
 *
 * A CR LF pair is consumed as two characters (two bytes); any other line
 * break is consumed as one character of WIDTH bytes.  In both cases the
 * line counter is incremented and the column is reset to 0.  If the buffer
 * is not positioned at a line break, nothing is consumed and the expression
 * evaluates to NULL.
 */

#define SKIP_LINE(parser)                                                       \
     (IS_CRLF((parser)->buffer) ?                                               \
      ((parser)->mark.index += 2,                                               \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread -= 2,                                                   \
       (parser)->buffer.pointer += 2) :                                         \
      IS_BREAK((parser)->buffer) ?                                              \
      ((parser)->mark.index ++,                                                 \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread --,                                                     \
       (parser)->buffer.pointer += WIDTH((parser)->buffer)) : NULL)

/*
 * Copy a character to a string buffer and advance pointers.
 *
 * The string is first grown with STRING_EXTEND; if that fails the macro
 * evaluates to 0 and nothing is consumed.  Otherwise the current character
 * is transferred with COPY, the index and column are advanced by one, the
 * unread count is decremented, and the macro evaluates to 1.
 *
 * `parser` is parenthesized in the expansion so that any pointer-valued
 * expression may be passed; it is evaluated several times, so it must not
 * have side effects.
 */

#define READ(parser,string)                                                     \
     (STRING_EXTEND((parser),string) ?                                          \
         (COPY(string,(parser)->buffer),                                        \
          (parser)->mark.index ++,                                              \
          (parser)->mark.column ++,                                             \
          (parser)->unread --,                                                  \
          1) : 0)

/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * The string is first grown with STRING_EXTEND; if that fails the macro
 * evaluates to 0.  Otherwise the break at the current position is consumed
 * and the macro evaluates to 1:
 *
 *      CR LF       -> LF   (two characters, two bytes consumed)
 *      CR or LF    -> LF   (one character, one byte consumed)
 *      NEL (C2 85) -> LF   (one character, two bytes consumed)
 *      LS/PS       -> copied unchanged (one character, three bytes)
 *
 * For every break the line counter is incremented and the column reset to
 * 0.  If the buffer is not positioned at a line break, nothing is consumed
 * but the macro still evaluates to 1.
 *
 * `parser` is parenthesized in the expansion so that any pointer-valued
 * expression may be passed; it is evaluated several times, so it must not
 * have side effects.
 */

#define READ_LINE(parser,string)                                                \
    (STRING_EXTEND((parser),string) ?                                           \
    (((CHECK_AT((parser)->buffer,'\r',0)                                        \
       && CHECK_AT((parser)->buffer,'\n',1)) ?      /* CR LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index += 2,                                                \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread -= 2) :                                                  \
     (CHECK_AT((parser)->buffer,'\r',0)                                         \
      || CHECK_AT((parser)->buffer,'\n',0)) ?       /* CR|LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer ++,                                              \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xC2',0)                                       \
      && CHECK_AT((parser)->buffer,'\x85',1)) ?     /* NEL -> LF */             \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xE2',0) &&                                    \
      CHECK_AT((parser)->buffer,'\x80',1) &&                                    \
      (CHECK_AT((parser)->buffer,'\xA8',2) ||                                   \
       CHECK_AT((parser)->buffer,'\xA9',2))) ?      /* LS|PS -> LS|PS */        \
     (*((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) : 0),                                                \
    1) : 0)

570: /*
571:  * Public API declarations.
572:  */

574: YAML_DECLARE(int)
575: yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);

577: /*
578:  * Error handling.
579:  */

581: static int
582: yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
583:         yaml_mark_t context_mark, const char *problem);

585: /*
586:  * High-level token API.
587:  */

589: YAML_DECLARE(int)
590: yaml_parser_fetch_more_tokens(yaml_parser_t *parser);

592: static int
593: yaml_parser_fetch_next_token(yaml_parser_t *parser);

595: /*
596:  * Potential simple keys.
597:  */

599: static int
600: yaml_parser_stale_simple_keys(yaml_parser_t *parser);

602: static int
603: yaml_parser_save_simple_key(yaml_parser_t *parser);

605: static int
606: yaml_parser_remove_simple_key(yaml_parser_t *parser);

608: static int
609: yaml_parser_increase_flow_level(yaml_parser_t *parser);

611: static int
612: yaml_parser_decrease_flow_level(yaml_parser_t *parser);

614: /*
615:  * Indentation treatment.
616:  */

618: static int
619: yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
620:         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);

622: static int
623: yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);

625: /*
626:  * Token fetchers.
627:  */

629: static int
630: yaml_parser_fetch_stream_start(yaml_parser_t *parser);

632: static int
633: yaml_parser_fetch_stream_end(yaml_parser_t *parser);

635: static int
636: yaml_parser_fetch_directive(yaml_parser_t *parser);

638: static int
639: yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
640:         yaml_token_type_t type);

642: static int
643: yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
644:         yaml_token_type_t type);

646: static int
647: yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
648:         yaml_token_type_t type);

650: static int
651: yaml_parser_fetch_flow_entry(yaml_parser_t *parser);

653: static int
654: yaml_parser_fetch_block_entry(yaml_parser_t *parser);

656: static int
657: yaml_parser_fetch_key(yaml_parser_t *parser);

659: static int
660: yaml_parser_fetch_value(yaml_parser_t *parser);

662: static int
663: yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);

665: static int
666: yaml_parser_fetch_tag(yaml_parser_t *parser);

668: static int
669: yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);

671: static int
672: yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);

674: static int
675: yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);

677: /*
678:  * Token scanners.
679:  */

681: static int
682: yaml_parser_scan_to_next_token(yaml_parser_t *parser);

684: static int
685: yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);

687: static int
688: yaml_parser_scan_directive_name(yaml_parser_t *parser,
689:         yaml_mark_t start_mark, yaml_char_t **name);

691: static int
692: yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
693:         yaml_mark_t start_mark, int *major, int *minor);

695: static int
696: yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
697:         yaml_mark_t start_mark, int *number);

699: static int
700: yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
701:         yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);

703: static int
704: yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
705:         yaml_token_type_t type);

707: static int
708: yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);

710: static int
711: yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
712:         yaml_mark_t start_mark, yaml_char_t **handle);

714: static int
715: yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
716:         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);

718: static int
719: yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
720:         yaml_mark_t start_mark, yaml_string_t *string);

722: static int
723: yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
724:         int literal);

726: static int
727: yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
728:         int *indent, yaml_string_t *breaks,
729:         yaml_mark_t start_mark, yaml_mark_t *end_mark);

731: static int
732: yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
733:         int single);

735: static int
736: yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);

738: /*
739:  * Get the next token.
740:  */

742: YAML_DECLARE(int)
743: yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
744: {
745:     assert(parser); /* Non-NULL parser object is expected. */
746:     assert(token);  /* Non-NULL token object is expected. */

748:     /* Erase the token object. */

750:     memset(token, 0, sizeof(yaml_token_t));

752:     /* No tokens after STREAM-END or error. */

754:     if (parser->stream_end_produced || parser->error) {
755:         return 1;
756:     }

758:     /* Ensure that the tokens queue contains enough tokens. */

760:     if (!parser->token_available) {
761:         if (!yaml_parser_fetch_more_tokens(parser))
762:             return 0;
763:     }

765:     /* Fetch the next token from the queue. */

767:     *token = DEQUEUE(parser, parser->tokens);
768:     parser->token_available = 0;
769:     parser->tokens_parsed ++;

771:     if (token->type == YAML_STREAM_END_TOKEN) {
772:         parser->stream_end_produced = 1;
773:     }

775:     return 1;
776: }

778: /*
779:  * Set the scanner error and return 0.
780:  */

782: static int
783: yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
784:         yaml_mark_t context_mark, const char *problem)
785: {
786:     parser->error = YAML_SCANNER_ERROR;
787:     parser->context = context;
788:     parser->context_mark = context_mark;
789:     parser->problem = problem;
790:     parser->problem_mark = parser->mark;

792:     return 0;
793: }

795: /*
796:  * Ensure that the tokens queue contains at least one token which can be
797:  * returned to the Parser.
798:  */

800: YAML_DECLARE(int)
801: yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
802: {
803:     int need_more_tokens;

805:     /* While we need more tokens to fetch, do it. */

807:     while (1)
808:     {
809:         /*
810:          * Check if we really need to fetch more tokens.
811:          */

813:         need_more_tokens = 0;

815:         if (parser->tokens.head == parser->tokens.tail)
816:         {
817:             /* Queue is empty. */

819:             need_more_tokens = 1;
820:         }
821:         else
822:         {
823:             yaml_simple_key_t *simple_key;

825:             /* Check if any potential simple key may occupy the head position. */

827:             if (!yaml_parser_stale_simple_keys(parser))
828:                 return 0;

830:             for (simple_key = parser->simple_keys.start;
831:                     simple_key != parser->simple_keys.top; simple_key++) {
832:                 if (simple_key->possible
833:                         && simple_key->token_number == parser->tokens_parsed) {
834:                     need_more_tokens = 1;
835:                     break;
836:                 }
837:             }
838:         }

840:         /* We are finished. */

842:         if (!need_more_tokens)
843:             break;

845:         /* Fetch the next token. */

847:         if (!yaml_parser_fetch_next_token(parser))
848:             return 0;
849:     }

851:     parser->token_available = 1;

853:     return 1;
854: }

856: /*
857:  * The dispatcher for token fetchers.
858:  */

860: static int
861: yaml_parser_fetch_next_token(yaml_parser_t *parser)
862: {
863:     /* Ensure that the buffer is initialized. */

865:     if (!CACHE(parser, 1))
866:         return 0;

868:     /* Check if we just started scanning.  Fetch STREAM-START then. */

870:     if (!parser->stream_start_produced)
871:         return yaml_parser_fetch_stream_start(parser);

873:     /* Eat whitespaces and comments until we reach the next token. */

875:     if (!yaml_parser_scan_to_next_token(parser))
876:         return 0;

878:     /* Remove obsolete potential simple keys. */

880:     if (!yaml_parser_stale_simple_keys(parser))
881:         return 0;

883:     /* Check the indentation level against the current column. */

885:     if (!yaml_parser_unroll_indent(parser, parser->mark.column))
886:         return 0;

888:     /*
889:      * Ensure that the buffer contains at least 4 characters.  4 is the length
890:      * of the longest indicators ('--- ' and '... ').
891:      */

893:     if (!CACHE(parser, 4))
894:         return 0;

896:     /* Is it the end of the stream? */

898:     if (IS_Z(parser->buffer))
899:         return yaml_parser_fetch_stream_end(parser);

901:     /* Is it a directive? */

903:     if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
904:         return yaml_parser_fetch_directive(parser);

906:     /* Is it the document start indicator? */

908:     if (parser->mark.column == 0
909:             && CHECK_AT(parser->buffer, '-', 0)
910:             && CHECK_AT(parser->buffer, '-', 1)
911:             && CHECK_AT(parser->buffer, '-', 2)
912:             && IS_BLANKZ_AT(parser->buffer, 3))
913:         return yaml_parser_fetch_document_indicator(parser,
914:                 YAML_DOCUMENT_START_TOKEN);

916:     /* Is it the document end indicator? */

918:     if (parser->mark.column == 0
919:             && CHECK_AT(parser->buffer, '.', 0)
920:             && CHECK_AT(parser->buffer, '.', 1)
921:             && CHECK_AT(parser->buffer, '.', 2)
922:             && IS_BLANKZ_AT(parser->buffer, 3))
923:         return yaml_parser_fetch_document_indicator(parser,
924:                 YAML_DOCUMENT_END_TOKEN);

926:     /* Is it the flow sequence start indicator? */

928:     if (CHECK(parser->buffer, '['))
929:         return yaml_parser_fetch_flow_collection_start(parser,
930:                 YAML_FLOW_SEQUENCE_START_TOKEN);

932:     /* Is it the flow mapping start indicator? */

934:     if (CHECK(parser->buffer, '{'))
935:         return yaml_parser_fetch_flow_collection_start(parser,
936:                 YAML_FLOW_MAPPING_START_TOKEN);

938:     /* Is it the flow sequence end indicator? */

940:     if (CHECK(parser->buffer, ']'))
941:         return yaml_parser_fetch_flow_collection_end(parser,
942:                 YAML_FLOW_SEQUENCE_END_TOKEN);

944:     /* Is it the flow mapping end indicator? */

946:     if (CHECK(parser->buffer, '}'))
947:         return yaml_parser_fetch_flow_collection_end(parser,
948:                 YAML_FLOW_MAPPING_END_TOKEN);

950:     /* Is it the flow entry indicator? */

952:     if (CHECK(parser->buffer, ','))
953:         return yaml_parser_fetch_flow_entry(parser);

955:     /* Is it the block entry indicator? */

957:     if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
958:         return yaml_parser_fetch_block_entry(parser);

960:     /* Is it the key indicator? */

962:     if (CHECK(parser->buffer, '?')
963:             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
964:         return yaml_parser_fetch_key(parser);

966:     /* Is it the value indicator? */

968:     if (CHECK(parser->buffer, ':')
969:             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
970:         return yaml_parser_fetch_value(parser);

972:     /* Is it an alias? */

974:     if (CHECK(parser->buffer, '*'))
975:         return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);

977:     /* Is it an anchor? */

979:     if (CHECK(parser->buffer, '&'))
980:         return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);

982:     /* Is it a tag? */

984:     if (CHECK(parser->buffer, '!'))
985:         return yaml_parser_fetch_tag(parser);

987:     /* Is it a literal scalar? */

989:     if (CHECK(parser->buffer, '|') && !parser->flow_level)
990:         return yaml_parser_fetch_block_scalar(parser, 1);

992:     /* Is it a folded scalar? */

994:     if (CHECK(parser->buffer, '>') && !parser->flow_level)
995:         return yaml_parser_fetch_block_scalar(parser, 0);

997:     /* Is it a single-quoted scalar? */

999:     if (CHECK(parser->buffer, '\''))
1000:         return yaml_parser_fetch_flow_scalar(parser, 1);

1002:     /* Is it a double-quoted scalar? */

1004:     if (CHECK(parser->buffer, '"'))
1005:         return yaml_parser_fetch_flow_scalar(parser, 0);

1007:     /*
1008:      * Is it a plain scalar?
1009:      *
1010:      * A plain scalar may start with any non-blank characters except
1011:      *
1012:      *      '-', '?', ':', ',', '[', ']', '{', '}',
1013:      *      '#', '&', '*', '!', '|', '>', '\'', '\"',
1014:      *      '%', '@', '`'.
1015:      *
1016:      * In the block context (and, for the '-' indicator, in the flow context
1017:      * too), it may also start with the characters
1018:      *
1019:      *      '-', '?', ':'
1020:      *
1021:      * if it is followed by a non-space character.
1022:      *
1023:      * The last rule is more restrictive than the specification requires.
1024:      */

1026:     if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1027:                 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1028:                 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1029:                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1030:                 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1031:                 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1032:                 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1033:                 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1034:                 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1035:                 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1036:             (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1037:             (!parser->flow_level &&
1038:              (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1039:              && !IS_BLANKZ_AT(parser->buffer, 1)))
1040:         return yaml_parser_fetch_plain_scalar(parser);

1042:     /*
1043:      * If we don't determine the token type so far, it is an error.
1044:      */

1046:     return yaml_parser_set_scanner_error(parser,
1047:             "while scanning for the next token", parser->mark,
1048:             "found character that cannot start any token");
1049: }

1051: /*
1052:  * Check the list of potential simple keys and remove the positions that
1053:  * cannot contain simple keys anymore.
1054:  */

1056: static int
1057: yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1058: {
1059:     yaml_simple_key_t *simple_key;

1061:     /* Check for a potential simple key for each flow level. */

1063:     for (simple_key = parser->simple_keys.start;
1064:             simple_key != parser->simple_keys.top; simple_key ++)
1065:     {
1066:         /*
1067:          * The specification requires that a simple key
1068:          *
1069:          *  - is limited to a single line,
1070:          *  - is shorter than 1024 characters.
1071:          */

1073:         if (simple_key->possible
1074:                 && (simple_key->mark.line < parser->mark.line
1075:                     || simple_key->mark.index+1024 < parser->mark.index)) {

1077:             /* Check if the potential simple key to be removed is required. */

1079:             if (simple_key->required) {
1080:                 return yaml_parser_set_scanner_error(parser,
1081:                         "while scanning a simple key", simple_key->mark,
1082:                         "could not find expected ':'");
1083:             }

1085:             simple_key->possible = 0;
1086:         }
1087:     }

1089:     return 1;
1090: }

1092: /*
1093:  * Check if a simple key may start at the current position and add it if
1094:  * needed.
1095:  */

1097: static int
1098: yaml_parser_save_simple_key(yaml_parser_t *parser)
1099: {
1100:     /*
1101:      * A simple key is required at the current position if the scanner is in
1102:      * the block context and the current column coincides with the indentation
1103:      * level.
1104:      */

1106:     int required = (!parser->flow_level
1107:             && parser->indent == (ptrdiff_t)parser->mark.column);

1109:     /*
1110:      * If the current position may start a simple key, save it.
1111:      */

1113:     if (parser->simple_key_allowed)
1114:     {
1115:         yaml_simple_key_t simple_key;
1116:         simple_key.possible = 1;
1117:         simple_key.required = required;
1118:         simple_key.token_number =
1119:             parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1120:         simple_key.mark = parser->mark;

1122:         if (!yaml_parser_remove_simple_key(parser)) return 0;

1124:         *(parser->simple_keys.top-1) = simple_key;
1125:     }

1127:     return 1;
1128: }

1130: /*
1131:  * Remove a potential simple key at the current flow level.
1132:  */

1134: static int
1135: yaml_parser_remove_simple_key(yaml_parser_t *parser)
1136: {
1137:     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;

1139:     if (simple_key->possible)
1140:     {
1141:         /* If the key is required, it is an error. */

1143:         if (simple_key->required) {
1144:             return yaml_parser_set_scanner_error(parser,
1145:                     "while scanning a simple key", simple_key->mark,
1146:                     "could not find expected ':'");
1147:         }
1148:     }

1150:     /* Remove the key from the stack. */

1152:     simple_key->possible = 0;

1154:     return 1;
1155: }

1157: /*
1158:  * Increase the flow level and resize the simple key list if needed.
1159:  */

1161: static int
1162: yaml_parser_increase_flow_level(yaml_parser_t *parser)
1163: {
1164:     yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };

1166:     /* Reset the simple key on the next level. */

1168:     if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1169:         return 0;

1171:     /* Increase the flow level. */

1173:     if (parser->flow_level == INT_MAX) {
1174:         parser->error = YAML_MEMORY_ERROR;
1175:         return 0;
1176:     }

1178:     parser->flow_level++;

1180:     return 1;
1181: }

1183: /*
1184:  * Decrease the flow level.
1185:  */

1187: static int
1188: yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1189: {
1190:     if (parser->flow_level) {
1191:         parser->flow_level --;
1192:         (void)POP(parser, parser->simple_keys);
1193:     }

1195:     return 1;
1196: }

1198: /*
1199:  * Push the current indentation level to the stack and set the new level
1200:  * the current column is greater than the indentation level.  In this case,
1201:  * append or insert the specified token into the token queue.
1202:  *
1203:  */

1205: static int
1206: yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1207:         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1208: {
1209:     yaml_token_t token;

1211:     /* In the flow context, do nothing. */

1213:     if (parser->flow_level)
1214:         return 1;

1216:     if (parser->indent < column)
1217:     {
1218:         /*
1219:          * Push the current indentation level to the stack and set the new
1220:          * indentation level.
1221:          */

1223:         if (!PUSH(parser, parser->indents, parser->indent))
1224:             return 0;

1226:         if (column > INT_MAX) {
1227:             parser->error = YAML_MEMORY_ERROR;
1228:             return 0;
1229:         }

1231:         parser->indent = column;

1233:         /* Create a token and insert it into the queue. */

1235:         TOKEN_INIT(token, type, mark, mark);

1237:         if (number == -1) {
1238:             if (!ENQUEUE(parser, parser->tokens, token))
1239:                 return 0;
1240:         }
1241:         else {
1242:             if (!QUEUE_INSERT(parser,
1243:                         parser->tokens, number - parser->tokens_parsed, token))
1244:                 return 0;
1245:         }
1246:     }

1248:     return 1;
1249: }

1251: /*
1252:  * Pop indentation levels from the indents stack until the current level
1253:  * becomes less or equal to the column.  For each indentation level, append
1254:  * the BLOCK-END token.
1255:  */


1258: static int
1259: yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1260: {
1261:     yaml_token_t token;

1263:     /* In the flow context, do nothing. */

1265:     if (parser->flow_level)
1266:         return 1;

1268:     /* Loop through the indentation levels in the stack. */

1270:     while (parser->indent > column)
1271:     {
1272:         /* Create a token and append it to the queue. */

1274:         TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);

1276:         if (!ENQUEUE(parser, parser->tokens, token))
1277:             return 0;

1279:         /* Pop the indentation level. */

1281:         parser->indent = POP(parser, parser->indents);
1282:     }

1284:     return 1;
1285: }

1287: /*
1288:  * Initialize the scanner and produce the STREAM-START token.
1289:  */

1291: static int
1292: yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1293: {
1294:     yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1295:     yaml_token_t token;

1297:     /* Set the initial indentation. */

1299:     parser->indent = -1;

1301:     /* Initialize the simple key stack. */

1303:     if (!PUSH(parser, parser->simple_keys, simple_key))
1304:         return 0;

1306:     /* A simple key is allowed at the beginning of the stream. */

1308:     parser->simple_key_allowed = 1;

1310:     /* We have started. */

1312:     parser->stream_start_produced = 1;

1314:     /* Create the STREAM-START token and append it to the queue. */

1316:     STREAM_START_TOKEN_INIT(token, parser->encoding,
1317:             parser->mark, parser->mark);

1319:     if (!ENQUEUE(parser, parser->tokens, token))
1320:         return 0;

1322:     return 1;
1323: }

1325: /*
1326:  * Produce the STREAM-END token and shut down the scanner.
1327:  */

1329: static int
1330: yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1331: {
1332:     yaml_token_t token;

1334:     /* Force new line. */

1336:     if (parser->mark.column != 0) {
1337:         parser->mark.column = 0;
1338:         parser->mark.line ++;
1339:     }

1341:     /* Reset the indentation level. */

1343:     if (!yaml_parser_unroll_indent(parser, -1))
1344:         return 0;

1346:     /* Reset simple keys. */

1348:     if (!yaml_parser_remove_simple_key(parser))
1349:         return 0;

1351:     parser->simple_key_allowed = 0;

1353:     /* Create the STREAM-END token and append it to the queue. */

1355:     STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);

1357:     if (!ENQUEUE(parser, parser->tokens, token))
1358:         return 0;

1360:     return 1;
1361: }

1363: /*
1364:  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1365:  */

1367: static int
1368: yaml_parser_fetch_directive(yaml_parser_t *parser)
1369: {
1370:     yaml_token_t token;

1372:     /* Reset the indentation level. */

1374:     if (!yaml_parser_unroll_indent(parser, -1))
1375:         return 0;

1377:     /* Reset simple keys. */

1379:     if (!yaml_parser_remove_simple_key(parser))
1380:         return 0;

1382:     parser->simple_key_allowed = 0;

1384:     /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */

1386:     if (!yaml_parser_scan_directive(parser, &token))
1387:         return 0;

1389:     /* Append the token to the queue. */

1391:     if (!ENQUEUE(parser, parser->tokens, token)) {
1392:         yaml_token_delete(&token);
1393:         return 0;
1394:     }

1396:     return 1;
1397: }

1399: /*
1400:  * Produce the DOCUMENT-START or DOCUMENT-END token.
1401:  */

1403: static int
1404: yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1405:         yaml_token_type_t type)
1406: {
1407:     yaml_mark_t start_mark, end_mark;
1408:     yaml_token_t token;

1410:     /* Reset the indentation level. */

1412:     if (!yaml_parser_unroll_indent(parser, -1))
1413:         return 0;

1415:     /* Reset simple keys. */

1417:     if (!yaml_parser_remove_simple_key(parser))
1418:         return 0;

1420:     parser->simple_key_allowed = 0;

1422:     /* Consume the token. */

1424:     start_mark = parser->mark;

1426:     SKIP(parser);
1427:     SKIP(parser);
1428:     SKIP(parser);

1430:     end_mark = parser->mark;

1432:     /* Create the DOCUMENT-START or DOCUMENT-END token. */

1434:     TOKEN_INIT(token, type, start_mark, end_mark);

1436:     /* Append the token to the queue. */

1438:     if (!ENQUEUE(parser, parser->tokens, token))
1439:         return 0;

1441:     return 1;
1442: }

1444: /*
1445:  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1446:  */

1448: static int
1449: yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1450:         yaml_token_type_t type)
1451: {
1452:     yaml_mark_t start_mark, end_mark;
1453:     yaml_token_t token;

1455:     /* The indicators '[' and '{' may start a simple key. */

1457:     if (!yaml_parser_save_simple_key(parser))
1458:         return 0;

1460:     /* Increase the flow level. */

1462:     if (!yaml_parser_increase_flow_level(parser))
1463:         return 0;

1465:     /* A simple key may follow the indicators '[' and '{'. */

1467:     parser->simple_key_allowed = 1;

1469:     /* Consume the token. */

1471:     start_mark = parser->mark;
1472:     SKIP(parser);
1473:     end_mark = parser->mark;

1475:     /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */

1477:     TOKEN_INIT(token, type, start_mark, end_mark);

1479:     /* Append the token to the queue. */

1481:     if (!ENQUEUE(parser, parser->tokens, token))
1482:         return 0;

1484:     return 1;
1485: }

1487: /*
1488:  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1489:  */

1491: static int
1492: yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1493:         yaml_token_type_t type)
1494: {
1495:     yaml_mark_t start_mark, end_mark;
1496:     yaml_token_t token;

1498:     /* Reset any potential simple key on the current flow level. */

1500:     if (!yaml_parser_remove_simple_key(parser))
1501:         return 0;

1503:     /* Decrease the flow level. */

1505:     if (!yaml_parser_decrease_flow_level(parser))
1506:         return 0;

1508:     /* No simple keys after the indicators ']' and '}'. */

1510:     parser->simple_key_allowed = 0;

1512:     /* Consume the token. */

1514:     start_mark = parser->mark;
1515:     SKIP(parser);
1516:     end_mark = parser->mark;

1518:     /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */

1520:     TOKEN_INIT(token, type, start_mark, end_mark);

1522:     /* Append the token to the queue. */

1524:     if (!ENQUEUE(parser, parser->tokens, token))
1525:         return 0;

1527:     return 1;
1528: }

1530: /*
1531:  * Produce the FLOW-ENTRY token.
1532:  */

1534: static int
1535: yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1536: {
1537:     yaml_mark_t start_mark, end_mark;
1538:     yaml_token_t token;

1540:     /* Reset any potential simple keys on the current flow level. */

1542:     if (!yaml_parser_remove_simple_key(parser))
1543:         return 0;

1545:     /* Simple keys are allowed after ','. */

1547:     parser->simple_key_allowed = 1;

1549:     /* Consume the token. */

1551:     start_mark = parser->mark;
1552:     SKIP(parser);
1553:     end_mark = parser->mark;

1555:     /* Create the FLOW-ENTRY token and append it to the queue. */

1557:     TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);

1559:     if (!ENQUEUE(parser, parser->tokens, token))
1560:         return 0;

1562:     return 1;
1563: }

1565: /*
1566:  * Produce the BLOCK-ENTRY token.
1567:  */

1569: static int
1570: yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1571: {
1572:     yaml_mark_t start_mark, end_mark;
1573:     yaml_token_t token;

1575:     /* Check if the scanner is in the block context. */

1577:     if (!parser->flow_level)
1578:     {
1579:         /* Check if we are allowed to start a new entry. */

1581:         if (!parser->simple_key_allowed) {
1582:             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1583:                     "block sequence entries are not allowed in this context");
1584:         }

1586:         /* Add the BLOCK-SEQUENCE-START token if needed. */

1588:         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1589:                     YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
1590:             return 0;
1591:     }
1592:     else
1593:     {
1594:         /*
1595:          * It is an error for the '-' indicator to occur in the flow context,
1596:          * but we let the Parser detect and report about it because the Parser
1597:          * is able to point to the context.
1598:          */
1599:     }

1601:     /* Reset any potential simple keys on the current flow level. */

1603:     if (!yaml_parser_remove_simple_key(parser))
1604:         return 0;

1606:     /* Simple keys are allowed after '-'. */

1608:     parser->simple_key_allowed = 1;

1610:     /* Consume the token. */

1612:     start_mark = parser->mark;
1613:     SKIP(parser);
1614:     end_mark = parser->mark;

1616:     /* Create the BLOCK-ENTRY token and append it to the queue. */

1618:     TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);

1620:     if (!ENQUEUE(parser, parser->tokens, token))
1621:         return 0;

1623:     return 1;
1624: }

1626: /*
1627:  * Produce the KEY token.
1628:  */

1630: static int
1631: yaml_parser_fetch_key(yaml_parser_t *parser)
1632: {
1633:     yaml_mark_t start_mark, end_mark;
1634:     yaml_token_t token;

1636:     /* In the block context, additional checks are required. */

1638:     if (!parser->flow_level)
1639:     {
1640:         /* Check if we are allowed to start a new key (not necessary simple). */

1642:         if (!parser->simple_key_allowed) {
1643:             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1644:                     "mapping keys are not allowed in this context");
1645:         }

1647:         /* Add the BLOCK-MAPPING-START token if needed. */

1649:         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1650:                     YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1651:             return 0;
1652:     }

1654:     /* Reset any potential simple keys on the current flow level. */

1656:     if (!yaml_parser_remove_simple_key(parser))
1657:         return 0;

1659:     /* Simple keys are allowed after '?' in the block context. */

1661:     parser->simple_key_allowed = (!parser->flow_level);

1663:     /* Consume the token. */

1665:     start_mark = parser->mark;
1666:     SKIP(parser);
1667:     end_mark = parser->mark;

1669:     /* Create the KEY token and append it to the queue. */

1671:     TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);

1673:     if (!ENQUEUE(parser, parser->tokens, token))
1674:         return 0;

1676:     return 1;
1677: }

1679: /*
1680:  * Produce the VALUE token.
1681:  */

1683: static int
1684: yaml_parser_fetch_value(yaml_parser_t *parser)
1685: {
1686:     yaml_mark_t start_mark, end_mark;
1687:     yaml_token_t token;
1688:     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;

1690:     /* Have we found a simple key? */

1692:     if (simple_key->possible)
1693:     {

1695:         /* Create the KEY token and insert it into the queue. */

1697:         TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);

1699:         if (!QUEUE_INSERT(parser, parser->tokens,
1700:                     simple_key->token_number - parser->tokens_parsed, token))
1701:             return 0;

1703:         /* In the block context, we may need to add the BLOCK-MAPPING-START token. */

1705:         if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1706:                     simple_key->token_number,
1707:                     YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1708:             return 0;

1710:         /* Remove the simple key. */

1712:         simple_key->possible = 0;

1714:         /* A simple key cannot follow another simple key. */

1716:         parser->simple_key_allowed = 0;
1717:     }
1718:     else
1719:     {
1720:         /* The ':' indicator follows a complex key. */

1722:         /* In the block context, extra checks are required. */

1724:         if (!parser->flow_level)
1725:         {
1726:             /* Check if we are allowed to start a complex value. */

1728:             if (!parser->simple_key_allowed) {
1729:                 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1730:                         "mapping values are not allowed in this context");
1731:             }

1733:             /* Add the BLOCK-MAPPING-START token if needed. */

1735:             if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1736:                         YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1737:                 return 0;
1738:         }

1740:         /* Simple keys after ':' are allowed in the block context. */

1742:         parser->simple_key_allowed = (!parser->flow_level);
1743:     }

1745:     /* Consume the token. */

1747:     start_mark = parser->mark;
1748:     SKIP(parser);
1749:     end_mark = parser->mark;

1751:     /* Create the VALUE token and append it to the queue. */

1753:     TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);

1755:     if (!ENQUEUE(parser, parser->tokens, token))
1756:         return 0;

1758:     return 1;
1759: }

1761: /*
1762:  * Produce the ALIAS or ANCHOR token.
1763:  */

1765: static int
1766: yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
1767: {
1768:     yaml_token_t token;

1770:     /* An anchor or an alias could be a simple key. */

1772:     if (!yaml_parser_save_simple_key(parser))
1773:         return 0;

1775:     /* A simple key cannot follow an anchor or an alias. */

1777:     parser->simple_key_allowed = 0;

1779:     /* Create the ALIAS or ANCHOR token and append it to the queue. */

1781:     if (!yaml_parser_scan_anchor(parser, &token, type))
1782:         return 0;

1784:     if (!ENQUEUE(parser, parser->tokens, token)) {
1785:         yaml_token_delete(&token);
1786:         return 0;
1787:     }
1788:     return 1;
1789: }

1791: /*
1792:  * Produce the TAG token.
1793:  */

1795: static int
1796: yaml_parser_fetch_tag(yaml_parser_t *parser)
1797: {
1798:     yaml_token_t token;

1800:     /* A tag could be a simple key. */

1802:     if (!yaml_parser_save_simple_key(parser))
1803:         return 0;

1805:     /* A simple key cannot follow a tag. */

1807:     parser->simple_key_allowed = 0;

1809:     /* Create the TAG token and append it to the queue. */

1811:     if (!yaml_parser_scan_tag(parser, &token))
1812:         return 0;

1814:     if (!ENQUEUE(parser, parser->tokens, token)) {
1815:         yaml_token_delete(&token);
1816:         return 0;
1817:     }

1819:     return 1;
1820: }

1822: /*
1823:  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1824:  */

1826: static int
1827: yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
1828: {
1829:     yaml_token_t token;

1831:     /* Remove any potential simple keys. */

1833:     if (!yaml_parser_remove_simple_key(parser))
1834:         return 0;

1836:     /* A simple key may follow a block scalar. */

1838:     parser->simple_key_allowed = 1;

1840:     /* Create the SCALAR token and append it to the queue. */

1842:     if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1843:         return 0;

1845:     if (!ENQUEUE(parser, parser->tokens, token)) {
1846:         yaml_token_delete(&token);
1847:         return 0;
1848:     }

1850:     return 1;
1851: }

1853: /*
1854:  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1855:  */

1857: static int
1858: yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
1859: {
1860:     yaml_token_t token;

1862:     /* A plain scalar could be a simple key. */

1864:     if (!yaml_parser_save_simple_key(parser))
1865:         return 0;

1867:     /* A simple key cannot follow a flow scalar. */

1869:     parser->simple_key_allowed = 0;

1871:     /* Create the SCALAR token and append it to the queue. */

1873:     if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1874:         return 0;

1876:     if (!ENQUEUE(parser, parser->tokens, token)) {
1877:         yaml_token_delete(&token);
1878:         return 0;
1879:     }

1881:     return 1;
1882: }

1884: /*
1885:  * Produce the SCALAR(...,plain) token.
1886:  */

1888: static int
1889: yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
1890: {
1891:     yaml_token_t token;

1893:     /* A plain scalar could be a simple key. */

1895:     if (!yaml_parser_save_simple_key(parser))
1896:         return 0;

1898:     /* A simple key cannot follow a flow scalar. */

1900:     parser->simple_key_allowed = 0;

1902:     /* Create the SCALAR token and append it to the queue. */

1904:     if (!yaml_parser_scan_plain_scalar(parser, &token))
1905:         return 0;

1907:     if (!ENQUEUE(parser, parser->tokens, token)) {
1908:         yaml_token_delete(&token);
1909:         return 0;
1910:     }

1912:     return 1;
1913: }

1915: /*
1916:  * Eat whitespaces and comments until the next token is found.
1917:  */

1919: static int
1920: yaml_parser_scan_to_next_token(yaml_parser_t *parser)
1921: {
1922:     /* Until the next token is not found. */

1924:     while (1)
1925:     {
1926:         /* Allow the BOM mark to start a line. */

1928:         if (!CACHE(parser, 1)) return 0;

1930:         if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1931:             SKIP(parser);

1933:         /*
1934:          * Eat whitespaces.
1935:          *
1936:          * Tabs are allowed:
1937:          *
1938:          *  - in the flow context;
1939:          *  - in the block context, but not at the beginning of the line or
1940:          *  after '-', '?', or ':' (complex value).
1941:          */

1943:         if (!CACHE(parser, 1)) return 0;

1945:         while (CHECK(parser->buffer,' ') ||
1946:                 ((parser->flow_level || !parser->simple_key_allowed) &&
1947:                  CHECK(parser->buffer, '\t'))) {
1948:             SKIP(parser);
1949:             if (!CACHE(parser, 1)) return 0;
1950:         }

1952:         /* Eat a comment until a line break. */

1954:         if (CHECK(parser->buffer, '#')) {
1955:             while (!IS_BREAKZ(parser->buffer)) {
1956:                 SKIP(parser);
1957:                 if (!CACHE(parser, 1)) return 0;
1958:             }
1959:         }

1961:         /* If it is a line break, eat it. */

1963:         if (IS_BREAK(parser->buffer))
1964:         {
1965:             if (!CACHE(parser, 2)) return 0;
1966:             SKIP_LINE(parser);

1968:             /* In the block context, a new line may start a simple key. */

1970:             if (!parser->flow_level) {
1971:                 parser->simple_key_allowed = 1;
1972:             }
1973:         }
1974:         else
1975:         {
1976:             /* We have found a token. */

1978:             break;
1979:         }
1980:     }

1982:     return 1;
1983: }

1985: /*
1986:  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1987:  *
1988:  * Scope:
1989:  *      %YAML    1.1    # a comment \n
1990:  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1991:  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
1992:  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1993:  */

1995: int
1996: yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
1997: {
1998:     yaml_mark_t start_mark, end_mark;
1999:     yaml_char_t *name = NULL;
2000:     int major, minor;
2001:     yaml_char_t *handle = NULL, *prefix = NULL;

2003:     /* Eat '%'. */

2005:     start_mark = parser->mark;

2007:     SKIP(parser);

2009:     /* Scan the directive name. */

2011:     if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2012:         goto error;

2014:     /* Is it a YAML directive? */

2016:     if (strcmp((char *)name, "YAML") == 0)
2017:     {
2018:         /* Scan the VERSION directive value. */

2020:         if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2021:                     &major, &minor))
2022:             goto error;

2024:         end_mark = parser->mark;

2026:         /* Create a VERSION-DIRECTIVE token. */

2028:         VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2029:                 start_mark, end_mark);
2030:     }

2032:     /* Is it a TAG directive? */

2034:     else if (strcmp((char *)name, "TAG") == 0)
2035:     {
2036:         /* Scan the TAG directive value. */

2038:         if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2039:                     &handle, &prefix))
2040:             goto error;

2042:         end_mark = parser->mark;

2044:         /* Create a TAG-DIRECTIVE token. */

2046:         TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2047:                 start_mark, end_mark);
2048:     }

2050:     /* Unknown directive. */

2052:     else
2053:     {
2054:         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2055:                 start_mark, "found unknown directive name");
2056:         goto error;
2057:     }

2059:     /* Eat the rest of the line including any comments. */

2061:     if (!CACHE(parser, 1)) goto error;

2063:     while (IS_BLANK(parser->buffer)) {
2064:         SKIP(parser);
2065:         if (!CACHE(parser, 1)) goto error;
2066:     }

2068:     if (CHECK(parser->buffer, '#')) {
2069:         while (!IS_BREAKZ(parser->buffer)) {
2070:             SKIP(parser);
2071:             if (!CACHE(parser, 1)) goto error;
2072:         }
2073:     }

2075:     /* Check if we are at the end of the line. */

2077:     if (!IS_BREAKZ(parser->buffer)) {
2078:         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2079:                 start_mark, "did not find expected comment or line break");
2080:         goto error;
2081:     }

2083:     /* Eat a line break. */

2085:     if (IS_BREAK(parser->buffer)) {
2086:         if (!CACHE(parser, 2)) goto error;
2087:         SKIP_LINE(parser);
2088:     }

2090:     yaml_free(name);

2092:     return 1;

2094: error:
2095:     yaml_free(prefix);
2096:     yaml_free(handle);
2097:     yaml_free(name);
2098:     return 0;
2099: }

2101: /*
2102:  * Scan the directive name.
2103:  *
2104:  * Scope:
2105:  *      %YAML   1.1     # a comment \n
2106:  *       ^^^^
2107:  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2108:  *       ^^^
2109:  */

2111: static int
2112: yaml_parser_scan_directive_name(yaml_parser_t *parser,
2113:         yaml_mark_t start_mark, yaml_char_t **name)
2114: {
2115:     yaml_string_t string = NULL_STRING;

2117:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;

2119:     /* Consume the directive name. */

2121:     if (!CACHE(parser, 1)) goto error;

2123:     while (IS_ALPHA(parser->buffer))
2124:     {
2125:         if (!READ(parser, string)) goto error;
2126:         if (!CACHE(parser, 1)) goto error;
2127:     }

2129:     /* Check if the name is empty. */

2131:     if (string.start == string.pointer) {
2132:         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2133:                 start_mark, "could not find expected directive name");
2134:         goto error;
2135:     }

2137:     /* Check for an blank character after the name. */

2139:     if (!IS_BLANKZ(parser->buffer)) {
2140:         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2141:                 start_mark, "found unexpected non-alphabetical character");
2142:         goto error;
2143:     }

2145:     *name = string.start;

2147:     return 1;

2149: error:
2150:     STRING_DEL(parser, string);
2151:     return 0;
2152: }

2154: /*
2155:  * Scan the value of VERSION-DIRECTIVE.
2156:  *
2157:  * Scope:
2158:  *      %YAML   1.1     # a comment \n
2159:  *           ^^^^^^
2160:  */

2162: static int
2163: yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2164:         yaml_mark_t start_mark, int *major, int *minor)
2165: {
2166:     /* Eat whitespaces. */

2168:     if (!CACHE(parser, 1)) return 0;

2170:     while (IS_BLANK(parser->buffer)) {
2171:         SKIP(parser);
2172:         if (!CACHE(parser, 1)) return 0;
2173:     }

2175:     /* Consume the major version number. */

2177:     if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2178:         return 0;

2180:     /* Eat '.'. */

2182:     if (!CHECK(parser->buffer, '.')) {
2183:         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2184:                 start_mark, "did not find expected digit or '.' character");
2185:     }

2187:     SKIP(parser);

2189:     /* Consume the minor version number. */

2191:     if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2192:         return 0;

2194:     return 1;
2195: }

2197: #define MAX_NUMBER_LENGTH   9

2199: /*
2200:  * Scan the version number of VERSION-DIRECTIVE.
2201:  *
2202:  * Scope:
2203:  *      %YAML   1.1     # a comment \n
2204:  *              ^
2205:  *      %YAML   1.1     # a comment \n
2206:  *                ^
2207:  */

2209: static int
2210: yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2211:         yaml_mark_t start_mark, int *number)
2212: {
2213:     int value = 0;
2214:     size_t length = 0;

2216:     /* Repeat while the next character is digit. */

2218:     if (!CACHE(parser, 1)) return 0;

2220:     while (IS_DIGIT(parser->buffer))
2221:     {
2222:         /* Check if the number is too long. */

2224:         if (++length > MAX_NUMBER_LENGTH) {
2225:             return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2226:                     start_mark, "found extremely long version number");
2227:         }

2229:         value = value*10 + AS_DIGIT(parser->buffer);

2231:         SKIP(parser);

2233:         if (!CACHE(parser, 1)) return 0;
2234:     }

2236:     /* Check if the number was present. */

2238:     if (!length) {
2239:         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2240:                 start_mark, "did not find expected version number");
2241:     }

2243:     *number = value;

2245:     return 1;
2246: }

2248: /*
2249:  * Scan the value of a TAG-DIRECTIVE token.
2250:  *
2251:  * Scope:
2252:  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2253:  *          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2254:  */

2256: static int
2257: yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2258:         yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2259: {
2260:     yaml_char_t *handle_value = NULL;
2261:     yaml_char_t *prefix_value = NULL;

2263:     /* Eat whitespaces. */

2265:     if (!CACHE(parser, 1)) goto error;

2267:     while (IS_BLANK(parser->buffer)) {
2268:         SKIP(parser);
2269:         if (!CACHE(parser, 1)) goto error;
2270:     }

2272:     /* Scan a handle. */

2274:     if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2275:         goto error;

2277:     /* Expect a whitespace. */

2279:     if (!CACHE(parser, 1)) goto error;

2281:     if (!IS_BLANK(parser->buffer)) {
2282:         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2283:                 start_mark, "did not find expected whitespace");
2284:         goto error;
2285:     }

2287:     /* Eat whitespaces. */

2289:     while (IS_BLANK(parser->buffer)) {
2290:         SKIP(parser);
2291:         if (!CACHE(parser, 1)) goto error;
2292:     }

2294:     /* Scan a prefix. */

2296:     if (!yaml_parser_scan_tag_uri(parser, 1, 1, NULL, start_mark, &prefix_value))
2297:         goto error;

2299:     /* Expect a whitespace or line break. */

2301:     if (!CACHE(parser, 1)) goto error;

2303:     if (!IS_BLANKZ(parser->buffer)) {
2304:         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2305:                 start_mark, "did not find expected whitespace or line break");
2306:         goto error;
2307:     }

2309:     *handle = handle_value;
2310:     *prefix = prefix_value;

2312:     return 1;

2314: error:
2315:     yaml_free(handle_value);
2316:     yaml_free(prefix_value);
2317:     return 0;
2318: }

2320: static int
2321: yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
2322:         yaml_token_type_t type)
2323: {
2324:     int length = 0;
2325:     yaml_mark_t start_mark, end_mark;
2326:     yaml_string_t string = NULL_STRING;

2328:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;

2330:     /* Eat the indicator character. */

2332:     start_mark = parser->mark;

2334:     SKIP(parser);

2336:     /* Consume the value. */

2338:     if (!CACHE(parser, 1)) goto error;

2340:     while (IS_ALPHA(parser->buffer)) {
2341:         if (!READ(parser, string)) goto error;
2342:         if (!CACHE(parser, 1)) goto error;
2343:         length ++;
2344:     }

2346:     end_mark = parser->mark;

2348:     /*
2349:      * Check if length of the anchor is greater than 0 and it is followed by
2350:      * a whitespace character or one of the indicators:
2351:      *
2352:      *      '?', ':', ',', ']', '}', '%', '@', '`'.
2353:      */

2355:     if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2356:                 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2357:                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2358:                 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2359:                 || CHECK(parser->buffer, '`'))) {
2360:         yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2361:                 "while scanning an anchor" : "while scanning an alias", start_mark,
2362:                 "did not find expected alphabetic or numeric character");
2363:         goto error;
2364:     }

2366:     /* Create a token. */

2368:     if (type == YAML_ANCHOR_TOKEN) {
2369:         ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2370:     }
2371:     else {
2372:         ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2373:     }

2375:     return 1;

2377: error:
2378:     STRING_DEL(parser, string);
2379:     return 0;
2380: }

2382: /*
2383:  * Scan a TAG token.
2384:  */

2386: static int
2387: yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
2388: {
2389:     yaml_char_t *handle = NULL;
2390:     yaml_char_t *suffix = NULL;
2391:     yaml_mark_t start_mark, end_mark;

2393:     start_mark = parser->mark;

2395:     /* Check if the tag is in the canonical form. */

2397:     if (!CACHE(parser, 2)) goto error;

2399:     if (CHECK_AT(parser->buffer, '<', 1))
2400:     {
2401:         /* Set the handle to '' */

2403:         handle = YAML_MALLOC(1);
2404:         if (!handle) goto error;
2405:         handle[0] = '\0';

2407:         /* Eat '!<' */

2409:         SKIP(parser);
2410:         SKIP(parser);

2412:         /* Consume the tag value. */

2414:         if (!yaml_parser_scan_tag_uri(parser, 1, 0, NULL, start_mark, &suffix))
2415:             goto error;

2417:         /* Check for '>' and eat it. */

2419:         if (!CHECK(parser->buffer, '>')) {
2420:             yaml_parser_set_scanner_error(parser, "while scanning a tag",
2421:                     start_mark, "did not find the expected '>'");
2422:             goto error;
2423:         }

2425:         SKIP(parser);
2426:     }
2427:     else
2428:     {
2429:         /* The tag has either the '!suffix' or the '!handle!suffix' form. */

2431:         /* First, try to scan a handle. */

2433:         if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2434:             goto error;

2436:         /* Check if it is, indeed, handle. */

2438:         if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2439:         {
2440:             /* Scan the suffix now. */

2442:             if (!yaml_parser_scan_tag_uri(parser, 0, 0, NULL, start_mark, &suffix))
2443:                 goto error;
2444:         }
2445:         else
2446:         {
2447:             /* It wasn't a handle after all.  Scan the rest of the tag. */

2449:             if (!yaml_parser_scan_tag_uri(parser, 0, 0, handle, start_mark, &suffix))
2450:                 goto error;

2452:             /* Set the handle to '!'. */

2454:             yaml_free(handle);
2455:             handle = YAML_MALLOC(2);
2456:             if (!handle) goto error;
2457:             handle[0] = '!';
2458:             handle[1] = '\0';

2460:             /*
2461:              * A special case: the '!' tag.  Set the handle to '' and the
2462:              * suffix to '!'.
2463:              */

2465:             if (suffix[0] == '\0') {
2466:                 yaml_char_t *tmp = handle;
2467:                 handle = suffix;
2468:                 suffix = tmp;
2469:             }
2470:         }
2471:     }

2473:     /* Check the character which ends the tag. */

2475:     if (!CACHE(parser, 1)) goto error;

2477:     if (!IS_BLANKZ(parser->buffer)) {
2478:         if (!parser->flow_level || !CHECK(parser->buffer, ',') ) {
2479:             yaml_parser_set_scanner_error(parser, "while scanning a tag",
2480:                     start_mark, "did not find expected whitespace or line break");
2481:             goto error;
2482:         }
2483:     }

2485:     end_mark = parser->mark;

2487:     /* Create a token. */

2489:     TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);

2491:     return 1;

2493: error:
2494:     yaml_free(handle);
2495:     yaml_free(suffix);
2496:     return 0;
2497: }

2499: /*
2500:  * Scan a tag handle.
2501:  */

2503: static int
2504: yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
2505:         yaml_mark_t start_mark, yaml_char_t **handle)
2506: {
2507:     yaml_string_t string = NULL_STRING;

2509:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;

2511:     /* Check the initial '!' character. */

2513:     if (!CACHE(parser, 1)) goto error;

2515:     if (!CHECK(parser->buffer, '!')) {
2516:         yaml_parser_set_scanner_error(parser, directive ?
2517:                 "while scanning a tag directive" : "while scanning a tag",
2518:                 start_mark, "did not find expected '!'");
2519:         goto error;
2520:     }

2522:     /* Copy the '!' character. */

2524:     if (!READ(parser, string)) goto error;

2526:     /* Copy all subsequent alphabetical and numerical characters. */

2528:     if (!CACHE(parser, 1)) goto error;

2530:     while (IS_ALPHA(parser->buffer))
2531:     {
2532:         if (!READ(parser, string)) goto error;
2533:         if (!CACHE(parser, 1)) goto error;
2534:     }

2536:     /* Check if the trailing character is '!' and copy it. */

2538:     if (CHECK(parser->buffer, '!'))
2539:     {
2540:         if (!READ(parser, string)) goto error;
2541:     }
2542:     else
2543:     {
2544:         /*
2545:          * It's either the '!' tag or not really a tag handle.  If it's a %TAG
2546:          * directive, it's an error.  If it's a tag token, it must be a part of
2547:          * URI.
2548:          */

2550:         if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2551:             yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2552:                     start_mark, "did not find expected '!'");
2553:             goto error;
2554:         }
2555:     }

2557:     *handle = string.start;

2559:     return 1;

2561: error:
2562:     STRING_DEL(parser, string);
2563:     return 0;
2564: }

2566: /*
2567:  * Scan a tag.
2568:  */

2570: static int
2571: yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
2572:         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2573: {
2574:     size_t length = head ? strlen((char *)head) : 0;
2575:     yaml_string_t string = NULL_STRING;

2577:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;

2579:     /* Resize the string to include the head. */

2581:     while ((size_t)(string.end - string.start) <= length) {
2582:         if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2583:             parser->error = YAML_MEMORY_ERROR;
2584:             goto error;
2585:         }
2586:     }

2588:     /*
2589:      * Copy the head if needed.
2590:      *
2591:      * Note that we don't copy the leading '!' character.
2592:      */

2594:     if (length > 1) {
2595:         memcpy(string.start, head+1, length-1);
2596:         string.pointer += length-1;
2597:     }

2599:     /* Scan the tag. */

2601:     if (!CACHE(parser, 1)) goto error;

2603:     /*
2604:      * The set of characters that may appear in URI is as follows:
2605:      *
2606:      *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2607:      *      '=', '+', '$', '.', '!', '~', '*', '\'', '(', ')', '%'.
2608:      *
2609:      * If we are inside a verbatim tag <...> (parameter uri_char is true)
2610:      * then also the following flow indicators are allowed:
2611:      *      ',', '[', ']'
2612:      */

2614:     while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2615:             || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2616:             || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2617:             || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2618:             || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2619:             || CHECK(parser->buffer, '.') || CHECK(parser->buffer, '%')
2620:             || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2621:             || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2622:             || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2623:             || (uri_char && (
2624:                 CHECK(parser->buffer, ',')
2625:                 || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2626:                 )
2627:             ))
2628:     {
2629:         /* Check if it is a URI-escape sequence. */

2631:         if (CHECK(parser->buffer, '%')) {
2632:             if (!STRING_EXTEND(parser, string))
2633:                 goto error;

2635:             if (!yaml_parser_scan_uri_escapes(parser,
2636:                         directive, start_mark, &string)) goto error;
2637:         }
2638:         else {
2639:             if (!READ(parser, string)) goto error;
2640:         }

2642:         length ++;
2643:         if (!CACHE(parser, 1)) goto error;
2644:     }

2646:     /* Check if the tag is non-empty. */

2648:     if (!length) {
2649:         if (!STRING_EXTEND(parser, string))
2650:             goto error;

2652:         yaml_parser_set_scanner_error(parser, directive ?
2653:                 "while parsing a %TAG directive" : "while parsing a tag",
2654:                 start_mark, "did not find expected tag URI");
2655:         goto error;
2656:     }

2658:     *uri = string.start;

2660:     return 1;

2662: error:
2663:     STRING_DEL(parser, string);
2664:     return 0;
2665: }

2667: /*
2668:  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2669:  */

2671: static int
2672: yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
2673:         yaml_mark_t start_mark, yaml_string_t *string)
2674: {
2675:     int width = 0;

2677:     /* Decode the required number of characters. */

2679:     do {

2681:         unsigned char octet = 0;

2683:         /* Check for a URI-escaped octet. */

2685:         if (!CACHE(parser, 3)) return 0;

2687:         if (!(CHECK(parser->buffer, '%')
2688:                     && IS_HEX_AT(parser->buffer, 1)
2689:                     && IS_HEX_AT(parser->buffer, 2))) {
2690:             return yaml_parser_set_scanner_error(parser, directive ?
2691:                     "while parsing a %TAG directive" : "while parsing a tag",
2692:                     start_mark, "did not find URI escaped octet");
2693:         }

2695:         /* Get the octet. */

2697:         octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);

2699:         /* If it is the leading octet, determine the length of the UTF-8 sequence. */

2701:         if (!width)
2702:         {
2703:             width = (octet & 0x80) == 0x00 ? 1 :
2704:                     (octet & 0xE0) == 0xC0 ? 2 :
2705:                     (octet & 0xF0) == 0xE0 ? 3 :
2706:                     (octet & 0xF8) == 0xF0 ? 4 : 0;
2707:             if (!width) {
2708:                 return yaml_parser_set_scanner_error(parser, directive ?
2709:                         "while parsing a %TAG directive" : "while parsing a tag",
2710:                         start_mark, "found an incorrect leading UTF-8 octet");
2711:             }
2712:         }
2713:         else
2714:         {
2715:             /* Check if the trailing octet is correct. */

2717:             if ((octet & 0xC0) != 0x80) {
2718:                 return yaml_parser_set_scanner_error(parser, directive ?
2719:                         "while parsing a %TAG directive" : "while parsing a tag",
2720:                         start_mark, "found an incorrect trailing UTF-8 octet");
2721:             }
2722:         }

2724:         /* Copy the octet and move the pointers. */

2726:         *(string->pointer++) = octet;
2727:         SKIP(parser);
2728:         SKIP(parser);
2729:         SKIP(parser);

2731:     } while (--width);

2733:     return 1;
2734: }

/*
 * Scan a block scalar and build a SCALAR token.
 *
 * `literal` is non-zero for the literal style (YAML_LITERAL_SCALAR_STYLE)
 * and zero for the folded style (YAML_FOLDED_SCALAR_STYLE).
 *
 * The header may carry a chomping indicator ('+' or '-') and an indentation
 * indicator ('1'-'9') in either order, followed by optional blanks and a
 * comment.  The content consists of the following lines that start at the
 * block indentation column.
 *
 * Returns 1 on success; the token then references the content buffer.
 * Returns 0 on failure after releasing every temporary buffer.
 */

static int
yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
        int literal)
{
    yaml_mark_t start_mark;
    yaml_mark_t end_mark;
    yaml_string_t string = NULL_STRING;             /* The scalar content. */
    yaml_string_t leading_break = NULL_STRING;      /* Break ending the last content line. */
    yaml_string_t trailing_breaks = NULL_STRING;    /* Breaks of the empty lines after it. */
    int chomping = 0;           /* +1 for '+', -1 for '-', 0 if not given. */
    int increment = 0;          /* Indentation indicator, 0 if not given. */
    int indent = 0;             /* Content column, 0 until it is known. */
    int leading_blank = 0;      /* Did the previous content line start with a blank? */
    int trailing_blank = 0;     /* Does the current content line start with a blank? */

    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;

    /* Eat the indicator '|' or '>'. */

    start_mark = parser->mark;

    SKIP(parser);

    /* Scan the additional block scalar indicators. */

    if (!CACHE(parser, 1)) goto error;

    /* Check for a chomping indicator. */

    if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
    {
        /* Set the chomping method and eat the indicator. */

        chomping = CHECK(parser->buffer, '+') ? +1 : -1;

        SKIP(parser);

        /* Check for an indentation indicator. */

        if (!CACHE(parser, 1)) goto error;

        if (IS_DIGIT(parser->buffer))
        {
            /* Check that the indentation is greater than 0. */

            if (CHECK(parser->buffer, '0')) {
                yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                        start_mark, "found an indentation indicator equal to 0");
                goto error;
            }

            /* Get the indentation level and eat the indicator. */

            increment = AS_DIGIT(parser->buffer);

            SKIP(parser);
        }
    }

    /* Do the same as above, but in the opposite order. */

    else if (IS_DIGIT(parser->buffer))
    {
        /* Check that the indentation is greater than 0. */

        if (CHECK(parser->buffer, '0')) {
            yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                    start_mark, "found an indentation indicator equal to 0");
            goto error;
        }

        /* Get the indentation level and eat the indicator. */

        increment = AS_DIGIT(parser->buffer);

        SKIP(parser);

        /* Check for a chomping indicator after the digit. */

        if (!CACHE(parser, 1)) goto error;

        if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
            chomping = CHECK(parser->buffer, '+') ? +1 : -1;

            SKIP(parser);
        }
    }

    /* Eat whitespaces and comments to the end of the line. */

    if (!CACHE(parser, 1)) goto error;

    while (IS_BLANK(parser->buffer)) {
        SKIP(parser);
        if (!CACHE(parser, 1)) goto error;
    }

    if (CHECK(parser->buffer, '#')) {
        while (!IS_BREAKZ(parser->buffer)) {
            SKIP(parser);
            if (!CACHE(parser, 1)) goto error;
        }
    }

    /* Check if we are at the end of the line. */

    if (!IS_BREAKZ(parser->buffer)) {
        yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                start_mark, "did not find expected comment or line break");
        goto error;
    }

    /* Eat a line break. */

    if (IS_BREAK(parser->buffer)) {
        if (!CACHE(parser, 2)) goto error;
        SKIP_LINE(parser);
    }

    end_mark = parser->mark;

    /*
     * Set the indentation level if it was specified.  The indicator is
     * relative to parser->indent when that is non-negative; otherwise it is
     * used as the content column directly.
     */

    if (increment) {
        indent = parser->indent >= 0 ? parser->indent+increment : increment;
    }

    /*
     * Scan the leading line breaks and determine the indentation level if
     * needed (the helper fills in `indent` when it is still 0).
     */

    if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
                start_mark, &end_mark)) goto error;

    /*
     * Scan the block scalar content.  A line belongs to the scalar only if,
     * after the helper has skipped its indentation, the current column is
     * exactly `indent` and the stream has not ended.
     */

    if (!CACHE(parser, 1)) goto error;

    while ((int)parser->mark.column == indent && !(IS_Z(parser->buffer)))
    {
        /*
         * We are at the beginning of a non-empty line.
         */

        /* Is it a trailing whitespace? */

        trailing_blank = IS_BLANK(parser->buffer);

        /*
         * Check if we need to fold the leading line break.  That happens
         * only in the folded style, when the pending break is '\n' and
         * neither the previous line nor this one starts with a blank.
         */

        if (!literal && (*leading_break.start == '\n')
                && !leading_blank && !trailing_blank)
        {
            /*
             * Do we need to join the lines by space?  Only when no empty
             * lines were seen in between; otherwise the break is dropped and
             * the breaks of the empty lines are appended below.
             */

            if (*trailing_breaks.start == '\0') {
                if (!STRING_EXTEND(parser, string)) goto error;
                *(string.pointer ++) = ' ';
            }

            CLEAR(parser, leading_break);
        }
        else {
            /* Keep the pending line break as it is. */

            if (!JOIN(parser, string, leading_break)) goto error;
            CLEAR(parser, leading_break);
        }

        /* Append the remaining line breaks. */

        if (!JOIN(parser, string, trailing_breaks)) goto error;
        CLEAR(parser, trailing_breaks);

        /* Is it a leading whitespace? */

        leading_blank = IS_BLANK(parser->buffer);

        /* Consume the current line. */

        while (!IS_BREAKZ(parser->buffer)) {
            if (!READ(parser, string)) goto error;
            if (!CACHE(parser, 1)) goto error;
        }

        /*
         * Consume the line break.  It is stored in leading_break rather than
         * in the content, so that the next iteration or the chomping step
         * can decide what to do with it.
         */

        if (!CACHE(parser, 2)) goto error;

        if (!READ_LINE(parser, leading_break)) goto error;

        /* Eat the following indentation spaces and line breaks. */

        if (!yaml_parser_scan_block_scalar_breaks(parser,
                    &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
    }

    /*
     * Chomp the tail.  The final line break is kept unless the '-' indicator
     * was given; the breaks of the trailing empty lines are kept only with
     * the '+' indicator.
     */

    if (chomping != -1) {
        if (!JOIN(parser, string, leading_break)) goto error;
    }
    if (chomping == 1) {
        if (!JOIN(parser, string, trailing_breaks)) goto error;
    }

    /* Create a token; it references the content buffer. */

    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
            literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
            start_mark, end_mark);

    /* The auxiliary buffers are no longer needed. */

    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);

    return 1;

error:
    STRING_DEL(parser, string);
    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);

    return 0;
}

2957: /*
2958:  * Scan indentation spaces and line breaks for a block scalar.  Determine the
2959:  * indentation level if needed.
2960:  */

2962: static int
2963: yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
2964:         int *indent, yaml_string_t *breaks,
2965:         yaml_mark_t start_mark, yaml_mark_t *end_mark)
2966: {
2967:     int max_indent = 0;

2969:     *end_mark = parser->mark;

2971:     /* Eat the indentation spaces and line breaks. */

2973:     while (1)
2974:     {
2975:         /* Eat the indentation spaces. */

2977:         if (!CACHE(parser, 1)) return 0;

2979:         while ((!*indent || (int)parser->mark.column < *indent)
2980:                 && IS_SPACE(parser->buffer)) {
2981:             SKIP(parser);
2982:             if (!CACHE(parser, 1)) return 0;
2983:         }

2985:         if ((int)parser->mark.column > max_indent)
2986:             max_indent = (int)parser->mark.column;

2988:         /* Check for a tab character messing the indentation. */

2990:         if ((!*indent || (int)parser->mark.column < *indent)
2991:                 && IS_TAB(parser->buffer)) {
2992:             return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2993:                     start_mark, "found a tab character where an indentation space is expected");
2994:         }

2996:         /* Have we found a non-empty line? */

2998:         if (!IS_BREAK(parser->buffer)) break;

3000:         /* Consume the line break. */

3002:         if (!CACHE(parser, 2)) return 0;
3003:         if (!READ_LINE(parser, *breaks)) return 0;
3004:         *end_mark = parser->mark;
3005:     }

3007:     /* Determine the indentation level if needed. */

3009:     if (!*indent) {
3010:         *indent = max_indent;
3011:         if (*indent < parser->indent + 1)
3012:             *indent = parser->indent + 1;
3013:         if (*indent < 1)
3014:             *indent = 1;
3015:     }

3017:    return 1;
3018: }

3020: /*
3021:  * Scan a quoted scalar.
3022:  */

3024: static int
3025: yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
3026:         int single)
3027: {
3028:     yaml_mark_t start_mark;
3029:     yaml_mark_t end_mark;
3030:     yaml_string_t string = NULL_STRING;
3031:     yaml_string_t leading_break = NULL_STRING;
3032:     yaml_string_t trailing_breaks = NULL_STRING;
3033:     yaml_string_t whitespaces = NULL_STRING;
3034:     int leading_blanks;

3036:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3037:     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3038:     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3039:     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;

3041:     /* Eat the left quote. */

3043:     start_mark = parser->mark;

3045:     SKIP(parser);

3047:     /* Consume the content of the quoted scalar. */

3049:     while (1)
3050:     {
3051:         /* Check that there are no document indicators at the beginning of the line. */

3053:         if (!CACHE(parser, 4)) goto error;

3055:         if (parser->mark.column == 0 &&
3056:             ((CHECK_AT(parser->buffer, '-', 0) &&
3057:               CHECK_AT(parser->buffer, '-', 1) &&
3058:               CHECK_AT(parser->buffer, '-', 2)) ||
3059:              (CHECK_AT(parser->buffer, '.', 0) &&
3060:               CHECK_AT(parser->buffer, '.', 1) &&
3061:               CHECK_AT(parser->buffer, '.', 2))) &&
3062:             IS_BLANKZ_AT(parser->buffer, 3))
3063:         {
3064:             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3065:                     start_mark, "found unexpected document indicator");
3066:             goto error;
3067:         }

3069:         /* Check for EOF. */

3071:         if (IS_Z(parser->buffer)) {
3072:             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3073:                     start_mark, "found unexpected end of stream");
3074:             goto error;
3075:         }

3077:         /* Consume non-blank characters. */

3079:         if (!CACHE(parser, 2)) goto error;

3081:         leading_blanks = 0;

3083:         while (!IS_BLANKZ(parser->buffer))
3084:         {
3085:             /* Check for an escaped single quote. */

3087:             if (single && CHECK_AT(parser->buffer, '\'', 0)
3088:                     && CHECK_AT(parser->buffer, '\'', 1))
3089:             {
3090:                 if (!STRING_EXTEND(parser, string)) goto error;
3091:                 *(string.pointer++) = '\'';
3092:                 SKIP(parser);
3093:                 SKIP(parser);
3094:             }

3096:             /* Check for the right quote. */

3098:             else if (CHECK(parser->buffer, single ? '\'' : '"'))
3099:             {
3100:                 break;
3101:             }

3103:             /* Check for an escaped line break. */

3105:             else if (!single && CHECK(parser->buffer, '\\')
3106:                     && IS_BREAK_AT(parser->buffer, 1))
3107:             {
3108:                 if (!CACHE(parser, 3)) goto error;
3109:                 SKIP(parser);
3110:                 SKIP_LINE(parser);
3111:                 leading_blanks = 1;
3112:                 break;
3113:             }

3115:             /* Check for an escape sequence. */

3117:             else if (!single && CHECK(parser->buffer, '\\'))
3118:             {
3119:                 size_t code_length = 0;

3121:                 if (!STRING_EXTEND(parser, string)) goto error;

3123:                 /* Check the escape character. */

3125:                 switch (parser->buffer.pointer[1])
3126:                 {
3127:                     case '0':
3128:                         *(string.pointer++) = '\0';
3129:                         break;

3131:                     case 'a':
3132:                         *(string.pointer++) = '\x07';
3133:                         break;

3135:                     case 'b':
3136:                         *(string.pointer++) = '\x08';
3137:                         break;

3139:                     case 't':
3140:                     case '\t':
3141:                         *(string.pointer++) = '\x09';
3142:                         break;

3144:                     case 'n':
3145:                         *(string.pointer++) = '\x0A';
3146:                         break;

3148:                     case 'v':
3149:                         *(string.pointer++) = '\x0B';
3150:                         break;

3152:                     case 'f':
3153:                         *(string.pointer++) = '\x0C';
3154:                         break;

3156:                     case 'r':
3157:                         *(string.pointer++) = '\x0D';
3158:                         break;

3160:                     case 'e':
3161:                         *(string.pointer++) = '\x1B';
3162:                         break;

3164:                     case ' ':
3165:                         *(string.pointer++) = '\x20';
3166:                         break;

3168:                     case '"':
3169:                         *(string.pointer++) = '"';
3170:                         break;

3172:                     case '/':
3173:                         *(string.pointer++) = '/';
3174:                         break;

3176:                     case '\\':
3177:                         *(string.pointer++) = '\\';
3178:                         break;

3180:                     case 'N':   /* NEL (#x85) */
3181:                         *(string.pointer++) = (unsigned char)'\xC2';
3182:                         *(string.pointer++) = (unsigned char)'\x85';
3183:                         break;

3185:                     case '_':   /* #xA0 */
3186:                         *(string.pointer++) = (unsigned char)'\xC2';
3187:                         *(string.pointer++) = (unsigned char) '\xA0';
3188:                         break;

3190:                     case 'L':   /* LS (#x2028) */
3191:                         *(string.pointer++) = (unsigned char)'\xE2';
3192:                         *(string.pointer++) = (unsigned char)'\x80';
3193:                         *(string.pointer++) = (unsigned char)'\xA8';
3194:                         break;

3196:                     case 'P':   /* PS (#x2029) */
3197:                         *(string.pointer++) = (unsigned char)'\xE2';
3198:                         *(string.pointer++) = (unsigned char)'\x80';
3199:                         *(string.pointer++) = (unsigned char)'\xA9';
3200:                         break;

3202:                     case 'x':
3203:                         code_length = 2;
3204:                         break;

3206:                     case 'u':
3207:                         code_length = 4;
3208:                         break;

3210:                     case 'U':
3211:                         code_length = 8;
3212:                         break;

3214:                     default:
3215:                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3216:                                 start_mark, "found unknown escape character");
3217:                         goto error;
3218:                 }

3220:                 SKIP(parser);
3221:                 SKIP(parser);

3223:                 /* Consume an arbitrary escape code. */

3225:                 if (code_length)
3226:                 {
3227:                     unsigned int value = 0;
3228:                     size_t k;

3230:                     /* Scan the character value. */

3232:                     if (!CACHE(parser, code_length)) goto error;

3234:                     for (k = 0; k < code_length; k ++) {
3235:                         if (!IS_HEX_AT(parser->buffer, k)) {
3236:                             yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3237:                                     start_mark, "did not find expected hexadecimal number");
3238:                             goto error;
3239:                         }
3240:                         value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3241:                     }

3243:                     /* Check the value and write the character. */

3245:                     if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3246:                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3247:                                 start_mark, "found invalid Unicode character escape code");
3248:                         goto error;
3249:                     }

3251:                     if (value <= 0x7F) {
3252:                         *(string.pointer++) = value;
3253:                     }
3254:                     else if (value <= 0x7FF) {
3255:                         *(string.pointer++) = 0xC0 + (value >> 6);
3256:                         *(string.pointer++) = 0x80 + (value & 0x3F);
3257:                     }
3258:                     else if (value <= 0xFFFF) {
3259:                         *(string.pointer++) = 0xE0 + (value >> 12);
3260:                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3261:                         *(string.pointer++) = 0x80 + (value & 0x3F);
3262:                     }
3263:                     else {
3264:                         *(string.pointer++) = 0xF0 + (value >> 18);
3265:                         *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3266:                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3267:                         *(string.pointer++) = 0x80 + (value & 0x3F);
3268:                     }

3270:                     /* Advance the pointer. */

3272:                     for (k = 0; k < code_length; k ++) {
3273:                         SKIP(parser);
3274:                     }
3275:                 }
3276:             }

3278:             else
3279:             {
3280:                 /* It is a non-escaped non-blank character. */

3282:                 if (!READ(parser, string)) goto error;
3283:             }

3285:             if (!CACHE(parser, 2)) goto error;
3286:         }

3288:         /* Check if we are at the end of the scalar. */

3290:         /* Fix for crash uninitialized value crash
3291:          * Credit for the bug and input is to OSS Fuzz
3292:          * Credit for the fix to Alex Gaynor
3293:          */
3294:         if (!CACHE(parser, 1)) goto error;
3295:         if (CHECK(parser->buffer, single ? '\'' : '"'))
3296:             break;

3298:         /* Consume blank characters. */

3300:         if (!CACHE(parser, 1)) goto error;

3302:         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3303:         {
3304:             if (IS_BLANK(parser->buffer))
3305:             {
3306:                 /* Consume a space or a tab character. */

3308:                 if (!leading_blanks) {
3309:                     if (!READ(parser, whitespaces)) goto error;
3310:                 }
3311:                 else {
3312:                     SKIP(parser);
3313:                 }
3314:             }
3315:             else
3316:             {
3317:                 if (!CACHE(parser, 2)) goto error;

3319:                 /* Check if it is a first line break. */

3321:                 if (!leading_blanks)
3322:                 {
3323:                     CLEAR(parser, whitespaces);
3324:                     if (!READ_LINE(parser, leading_break)) goto error;
3325:                     leading_blanks = 1;
3326:                 }
3327:                 else
3328:                 {
3329:                     if (!READ_LINE(parser, trailing_breaks)) goto error;
3330:                 }
3331:             }
3332:             if (!CACHE(parser, 1)) goto error;
3333:         }

3335:         /* Join the whitespaces or fold line breaks. */

3337:         if (leading_blanks)
3338:         {
3339:             /* Do we need to fold line breaks? */

3341:             if (leading_break.start[0] == '\n') {
3342:                 if (trailing_breaks.start[0] == '\0') {
3343:                     if (!STRING_EXTEND(parser, string)) goto error;
3344:                     *(string.pointer++) = ' ';
3345:                 }
3346:                 else {
3347:                     if (!JOIN(parser, string, trailing_breaks)) goto error;
3348:                     CLEAR(parser, trailing_breaks);
3349:                 }
3350:                 CLEAR(parser, leading_break);
3351:             }
3352:             else {
3353:                 if (!JOIN(parser, string, leading_break)) goto error;
3354:                 if (!JOIN(parser, string, trailing_breaks)) goto error;
3355:                 CLEAR(parser, leading_break);
3356:                 CLEAR(parser, trailing_breaks);
3357:             }
3358:         }
3359:         else
3360:         {
3361:             if (!JOIN(parser, string, whitespaces)) goto error;
3362:             CLEAR(parser, whitespaces);
3363:         }
3364:     }

3366:     /* Eat the right quote. */

3368:     SKIP(parser);

3370:     end_mark = parser->mark;

3372:     /* Create a token. */

3374:     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3375:             single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
3376:             start_mark, end_mark);

3378:     STRING_DEL(parser, leading_break);
3379:     STRING_DEL(parser, trailing_breaks);
3380:     STRING_DEL(parser, whitespaces);

3382:     return 1;

3384: error:
3385:     STRING_DEL(parser, string);
3386:     STRING_DEL(parser, leading_break);
3387:     STRING_DEL(parser, trailing_breaks);
3388:     STRING_DEL(parser, whitespaces);

3390:     return 0;
3391: }

3393: /*
3394:  * Scan a plain scalar.
3395:  */

3397: static int
3398: yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
3399: {
3400:     yaml_mark_t start_mark;
3401:     yaml_mark_t end_mark;
3402:     yaml_string_t string = NULL_STRING;
3403:     yaml_string_t leading_break = NULL_STRING;
3404:     yaml_string_t trailing_breaks = NULL_STRING;
3405:     yaml_string_t whitespaces = NULL_STRING;
3406:     int leading_blanks = 0;
3407:     int indent = parser->indent+1;

3409:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3410:     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3411:     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3412:     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;

3414:     start_mark = end_mark = parser->mark;

3416:     /* Consume the content of the plain scalar. */

3418:     while (1)
3419:     {
3420:         /* Check for a document indicator. */

3422:         if (!CACHE(parser, 4)) goto error;

3424:         if (parser->mark.column == 0 &&
3425:             ((CHECK_AT(parser->buffer, '-', 0) &&
3426:               CHECK_AT(parser->buffer, '-', 1) &&
3427:               CHECK_AT(parser->buffer, '-', 2)) ||
3428:              (CHECK_AT(parser->buffer, '.', 0) &&
3429:               CHECK_AT(parser->buffer, '.', 1) &&
3430:               CHECK_AT(parser->buffer, '.', 2))) &&
3431:             IS_BLANKZ_AT(parser->buffer, 3)) break;

3433:         /* Check for a comment. */

3435:         if (CHECK(parser->buffer, '#'))
3436:             break;

3438:         /* Consume non-blank characters. */

3440:         while (!IS_BLANKZ(parser->buffer))
3441:         {
3442:             /* Check for "x:" + one of ',?[]{}' in the flow context. TODO: Fix the test "spec-08-13".
3443:              * This is not completely according to the spec
3444:              * See http://yaml.org/spec/1.1/#id907281 9.1.3. Plain
3445:              */

3447:             if (parser->flow_level
3448:                     && CHECK(parser->buffer, ':')
3449:                     && (
3450:                         CHECK_AT(parser->buffer, ',', 1)
3451:                         || CHECK_AT(parser->buffer, '?', 1)
3452:                         || CHECK_AT(parser->buffer, '[', 1)
3453:                         || CHECK_AT(parser->buffer, ']', 1)
3454:                         || CHECK_AT(parser->buffer, '{', 1)
3455:                         || CHECK_AT(parser->buffer, '}', 1)
3456:                     )
3457:                     ) {
3458:                 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3459:                         start_mark, "found unexpected ':'");
3460:                 goto error;
3461:             }

3463:             /* Check for indicators that may end a plain scalar. */

3465:             if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3466:                     || (parser->flow_level &&
3467:                         (CHECK(parser->buffer, ',')
3468:                          || CHECK(parser->buffer, '[')
3469:                          || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3470:                          || CHECK(parser->buffer, '}'))))
3471:                 break;

3473:             /* Check if we need to join whitespaces and breaks. */

3475:             if (leading_blanks || whitespaces.start != whitespaces.pointer)
3476:             {
3477:                 if (leading_blanks)
3478:                 {
3479:                     /* Do we need to fold line breaks? */

3481:                     if (leading_break.start[0] == '\n') {
3482:                         if (trailing_breaks.start[0] == '\0') {
3483:                             if (!STRING_EXTEND(parser, string)) goto error;
3484:                             *(string.pointer++) = ' ';
3485:                         }
3486:                         else {
3487:                             if (!JOIN(parser, string, trailing_breaks)) goto error;
3488:                             CLEAR(parser, trailing_breaks);
3489:                         }
3490:                         CLEAR(parser, leading_break);
3491:                     }
3492:                     else {
3493:                         if (!JOIN(parser, string, leading_break)) goto error;
3494:                         if (!JOIN(parser, string, trailing_breaks)) goto error;
3495:                         CLEAR(parser, leading_break);
3496:                         CLEAR(parser, trailing_breaks);
3497:                     }

3499:                     leading_blanks = 0;
3500:                 }
3501:                 else
3502:                 {
3503:                     if (!JOIN(parser, string, whitespaces)) goto error;
3504:                     CLEAR(parser, whitespaces);
3505:                 }
3506:             }

3508:             /* Copy the character. */

3510:             if (!READ(parser, string)) goto error;

3512:             end_mark = parser->mark;

3514:             if (!CACHE(parser, 2)) goto error;
3515:         }

3517:         /* Is it the end? */

3519:         if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3520:             break;

3522:         /* Consume blank characters. */

3524:         if (!CACHE(parser, 1)) goto error;

3526:         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3527:         {
3528:             if (IS_BLANK(parser->buffer))
3529:             {
3530:                 /* Check for tab characters that abuse indentation. */

3532:                 if (leading_blanks && (int)parser->mark.column < indent
3533:                         && IS_TAB(parser->buffer)) {
3534:                     yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3535:                             start_mark, "found a tab character that violates indentation");
3536:                     goto error;
3537:                 }

3539:                 /* Consume a space or a tab character. */

3541:                 if (!leading_blanks) {
3542:                     if (!READ(parser, whitespaces)) goto error;
3543:                 }
3544:                 else {
3545:                     SKIP(parser);
3546:                 }
3547:             }
3548:             else
3549:             {
3550:                 if (!CACHE(parser, 2)) goto error;

3552:                 /* Check if it is a first line break. */

3554:                 if (!leading_blanks)
3555:                 {
3556:                     CLEAR(parser, whitespaces);
3557:                     if (!READ_LINE(parser, leading_break)) goto error;
3558:                     leading_blanks = 1;
3559:                 }
3560:                 else
3561:                 {
3562:                     if (!READ_LINE(parser, trailing_breaks)) goto error;
3563:                 }
3564:             }
3565:             if (!CACHE(parser, 1)) goto error;
3566:         }

3568:         /* Check indentation level. */

3570:         if (!parser->flow_level && (int)parser->mark.column < indent)
3571:             break;
3572:     }

3574:     /* Create a token. */

3576:     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3577:             YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);

3579:     /* Note that we change the 'simple_key_allowed' flag. */

3581:     if (leading_blanks) {
3582:         parser->simple_key_allowed = 1;
3583:     }

3585:     STRING_DEL(parser, leading_break);
3586:     STRING_DEL(parser, trailing_breaks);
3587:     STRING_DEL(parser, whitespaces);

3589:     return 1;

3591: error:
3592:     STRING_DEL(parser, string);
3593:     STRING_DEL(parser, leading_break);
3594:     STRING_DEL(parser, trailing_breaks);
3595:     STRING_DEL(parser, whitespaces);

3597:     return 0;
3598: }