/+
Routines that move the tokenizer past a single Ion Text token without decoding it.

Authors: Harrison Ford
+/
module mir.deser.text.skippers;

import mir.deser.text.tokenizer;
import mir.deser.text.tokens;
import mir.ion.type_code;

/+
Skip the remaining contents of a S-Exp/Struct/List/Blob, then read one more character.
Params:
    t = The tokenizer
    term = The closing character (`]`, `}` or `)`) that ends the container being skipped
Returns:
    The character read directly after the container's terminator.
+/
char skipContainer(ref IonTokenizer t, char term) @safe @nogc pure {
    skipContainerInternal(t, term);
    immutable char following = t.readInput();
    return following;
}

/+
Skip the remaining contents of a S-Exp/Struct/List/Blob, stopping right after the terminator
(no character past the terminator is read).

Params:
    t = The tokenizer
    term = The closing character (`]`, `}` or `)`) that ends the container being skipped
+/
void skipContainerInternal(ref IonTokenizer t, char term) @safe @nogc pure
in {
    assert(term == ']' || term == '}' || term == ')', "Unexpected character for skipping");
} do {
    for (;;) {
        char current = t.skipWhitespace();
        if (current == term) {
            return;
        }
        // a 0 here means the input ran out before the terminator showed up
        t.expect!("a != 0", true)(current);

        if (current == '"') {
            t.skipStringInternal();
        } else if (current == '\'') {
            // a quote opens either a long string (''') or a quoted symbol
            if (t.isTripleQuote()) {
                skipLongStringInternal!(true, false)(t);
            } else {
                t.skipSymbolQuotedInternal();
            }
        } else if (current == '(') {
            skipContainerInternal(t, ')');
        } else if (current == '[') {
            skipContainerInternal(t, ']');
        } else if (current == '{') {
            current = t.peekOne();
            if (current == '{') {
                // "{{" opens a blob: consume the second brace, then skip the blob body
                t.expect!"a != 0";
                t.skipBlobInternal();
            } else if (current == '}') {
                // "{}" is an empty struct: consume its closing brace
                t.expect!"a != 0";
            } else {
                skipContainerInternal(t, '}');
            }
        }
        // every other character is simply passed over
    }
}

/+
Skip over a single line comment. This will read input up until a newline or the EOF is hit.
Params:
    t = The tokenizer
Returns:
    true if it was able to skip over the comment.
+/
bool skipSingleLineComment(ref IonTokenizer t) @safe @nogc pure
{
    for (;;) {
        immutable char ch = t.readInput();
        if (ch == '\n' || ch == 0) {
            // plain newline or end of input: the comment is over
            return true;
        }
        if (ch == '\r') {
            // treat "\r\n" as one line ending by also consuming the '\n'
            auto ahead = t.peekMax(1);
            if (ahead.length == 1 && ahead[0] == '\n') {
                t.skipOne();
            }
            return true;
        }
    }
}
/// Test skipping over single-line comments.
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString, testRead;
    auto tok = tokenizeString("single-line comment\r\nok");
    assert(tok.skipSingleLineComment());

    tok.testRead('o');
    tok.testRead('k');
    tok.testRead(0);
}
/// Test skipping of a single-line comment on the last line
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString, testRead;
    auto tok = tokenizeString("single-line comment");
    assert(tok.skipSingleLineComment());
    tok.testRead(0);
}

/+
Skip over a block comment. This will read up until `*/` is hit.
Params:
    t = The tokenizer
Returns:
    true if the block comment was able to be skipped, false if EOF was hit
+/
bool skipBlockComment(ref IonTokenizer t) @safe @nogc pure
{
    // remembers whether the previously read character was a '*'
    bool prevWasStar = false;
    for (;;) {
        immutable char ch = t.readInput();
        if (ch == '/' && prevWasStar) {
            return true;
        }
        if (ch == 0) {
            return false;
        }
        prevWasStar = (ch == '*');
    }
}
/// Test skipping of an invalid comment
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;
    auto tok = tokenizeString("this is a string that never ends");
    assert(!tok.skipBlockComment());
}
/// Test skipping of a multi-line comment
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString, testRead;
    auto tok = tokenizeString("this is/ a\nmulti-line /** comment.**/ok");
    assert(tok.skipBlockComment());

    tok.testRead('o');
    tok.testRead('k');
    tok.testRead(0);
}

/+
Skip over a comment (block or single-line) after reading a '/'
Params:
    t = The tokenizer
Returns:
    true if it was able to skip over the comment
+/
bool skipComment(ref IonTokenizer t) @safe @nogc pure
{
    if (t.input.length == 0) {
        return false;
    }

    immutable char marker = t.peekOne();
    if (marker == '/') {
        return t.skipSingleLineComment();
    }
    if (marker == '*') {
        // consume the opening '*' so it cannot double as the '*' of the closing "*/"
        t.skipOne();
        return t.skipBlockComment();
    }

    // not a comment after all
    return false;
}
/// Test single-line skipping
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString, testRead;
    auto tok = tokenizeString("/comment\nok");
    assert(tok.skipComment());
    tok.testRead('o');
    tok.testRead('k');
    tok.testRead(0);
}
/// Test block skipping
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString, testRead;
    auto tok = tokenizeString("*comm\nent*/ok");
    assert(tok.skipComment());
    tok.testRead('o');
    tok.testRead('k');
    tok.testRead(0);
}
/// Test false-alarm skipping
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString, testRead;
    auto tok = tokenizeString(" 0)");
    assert(!tok.skipComment());
    tok.testRead(' ');
    tok.testRead('0');
    tok.testRead(')');
    tok.testRead(0);
}

/+
Skip any digits after the last character read.
Params:
    t = The tokenizer
    _c = The last character read from the tokenizer input range.
Returns:
    A character located after the last digit skipped.
+/
char skipDigits(ref IonTokenizer t, char _c) @safe @nogc pure
{
    char ch = _c;
    for (; ch.isDigit(); ch = t.readInput()) {}
    return ch;
}

/+
Skip over a non-[hex, binary] number.
Params:
    t = The tokenizer
Returns:
    A character located after the number skipped.
+/
char skipNumber(ref IonTokenizer t) @safe @nogc pure
{
    char ch = t.readInput();

    // optional leading sign
    if (ch == '-') {
        ch = t.readInput();
    }

    // integer part
    ch = skipDigits(t, ch);

    // optional fractional part
    if (ch == '.') {
        ch = t.readInput();
        ch = skipDigits(t, ch);
    }

    // optional exponent: marker, optional sign, digits
    switch (ch) {
        case 'd', 'D', 'e', 'E':
            ch = t.readInput();
            if (ch == '+' || ch == '-') {
                ch = t.readInput();
            }
            ch = skipDigits(t, ch);
            break;
        default:
            break;
    }

    // whatever follows the number has to be a stop character
    return t.expect!(t.isStopChar, true)(ch);
}
/// Test skipping over numbers
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;
    import mir.deser.text.tokens : IonTokenizerException;

    void test(string ts, char expected) {
        auto tok = tokenizeString(ts);
        assert(tok.skipNumber() == expected);
    }

    void testFail(string ts) {
        import std.exception : assertThrown;
        auto tok = tokenizeString(ts);
        assertThrown!IonTokenizerException(tok.skipNumber());
    }

    test("", 0);
    test("0", 0);
    test("-0", 0);
    test("-1234567890,", ',');
    test("1.2 ", ' ');
    test("1d45\n", '\n');
    test("1.4e-12//", '/');
    testFail("1.2d3d");
}

/+
Skip over a binary number.
Params:
    t = The tokenizer
Returns:
    A character located after the number skipped.
+/
char skipBinary(ref IonTokenizer t) @safe @nogc pure
{
    // marker is 'b'/'B', valid digits are '0' and '1'
    alias skipBinaryDigits = skipRadix!("a == 'b' || a == 'B'", "a == '0' || a == '1'");
    return skipBinaryDigits(t);
}
/// Test skipping over binary numbers
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;
    import mir.deser.text.tokens : IonTokenizerException;

    void test(string ts, char expected) {
        auto tok = tokenizeString(ts);
        assert(tok.skipBinary() == expected);
    }

    void testFail(string ts) {
        import std.exception : assertThrown;
        auto tok = tokenizeString(ts);
        assertThrown!IonTokenizerException(tok.skipBinary());
    }

    test("0b0", 0);
    test("-0b10 ", ' ');
    test("0b010101,", ',');

    testFail("0b2");
}

/+
Skip over a hex number.
Params:
    t = The tokenizer
Returns:
    A character located after the number skipped.
+/
char skipHex(ref IonTokenizer t) @safe @nogc pure
{
    // marker is 'x'/'X', valid digits are decided by isHexDigit
    alias skipHexDigits = skipRadix!("a == 'x' || a == 'X'", isHexDigit);
    return skipHexDigits(t);
}
/// Test skipping over hex numbers
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;
    import mir.deser.text.tokens : IonTokenizerException;

    void test(string ts, char expected) {
        auto tok = tokenizeString(ts);
        assert(tok.skipHex() == expected);
    }

    void testFail(string ts) {
        import std.exception : assertThrown;
        auto tok = tokenizeString(ts);
        assertThrown!IonTokenizerException(tok.skipHex());
    }

    test("0xDEADBABE,0xDEADBABE", ',');
    test("0x0", 0);
    test("-0x0F ", ' ');
    test("0x1234567890abcdefABCDEF,", ',');

    testFail("0xG");
}

/+
Skip over a number given two predicates to determine the number's marker (`0x`, `0b`) and if any input is valid.
Params:
    isMarker = A predicate which determines if the marker in a number is valid.
    isValid = A predicate which determines the validity of digits within a number.
    t = The tokenizer
Returns:
    A character located after the number skipped.
+/
template skipRadix(alias isMarker, alias isValid)
{
    import mir.functional : naryFun;
    char skipRadix(ref IonTokenizer t) @safe @nogc pure {
        auto ch = t.readInput();

        // an optional minus sign may precede the leading zero
        if (ch == '-') {
            ch = t.readInput();
        }

        // leading "0"
        t.expect!("a == '0'", true)(ch);
        // radix marker, i.e. 0(x || b)
        t.expect!(isMarker);

        // consume characters until one is rejected by the digit predicate
        do {
            ch = t.readInput();
        } while (naryFun!isValid(ch));

        // the rejected character has to be a stop character
        return t.expect!(isStopChar, true)(ch);
    }
}

/+
Skip over a timestamp (compliant to ISO 8601)
Params:
    t = The tokenizer
Returns:
    A character located after the timestamp skipped.
+/
char skipTimestamp(ref IonTokenizer t) @safe @nogc pure
{
    // Consume `count` digits, then return the character read after them.
    char skipTSDigits(int count) {
        foreach (_; 0 .. count) {
            t.expect!(isDigit);
        }
        return t.readInput();
    }

    // When `c` is a sign, consume "hh:mm" and return the character after it;
    // any other character is handed back unchanged.
    char skipTSOffset(char c) {
        if (c == '+' || c == '-') {
            t.expect!("a == ':'", true)(skipTSDigits(2));
            return skipTSDigits(2);
        }
        return c;
    }

    // Like skipTSOffset, but an offset is mandatory and may also be 'z' / 'Z'.
    char skipTSOffsetOrZ(char c) {
        t.expect!("a == '+' || a == '-' || a == 'z' || a == 'Z'", true)(c);
        if (c == 'z' || c == 'Z')
            return t.readInput();
        if (c == '+' || c == '-')
            return skipTSOffset(c);
        assert(0); // should never hit this
    }

    // The character following a complete timestamp has to be a stop character.
    char skipTSFinish(char c) {
        return t.expect!(isStopChar, true)(c);
    }

    // YYYY(T || '-')
    immutable char yearEnd = t.expect!("a == 'T' || a == '-'", true)(skipTSDigits(4));
    if (yearEnd == 'T') {
        // skipped yyyyT
        return t.readInput();
    }

    // YYYY-MM('T' || '-')
    immutable char monthEnd = t.expect!("a == 'T' || a == '-'", true)(skipTSDigits(2));
    if (monthEnd == 'T') {
        // skipped yyyy-mmT
        return t.readInput();
    }

    // YYYY-MM-DD('T')?
    immutable char dayEnd = skipTSDigits(2);
    if (dayEnd != 'T') {
        // skipped yyyy-mm-dd
        return skipTSFinish(dayEnd);
    }

    // YYYY-MM-DDT('+' || '-' || 'z' || 'Z' || isDigit)
    immutable char afterT = t.readInput();
    if (!afterT.isDigit()) {
        // YYYY-MM-DDT('+' || '-' || 'z' || 'Z')
        // skipped yyyy-mm-ddT(+hh:mm)
        immutable char offsetEnd = skipTSOffset(afterT);
        return skipTSFinish(offsetEnd);
    }

    // YYYY-MM-DDT[0-9][0-9]:
    // (the first hour digit was already read above, so only one more is expected)
    t.expect!("a == ':'", true)(skipTSDigits(1));

    // YYYY-MM-DDT[0-9][0-9]:[0-9][0-9](':' || '+' || '-' || 'z' || 'Z')
    immutable char minuteEnd = t.expect!("a == ':' || a == '+' || a == '-' || a == 'z' || a == 'Z'", true)
                                        (skipTSDigits(2));
    if (minuteEnd != ':') {
        // skipped yyyy-mm-ddThh:mmZ
        immutable char offsetEnd = skipTSOffsetOrZ(minuteEnd);
        return skipTSFinish(offsetEnd);
    }

    // YYYY-MM-DDT[0-9][0-9]:[0-9][0-9]:[0-9][0-9]('.')?
    immutable char secondEnd = skipTSDigits(2);
    if (secondEnd != '.') {
        immutable char offsetEnd = skipTSOffsetOrZ(secondEnd);
        return skipTSFinish(offsetEnd);
    }

    // YYYY-MM-DDT[0-9][0-9]:[0-9][0-9]:[0-9][0-9].[0-9]*
    char fractionEnd = t.readInput();
    if (isDigit(fractionEnd)) {
        fractionEnd = skipDigits(t, fractionEnd);
    }

    // YYYY-MM-DDT[0-9][0-9]:[0-9][0-9]:[0-9][0-9].[0-9]*('+' || '-' || 'z' || 'Z')([0-9][0-9]:[0-9][0-9])?
    immutable char offsetEnd = skipTSOffsetOrZ(fractionEnd);
    return skipTSFinish(offsetEnd);
}
/// Test skipping over timestamps
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;
    import mir.deser.text.tokens : IonTokenizerException;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipTimestamp() == result);
    }

    void testFail(string ts) {
        import std.exception : assertThrown;
        auto tok = tokenizeString(ts);
        assertThrown!IonTokenizerException(tok.skipTimestamp());
    }

    test("2001T", 0);
    test("2001-01T,", ',');
    test("2001-01-02}", '}');
    test("2001-01-02T ", ' ');
    test("2001-01-02T+00:00\t", '\t');
    test("2001-01-02T-00:00\n", '\n');
    test("2001-01-02T03:04+00:00 ", ' ');
    test("2001-01-02T03:04-00:00 ", ' ');
    test("2001-01-02T03:04Z ", ' ');
    test("2001-01-02T03:04z ", ' ');
    test("2001-01-02T03:04:05Z ", ' ');
    test("2001-01-02T03:04:05+00:00 ", ' ');
    test("2001-01-02T03:04:05.666Z ", ' ');
    test("2001-01-02T03:04:05.666666z ", ' ');

    testFail("");
    testFail("2001");
    testFail("2001z");
    testFail("20011");
    testFail("2001-0");
    testFail("2001-01");
    testFail("2001-01-02Tz");
    testFail("2001-01-02T03");
    testFail("2001-01-02T03z");
    testFail("2001-01-02T03:04x ");
    testFail("2001-01-02T03:04:05x ");
}

/+
Skip over a double colon.

Params:
    t = The tokenizer
Returns:
    The character read after the double-colon when one is present; otherwise simply the next
    character of the input (leading whitespace is not skipped).
+/
char skipDoubleColon(ref IonTokenizer t) @safe @nogc pure
{
    immutable bool atDoubleColon = t.isDoubleColon();
    if (atDoubleColon) {
        // step over both ':' characters
        t.skipExactly(2);
    }
    return t.readInput();
}
/// Test skipping over double-colons
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipDoubleColon() == result);
    }

    test(" :: ", ' ');
    test(":: ", ' ');
    test("::", 0);
    test("::\t", '\t');
}
/+
Skip over a dot.

Params:
    t = The tokenizer
Returns:
    A character located after the dot was skipped.
+/
char skipDot(ref IonTokenizer t) @safe @nogc pure
{
    auto ahead = t.peekMax(1);
    immutable bool atDot = ahead.length == 1 && ahead[0] == '.';
    if (atDot) {
        t.skipOne();
    }

    return t.readInput();
}

/+
Skip over a symbol.
Params:
    t = The tokenizer
Returns:
    A character located after the symbol skipped.
+/
char skipSymbol(ref IonTokenizer t) @safe @nogc pure
{
    // keep reading for as long as the characters belong to an identifier
    char ch = t.readInput();
    for (; isIdentifierPart(ch); ch = t.readInput()) {}
    return ch;
}
/// Test skipping over symbols
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipSymbol() == result);
    }

    test("f", 0);
    test("foo:", ':');
    test("foo,", ',');
    test("foo ", ' ');
    test("foo\n", '\n');
    test("foo]", ']');
    test("foo}", '}');
    test("foo)", ')');
    test("foo\\n", '\\');
}

/+
Skip over a quoted symbol, but do not read the character after.
Params:
    t = The tokenizer
+/
void skipSymbolQuotedInternal(ref IonTokenizer t) @safe @nogc pure
{
    for (;;) {
        // neither EOF nor a raw newline may appear inside a quoted symbol
        immutable char ch = t.expect!"a != 0 && a != '\\n'";
        if (ch == '\'') {
            // closing quote
            return;
        }
        if (ch == '\\') {
            // escape sequence: consume the escaped character, whatever it is (except EOF)
            t.expect!"a != 0";
        }
    }
}

/+
Skip over a quoted symbol
Params:
    t = The tokenizer
Returns:
    A character located after the quoted symbol skipped.
+/
char skipSymbolQuoted(ref IonTokenizer t) @safe @nogc pure
{
    t.skipSymbolQuotedInternal();
    immutable char following = t.readInput();
    return following;
}
/// Test skipping over quoted symbols
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;
    import mir.deser.text.tokens : IonTokenizerException;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipSymbolQuoted() == result);
    }

    void testFail(string ts) {
        import std.exception : assertThrown;
        auto tok = tokenizeString(ts);
        assertThrown!IonTokenizerException(tok.skipSymbolQuoted());
    }

    test("'", 0);
    test("foo',", ',');
    test("foo\\'bar':", ':');
    test("foo\\\nbar',", ',');
    testFail("foo");
    testFail("foo\n");
}

/+
Skip over a symbol operator.
Params:
    t = The tokenizer
Returns:
    A character located after the symbol operator skipped.
+/
char skipSymbolOperator(ref IonTokenizer t) @safe @nogc pure
{
    // keep reading for as long as the characters are operator characters
    char ch = t.readInput();
    for (; isOperatorChar(ch); ch = t.readInput()) {}
    return ch;
}
/// Test skipping over symbol operators
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipSymbolOperator() == result);
    }

    test("+", 0);
    test("++", 0);
    test("+= ", ' ');
    test("%b", 'b');
}

/+
Skip over a string, but do not read the character following it.
Params:
    t = The tokenizer
+/
void skipStringInternal(ref IonTokenizer t) @safe @nogc pure
{
    for (;;) {
        // neither EOF nor a raw newline may appear inside a short string
        immutable char ch = t.expect!("a != 0 && a != '\\n'");
        if (ch == '"') {
            // closing quote
            return;
        }
        if (ch == '\\') {
            // escape sequence: consume the escaped character, whatever it is (except EOF)
            t.expect!"a != 0";
        }
    }
}

/+
Skip over a string.
Params:
    t = The tokenizer
Returns:
    A character located after the string skipped.
+/
char skipString(ref IonTokenizer t) @safe @nogc pure
{
    t.skipStringInternal();
    immutable char following = t.readInput();
    return following;
}
/// Test skipping over strings
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;
    import mir.deser.text.tokens : IonTokenizerException;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipString() == result);
    }

    void testFail(string ts) {
        import std.exception : assertThrown;
        auto tok = tokenizeString(ts);
        assertThrown!IonTokenizerException(tok.skipString());
    }

    test("\"", 0);
    test("\",", ',');
    test("foo\\\"bar\"], \"\"", ']');
    test("foo\\\nbar\" \t\t\t", ' ');

    testFail("foobar");
    testFail("foobar\n");
}

/+
Skip over a long string, but do not read the character following it.
Params:
    t = The tokenizer
+/
void skipLongStringInternal(bool skipComments = true, bool failOnComment = false)(ref IonTokenizer t) @safe @nogc pure
if (__traits(compiles, { t.skipWhitespace!(skipComments, failOnComment); })) {
    char c;
    while (true) {
        // EOF inside a long string is an error; raw newlines are allowed here
        c = t.expect!("a != 0");
        switch (c) {
            case '\'':
                // possibly the closing ''' -- stop only once the whole long string is finished
                if (skipLongStringEnd!(skipComments, failOnComment)(t)) {
                    return;
                }
                break;
            case '\\':
                // escape sequence: consume the escaped character
                t.expect!("a != 0");
                break;
            default:
                break;
        }
    }
}

/+
Called after a single `'` has been read inside a long string. Checks whether that quote starts
the closing `'''`, and if so whether another long-string segment follows it.
Params:
    t = The tokenizer
Returns:
    true if the long string is finished (a closing `'''` was consumed and no further `'''`
    segment follows); false if scanning has to continue, either because the quote was an
    ordinary character of the string or because another `'''` segment was opened.
+/
bool skipLongStringEnd(bool skipComments = true, bool failOnComment = false)(ref IonTokenizer t) @safe @nogc pure
if (__traits(compiles, { t.skipWhitespace!(skipComments, failOnComment); })) {
    auto cs = t.peekMax(2);
    if (cs.length < 2 || cs[0] != '\'' || cs[1] != '\'') {
        // A lone quote (or two) is ordinary content of a long string, e.g. '''it's''';
        // nothing is consumed, and the caller keeps scanning. An unterminated string is
        // still rejected by the caller's EOF check.
        return false;
    }

    // consume the remaining two quotes of the closing '''
    t.skipExactly(2);
    immutable char c = t.skipWhitespace!(skipComments, failOnComment);
    if (c == '\'') {
        if (t.isTripleQuote()) {
            // another ''' segment follows: the long string continues
            return false;
        }
    }

    // put back the character that does not belong to the string
    t.unread(c);
    return true;
}

/+
Skip over a long string (marked by `'''`)
Params:
    t = The tokenizer
Returns:
    A character located after the long string skipped.
+/
char skipLongString(ref IonTokenizer t) @safe @nogc pure
{
    skipLongStringInternal!(true, false)(t);
    return t.readInput();
}
/// Test skipping over long strings
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;

    void test(string ts, char result) {
        auto t = tokenizeString(ts);
        assert(t.skipLongString() == result);
    }

    // the input starts right after the opening '''
    test("foo''',", ',');
    // adjacent long-string segments are skipped as one value
    test("foo''' '''bar''',", ',');
    // single quotes inside the string are ordinary content
    test("it's a 'quoted' word''',", ',');
}

/+
Skip over a blob.
Params:
    t = The tokenizer
Returns:
    A character located after the blob skipped.
+/
char skipBlob(ref IonTokenizer t) @safe @nogc pure
{
    t.skipBlobInternal();
    immutable char following = t.readInput();
    return following;
}
/// Test skipping over blobs
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipBlob() == result);
    }

    test("}}", 0);
    test("oogboog}},{{}}", ',');
    test("'''not encoded'''}}\n", '\n');
}

/+
Skip over a blob, but do not read the character following it.
Params:
    t = The tokenizer
+/
void skipBlobInternal(ref IonTokenizer t) @safe @nogc pure
{
    // read until the first '}' of the closing "}}", rejecting EOF on the way
    for (char ch = t.skipLobWhitespace(); ch != '}'; ) {
        ch = t.skipLobWhitespace();
        t.expect!("a != 0", true)(ch);
    }

    // the second '}' has to follow immediately
    t.expect!("a == '}'");
}

/+
Skip over a struct.
Params:
    t = The tokenizer
Returns:
    A character located after the struct skipped.
+/
char skipStruct(ref IonTokenizer t) @safe @nogc pure
{
    skipContainerInternal(t, '}');
    return t.readInput();
}
/// Test skipping over structs
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipStruct() == result);
    }

    test("},", ',');
    test("[\"foo bar baz\"]},", ',');
    test("{}},{}", ','); // skip over an embedded struct inside of a struct
}

/+
Skip over a struct, but do not read the character following it.
Params:
    t = The tokenizer
+/
void skipStructInternal(ref IonTokenizer t) @safe @nogc pure
{
    skipContainerInternal(t, '}');
}

/+
Skip over a S-expression.
Params:
    t = The tokenizer
Returns:
    A character located after the S-expression skipped.
+/
char skipSexp(ref IonTokenizer t) @safe @nogc pure
{
    skipContainerInternal(t, ')');
    return t.readInput();
}
/// Test skipping over S-expressions
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipSexp() == result);
    }

    test("1231 + 1123),", ',');
    test("0xF00DBAD)", 0);
}

/+
Skip over a S-expression, but do not read the character following it.
Params:
    t = The tokenizer
+/
void skipSexpInternal(ref IonTokenizer t) @safe @nogc pure
{
    skipContainerInternal(t, ')');
}

/+
Skip over a list.
Params:
    t = The tokenizer
Returns:
    A character located after the list skipped.
+/
char skipList(ref IonTokenizer t) @safe @nogc pure
{
    skipContainerInternal(t, ']');
    return t.readInput();
}
/// Test skipping over lists
version(mir_ion_parser_test) unittest
{
    import mir.deser.text.tokenizer : tokenizeString;

    void test(string ts, char result) {
        auto tok = tokenizeString(ts);
        assert(tok.skipList() == result);
    }

    test("\"foo\", \"bar\", \"baz\"],", ',');
    test("\"foobar\"]", 0);
}

/+
Skip over a list, but do not read the character following it.
Params:
    t = The tokenizer
+/
void skipListInternal(ref IonTokenizer t) @safe @nogc pure
{
    skipContainerInternal(t, ']');
}

/+
Skip over the current token.
Params:
    t = The tokenizer
Returns:
    A non-whitespace character following the current token.
+/
char skipValue(ref IonTokenizer t) @safe @nogc pure
{
    // Explicitly start at 0, the value the tokenizer uses for EOF. A bare `char ret;` would
    // default to char.init (0xFF), which the TokenEOF branch below would then return as-is.
    char ret = 0;

    // dispatch to the skipper that matches the token the tokenizer is currently positioned on
    with(IonTokenType) switch(t.currentToken) {
        case TokenNumber:
            ret = t.skipNumber();
            break;
        case TokenBinary:
            ret = t.skipBinary();
            break;
        case TokenHex:
            ret = t.skipHex();
            break;
        case TokenTimestamp:
            ret = t.skipTimestamp();
            break;
        case TokenSymbol:
            ret = t.skipSymbol();
            break;
        case TokenSymbolQuoted:
            ret = t.skipSymbolQuoted();
            break;
        case TokenSymbolOperator:
            ret = t.skipSymbolOperator();
            break;
        case TokenString:
            ret = t.skipString();
            break;
        case TokenLongString:
            ret = t.skipLongString();
            break;
        case TokenOpenDoubleBrace:
            ret = t.skipBlob();
            break;
        case TokenOpenBrace:
            ret = t.skipStruct();
            break;
        case TokenOpenParen:
            ret = t.skipSexp();
            break;
        case TokenOpenBracket:
            ret = t.skipList();
            break;
        case TokenDoubleColon:
            ret = t.skipDoubleColon();
            break;
        case TokenEOF:
            // nothing left to skip; ret stays 0
            break;
        default:
            assert(0, "unhandled token");
    }

    // the individual skippers may stop on whitespace; advance to the next significant character
    if (ret.isWhitespace()) {
        ret = t.skipWhitespace();
    }

    // mark the current value as fully consumed
    t.finished = true;
    return ret;
}