module squelch.format;

import std.algorithm.comparison : among, max;
import std.algorithm.iteration : map, filter, each;
import std.algorithm.searching;
import std.array : replicate, split;
import std.exception;
import std.range : retro;
import std.string : strip;
import std.sumtype : match;

static import std.string;

import ae.utils.array : elementIndex;
import ae.utils.math : maximize, minimize;

import squelch.common;

// Break lines in expressions with more than this many typical characters.
enum maxLineComplexity = 80;

// Indentation depth (number of spaces) to use for most constructs.
enum indentationWidth = 2;

/// Formats a token stream:
/// builds a priority-based expression tree over `tokens`, decides where to
/// place whitespace / line breaks / indentation, and returns a new token
/// stream with `TokenWhiteSpace` tokens materialized accordingly.
/// The input must not contain `TokenWhiteSpace` tokens.
Token[] format(const scope Token[] tokens)
{
	// Lexical priorities
	enum Level
	{
		// https://cloud.google.com/bigquery/docs/reference/standard-sql/operators
		parensOuter, /// function call, subscript
		// unary,
		// Binary operators - highest priority:
		multiplication,
		addition,
		shift,
		bitwiseAnd,
		bitwiseXor,
		bitwiseOr,
		comparison,
		// not,
		and,
		or,

		then,
		when,
		case_,

		as,

		// Technically not a binary operator, but acts like a low-priority one
		comma,

		// Things like SELECT go here
		with_,
		on,
		join,
		select,
		union_,
		statement = union_,

		// A closing paren unambiguously terminates all other lexical constructs
		parensInner,

		// Dbt macros form their own independent hierarchy
		dbt,

		// Lowest priority - terminates all constructs
		file,
	}

	/// Tree node.
	static struct Node
	{
		Level level; /// Nesting / priority level of this node's expression
		string type; /// A string identifying the specific operation

		Node*[] children; /// If null, then this is a leaf

		/// Covered tokens (half-open range [start, end) of indices into `tokens`).
		size_t start, end;

		/// Default indentation level.
		sizediff_t indent = indentationWidth;

		/// Don't indent unless this node contains line breaks
		bool conditionalIndent;

		/// If true, and the parent has its soft line breaks applied, do so here too
		bool breakWithParent;

		/// If true, and any sibling has its soft line breaks applied, do so here too
		bool breakWithSibling;

		/// Indentation level overrides for specific tokens
		/// (mainly for tokens that are part of this node's syntax,
		/// and should not be indented).
		sizediff_t[size_t] tokenIndent;

		/// If set, overrides the parent's .indent for this node.
		int parentIndentOverride = int.min;

		/// If true, the corresponding whiteSpace may be changed to newLine
		bool[size_t] softLineBreak;

		/// State flag for newline application.
		bool breaksApplied;
	}
	Node root = {
		level : Level.file,
		type : "file",
		indent : false, // no indentation at file level ('false' converts to 0)
	};

	enum WhiteSpace
	{
		none,
		space,
		newLine,
		blankLine,
	}
	// whiteSpace[i] is what whitespace we should add before tokens[i]
	auto whiteSpace = new WhiteSpace[tokens.length + 1];

	// First pass: walk the tokens once, building the expression tree
	// (rooted at `root`) and recording requested whitespace.
	{
		Node*[] stack = [&root]; // path from root to the currently-open node
		bool wasWord; // was the previous token word-like? (forces a separating space)

		foreach (ref token; tokens)
		{
			// Whitespace this token requests before / after itself.
			WhiteSpace wsPre, wsPost;
			bool isWord;

			size_t tokenIndex = tokens.elementIndex(token);

			// Open a new node as a child of the current top-of-stack.
			Node* stackPush_(Level level, string type)
			{
				auto n = new Node;
				n.level = level;
				n.type = type;
				n.start = tokenIndex;

				stack[$-1].children ~= n;
				stack ~= n;
				return n;
			}

			// Close the current node, ending it before (or after) the current token.
			Node* stackPop_(bool afterCurrent)
			{
				auto n = stack[$-1];
				n.end = tokenIndex + afterCurrent;
				stack = stack[0 .. $-1];
				return n;
			}

			// Close all nodes with a priority higher (numerically lower) than `level`.
			Node* stackPopTo(Level level)
			{
				while (stack[$-1].level < level)
					stackPop_(false);
				return stack[$-1].level == level ? stack[$-1] : null;
			}

			// Begin a construct at `level`; if one is already open at that level,
			// either merge into it (glueBackwards) or close it and start anew.
			Node* stackEnter(Level level, string type, bool glueBackwards = false)
			{
				stackPopTo(level);

				if (stack[$-1].level == level)
				{
					if (glueBackwards)
					{
						stack[$-1].type = type;
						return stack[$-1];
					}
					else
						stackPop_(false);
				}

				return stackPush_(level, type);
			}

			// Close the construct at `level`, which must be open; throws otherwise.
			Node* stackExit(Level level, string expected)
			{
				stackPopTo(level);
				enforce(stack[$-1].level == level, std.string.format!
					"Found end of %s while looking for end of %s"(
						stack[$-1].type, expected));
				return stackPop_(true);
			}

			// Like stackEnter, but adopts previous nodes that have a higher priority.
			// Suitable for binary operators.
			Node* stackInsert(Level level, string type)
			{
				stackPopTo(level);

				if (stack[$-1].level == level)
				{
					// Implicitly glue backwards
					stack[$-1].type = type;
				}
				else
				{
					auto p = stack[$-1];
					auto start = tokenIndex;
					auto childrenStart = p.children.length;

					// Extend the start backwards to adopt non-syntax tokens
					// and adopt higher-priority nodes
					while (start > p.start)
					{
						if (childrenStart && p.children[childrenStart - 1].end == start)
						{
							// Decide if we want to adopt this child
							if (p.children[childrenStart - 1].level < level)
							{
								childrenStart--;
								start = p.children[childrenStart].start;
							}
							else
								break;
						}
						else
						{
							// Extend the start backwards to adopt non-syntax tokens
							if ((start - 1) !in p.tokenIndent && (start) !in p.softLineBreak)
								start--;
							else
								break;
						}
					}

					// Move the previous hierarchy into the new inserted level
					Node*[] children = p.children[childrenStart .. $];
					p.children = p.children[0 .. childrenStart];

					auto n = stackPush_(level, type);
					n.children = children;
					n.start = start;
				}
				return stack[$-1];
			}

			// Common routine for creating nodes for binary operators
			Node* stackInsertBinary(Level level, string type)
			{
				wsPre = wsPost = WhiteSpace.space;
				auto n = stackInsert(level, type);
				n.indent.maximize(type.length + 1);
				n.tokenIndent[tokenIndex] = 0;
				n.conditionalIndent = true;
				n.softLineBreak[tokenIndex] = n.softLineBreak[n.start] = true;
				return n;
			}

			token.match!(
				(ref const TokenWhiteSpace t)
				{
					assert(false); // input must be whitespace-free
				},
				(ref const TokenComment t)
				{
					wsPre = wsPost = WhiteSpace.newLine;
				},
				(ref const TokenKeyword t)
				{
					isWord = true;

					switch (t.kind)
					{
						case "AS":
							wsPre = wsPost = WhiteSpace.space;
							// Only treat AS as a binary operator when aliasing within
							// a SELECT (followed by an identifier, not after WITH OFFSET).
							if (tokenIndex + 1 < tokens.length
								&& tokens[tokenIndex + 1].match!(
									(ref const TokenIdentifier t) => true,
									(ref const _) => false
								)
								&& stack.retro
									.find!(n => n.level >= Level.select)
									.front
									.level == Level.select
								&& tokenIndex > 0
								&& !tokens[tokenIndex - 1].match!(
									(ref const TokenKeyword t) => t.kind == "WITH OFFSET",
									(ref const _) => false
								)
							)
								stackInsertBinary(Level.as, t.kind);
							break;
						case "NOT":
						case "IS":
						case "OVER":
						case "RETURNS":
						case "IGNORE":
						case "USING":
							wsPre = wsPost = WhiteSpace.space;
							break;
						case "BETWEEN":
							// BETWEEN must stay on one line with its AND; no soft breaks.
							auto n = stackInsertBinary(Level.comparison, t.kind);
							n.softLineBreak.remove(tokenIndex);
							n.softLineBreak.remove(n.start);
							break;
						case "LIKE":
						case "IN":
							stackInsertBinary(Level.comparison, t.kind);
							break;
						case "AND":
							wsPre = wsPost = WhiteSpace.space;
							stackPopTo(Level.comparison);
							// The AND inside BETWEEN ... AND ... is not a boolean operator.
							if (stack[$-1].type == "BETWEEN")
							{
								auto n = stackInsert(Level.comparison, t.kind);
								n.indent = 0;
								n.tokenIndent[tokenIndex] = 0;
								break;
							}
							stackInsertBinary(Level.and, t.kind);
							return;
						case "OR":
							stackInsertBinary(Level.or, t.kind);
							break;
						case "SELECT":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.select, t.text);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "FROM":
							// EXTRACT(part FROM expr) - not a clause keyword here.
							if (stack[$-1].type == "EXTRACT(")
								return;
							goto case "WHERE";
						case "BY":
							// ORDER/PARTITION BY inside OVER(...) etc. stays inline.
							if (stack
								.retro
								.find!(n => n.level >= Level.parensInner)
								.front
								.type
								.among("OVER(", "AS(", "`ARRAY_AGG`("))
							{
								wsPre = wsPost = WhiteSpace.space;
								auto n = stackEnter(Level.select, "BY-inline", true);
								n.tokenIndent[tokenIndex] = 0;
								n.softLineBreak[tokenIndex] = n.softLineBreak[tokenIndex + 1] = true;
								return;
							}
							goto case "WHERE";
						case "WHERE":
						case "HAVING":
						case "QUALIFY":
						case "WINDOW":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.select, t.text);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "JOIN":
							wsPre = WhiteSpace.newLine;
							wsPost = WhiteSpace.space;
							stackPopTo(Level.join);
							auto n = stackEnter(Level.join, t.kind);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "ON":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.on, t.kind);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "ROWS":
							wsPre = WhiteSpace.newLine;
							wsPost = WhiteSpace.space;
							auto n = stackEnter(Level.select, t.kind, true);
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex] = true;
							break;
						case "EXCEPT":
							// SELECT * EXCEPT (...) - not a set operation.
							if (tokenIndex && tokens[tokenIndex - 1] == Token(TokenOperator("*")))
								return;
							goto case;
						case "UNION":
						case "INTERSECT":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.union_, t.text);
							n.indent = 0;
							// Outdent the set-operation keyword itself.
							n.tokenIndent[tokenIndex] = -indentationWidth;
							break;
						case "WITH":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.select, t.text);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "CREATE":
							wsPre = WhiteSpace.newLine;
							auto n = stackEnter(Level.select, t.text);
							n.indent = 0;
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "CASE":
							wsPre = wsPost = WhiteSpace.space;
							auto n = stackEnter(Level.case_, t.text);
							n.softLineBreak[tokenIndex] = n.softLineBreak[tokenIndex + 1] = true;
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "WHEN":
						case "ELSE":
							wsPre = wsPost = WhiteSpace.space;
							auto p = stackPopTo(Level.case_);
							if (p)
								p.softLineBreak[tokenIndex] = true;
							auto n = stackEnter(Level.when, t.text);
							n.softLineBreak[tokenIndex] = true;
							n.indent = 2 * indentationWidth;
							n.tokenIndent[tokenIndex] = 0;
							n.breakWithSibling = true;
							break;
						case "THEN":
							wsPre = wsPost = WhiteSpace.space;
							auto n = stackEnter(Level.when, t.text, true);
							n.indent = 2 * indentationWidth;
							n.softLineBreak[tokenIndex] = true;
							n.tokenIndent[tokenIndex] = indentationWidth;
							break;
						case "END":
							wsPre = WhiteSpace.space;
							auto n = stackExit(Level.case_, "CASE ... END");
							n.softLineBreak[tokenIndex] = true;
							n.tokenIndent[tokenIndex] = 0;
							break;
						default:
							break;
					}
				},
				(ref const TokenIdentifier t)
				{
					// If this is a Dbt call which we know doesn't produce any output,
					// format it like a statement.
					if (t.text.length == 1)
						t.text[0].match!(
							(ref const DbtExpression e)
							{
								string s = e.expr;
								s.skipOver("-");
								if (s.strip.startsWith("config("))
									wsPre = wsPost = WhiteSpace.blankLine;
							},
							(ref const _) {}
						);

					isWord = true;
				},
				(ref const TokenNamedParameter t)
				{
					isWord = true;
				},
				(ref const TokenOperator t)
				{
					switch (t.text)
					{
						case ".":
							break;
						case "(":
							// Classify the paren by what precedes it
							// (function call, keyword construct, or grouping).
							string context = "(";
							if (tokenIndex)
								tokens[tokenIndex - 1].match!(
									(ref const TokenIdentifier t) { context = '`' ~ t.text.tryToString() ~ "`("; },
									(ref const TokenKeyword t) { context = t.kind ~ "("; },
									(ref const _) {},
								);
							auto n = stackInsert(Level.parensOuter, context);
							n.indent = n.tokenIndent[tokenIndex] = 0;
							if (context.among("JOIN(", "USING(") && stack[$-2].type == "JOIN")
								n.parentIndentOverride = 0;
							if (context == "IN(")
								n.parentIndentOverride = 0;
							n = stackPush_(Level.parensInner, context);
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex + 1] = true;
							break;
						case ")":
							auto n = stackExit(Level.parensInner, "( ... )");
							enforce(n.type.endsWith("("), "Found end of " ~ n.type ~ " while looking for end of ( ... )");
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex] = true;
							stackExit(Level.parensOuter, "( ... )");

							if (tokenIndex + 1 < tokens.length)
								tokens[tokenIndex + 1].match!(
									(ref const TokenOperator t) {},
									(ref const TokenNumber t) {},
									(ref const _) { wsPost = WhiteSpace.space; }
								);

							break;
						case "[":
							auto n = stackInsert(Level.parensOuter, t.text);
							n.indent = n.tokenIndent[tokenIndex] = 0;
							n = stackPush_(Level.parensInner, t.text);
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex + 1] = true;
							break;
						case "]":
							auto n = stackExit(Level.parensInner, "[ ... ]");
							enforce(n.type == "[", "Found end of " ~ n.type ~ " while looking for end of [ ... ]");
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex] = true;
							stackExit(Level.parensOuter, "[ ... ]");
							break;
						case ",":
							auto n = stackInsert(Level.comma, t.text);
							n.indent = 0;
							n.tokenIndent[tokenIndex] = 0;
							n.breakWithParent = true;

							if (stack[$-2].type == "<")
								wsPost = WhiteSpace.space;
							else
							if (stack[$-2].type == "WITH")
								wsPost = WhiteSpace.blankLine;
							else
							if (stack[$-2].level == Level.select && stack[$-2].type != "BY-inline")
								wsPost = WhiteSpace.newLine;
							else
							{
								wsPost = WhiteSpace.space;
								n.softLineBreak[tokenIndex + 1] = true;
							}
							break;
						case ";":
							wsPost = WhiteSpace.blankLine;
							auto n = stackInsert(Level.statement, t.text);
							n.indent = 0;
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "*":
							// `.*` (e.g. t.*) - no node, just spacing.
							if (tokenIndex && tokens[tokenIndex - 1].match!(
								(ref const TokenOperator t) => t.text == ".",
								(ref const _) => false
							))
							{
								wsPost = WhiteSpace.space;
								break;
							}
							// `*,` (e.g. SELECT *, ...) - also not multiplication.
							if (tokenIndex + 1 < tokens.length && tokens[tokenIndex + 1].match!(
								(ref const TokenOperator t) => t.text == ",",
								(ref const _) => false
							))
							{
								wsPre = WhiteSpace.space;
								break;
							}
							goto case "/";

						// Binary operators and others
						case "=":
						case "<":
						case ">":
						case "<=":
						case ">=":
						case "!=":
						case "<>":
							stackInsertBinary(Level.comparison, t.text);
							break;
						case "|":
							stackInsertBinary(Level.bitwiseOr, t.text);
							break;
						case "^":
							stackInsertBinary(Level.bitwiseXor, t.text);
							break;
						case "&":
							stackInsertBinary(Level.bitwiseAnd, t.text);
							break;
						case "<<":
						case ">>":
							stackInsertBinary(Level.shift, t.text);
							break;
						case "-":
						case "+":
							stackInsertBinary(Level.addition, t.text);
							break;
						case "/":
						case "||":
							stackInsertBinary(Level.multiplication, t.text);
							break;
						default:
							wsPre = wsPost = WhiteSpace.space;
							break;
					}
				},
				(ref const TokenAngleBracket t)
				{
					final switch (t.text)
					{
						case "<":
							auto n = stackInsert(Level.parensOuter, t.text);
							n.indent = n.tokenIndent[tokenIndex] = 0;
							n = stackPush_(Level.parensInner, t.text);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case ">":
							auto n = stackExit(Level.parensInner, "< ... >");
							enforce(n.type == "<", "Found end of " ~ n.type ~ " while looking for end of < ... >");
							n.tokenIndent[tokenIndex] = 0;
							stackExit(Level.parensOuter, "< ... >");
							break;
					}
				},
				(ref const TokenString t)
				{
					isWord = true;
				},
				(ref const TokenNumber t)
				{
					isWord = true;
				},
				(ref const TokenDbtStatement t)
				{
					wsPre = wsPost = WhiteSpace.newLine;
					switch (t.kind)
					{
						case "for":
						case "if":
						case "macro":
						case "filter":
							auto n = stackPush_(Level.dbt, "%" ~ t.kind);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "set":
							// Block form ({% set x %}...{% endset %}) opens a node;
							// the expression form ({% set x = ... %}) does not.
							if (!t.text.canFind('='))
								goto case "for";
							break;
						case "elif":
						case "else":
							stackPopTo(Level.dbt);
							auto n = stack[$ - 1];
							enforce(n.type == "%if" || n.type == "%for",
								"Found " ~ t.kind ~ " but expected end" ~ n.type[1..$]
							);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "endfor":
						case "endif":
							auto n = stackExit(Level.dbt, "%" ~ t.kind[3 .. $]);
							enforce(n.type.endsWith("%" ~ t.kind[3 .. $]),
								"Found " ~ t.kind ~ " but expected end" ~ n.type[1..$]
							);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "endmacro":
						case "endfilter":
						case "endset":
							wsPost = WhiteSpace.blankLine;
							goto case "endfor";
						default:
							break;
					}
				},
				(ref const TokenDbtComment t)
				{
					wsPre = wsPost = WhiteSpace.newLine;
				},
			);

			whiteSpace[tokenIndex].maximize(wsPre);
			// Adjacent word-like tokens must be separated by at least a space.
			if (!whiteSpace[tokenIndex] && wasWord && isWord)
				whiteSpace[tokenIndex] = WhiteSpace.space;

			tokenIndex++;

			whiteSpace[tokenIndex] = wsPost;
			wasWord = isWord;
		}

		// Close any still-open nodes at end of input.
		while (stack.length)
		{
			stack[$-1].end = tokens.length;
			stack = stack[0 .. $-1];
		}
	}

	// Comments generally describe the thing below them,
	// so evict them out of blocks if they are on the border.
	{
		void adjust(Node* n)
		{
			alias isComment = n => n.match!(
				(ref const TokenComment t) => true,
				(ref const _) => false
			);
			while (n.start < n.end && isComment(tokens[n.start]))
				n.start++;
			while (n.end > n.start && isComment(tokens[n.end - 1]))
				n.end--;
			foreach (child; n.children)
				adjust(child);
		}
		adjust(&root);
	}

	// AND should always have line breaks when nested directly in a WHERE.
	{
		void adjust(Node* n, Node* parent)
		{
			if (n.level == Level.and && (parent.level == Level.select || parent.level == Level.on))
				foreach (tokenIndex, _; n.tokenIndent)
					whiteSpace[tokenIndex].maximize(WhiteSpace.newLine);
			foreach (child; n.children)
				adjust(child, n);
		}
		adjust(&root, null);
	}

	// Estimate how many characters a token typically occupies when rendered,
	// for the line-complexity heuristic below.
	size_t typicalLength(size_t tokenIndex)
	{
		return tokens[tokenIndex].match!(
			(ref const TokenWhiteSpace t) => 0,
			(ref const TokenComment t) => 60,
			(ref const TokenKeyword t) => t.text.split(" ").length * (5 + 1),
			(ref const TokenIdentifier t)
			{
				import std.ascii : isAlphaNum;

				bool wasLetter;
				size_t length;

				// Charge 5 per run of letters, 1 per punctuation character.
				void handle(dchar c)
				{
					bool isLetter = c >= 0x80 || isAlphaNum(cast(char)c);
					if (!isLetter)
						length++;
					else
					if (!wasLetter)
						length += 5;
					wasLetter = isLetter;
				}

				foreach (e; t.text)
					e.match!(
						(dchar c) { handle(c); },
						(ref const DbtExpression e) { foreach (c; e.expr) handle(c); },
					);

				return length;
			},
			(ref const TokenNamedParameter t) => 10,
			(ref const TokenOperator t) => 1 + 1,
			(ref const TokenAngleBracket t) => 1 + 1,
			(ref const TokenString t) => 10,
			(ref const TokenNumber t) => 3 + 1,
			(ref const TokenDbtStatement t) => 50,
			(ref const TokenDbtComment t) => 100,
		);
	}

	// Convert soft breaks into spaces or newlines, depending on local complexity
	{
		size_t i = root.start;

		int scan(Node* n)
		in (i == n.start)
		out(; i == n.end)
		{
			int complexity = 0;

			foreach (childIndex; 0 .. n.children.length + 1)
			{
				// Process the gap before this child
				auto gapEnd = childIndex < n.children.length ? n.children[childIndex].start : n.end;
				while (true)
				{
					if (i < gapEnd)
						complexity += typicalLength(i);

					// A newline belongs to the inner-most node which fully contains it
					if (i > n.start && i < n.end)
						if (whiteSpace[i] >= WhiteSpace.newLine)
							complexity = maxLineComplexity; // Forced by e.g. sub-query

					if (i < gapEnd)
						i++;
					else
						break;
				}

				// Process the child
				if (childIndex < n.children.length)
					complexity += scan(n.children[childIndex]);
			}

			// Promote this node's soft breaks to hard newlines,
			// recursing into children that asked to break with their parent.
			void breakNode(Node* n)
			{
				if (n.breaksApplied)
					return;
				n.breaksApplied = true;

				foreach (i, _; n.softLineBreak)
					whiteSpace[i].maximize(WhiteSpace.newLine);

				foreach (child; n.children)
					if (child.breakWithParent)
						breakNode(child);
			}

			if (complexity >= maxLineComplexity)
			{
				// Do a second pass, converting soft line breaks to hard
				breakNode(n);
			}

			// Propagate breaks to siblings, if requested
			if (n.children.any!(c => c.breaksApplied))
				n.children.filter!(c => c.breakWithSibling).each!breakNode;

			return complexity;
		}
		scan(&root);
	}

	// Decide if we want to apply indentation on a per-node basis.
	{
		size_t i = root.start;

		bool scan(Node* n)
		in (i == n.start)
		out(; i == n.end)
		{
			bool hasLineBreaks;

			foreach (childIndex; 0 .. n.children.length + 1)
			{
				// Process the gap before this child
				auto gapEnd = childIndex < n.children.length ? n.children[childIndex].start : n.end;
				while (i < gapEnd)
				{
					if (i > n.start)
						hasLineBreaks |= whiteSpace[i] >= WhiteSpace.newLine;
					i++;
				}

				// Process the child
				if (childIndex < n.children.length)
					hasLineBreaks |= scan(n.children[childIndex]);
			}

			if (n.conditionalIndent)
				if (!(hasLineBreaks && whiteSpace[n.start] >= WhiteSpace.newLine))
				{
					// We decided to not indent this node, clear its indentation information
					n.indent = 0;
					n.tokenIndent = null;
				}

			return hasLineBreaks;
		}
		scan(&root);
	}

	// Convert nested hierarchy into flattened indentation.
	auto indent = new sizediff_t[tokens.length + 1];
	{
		int currentIndent;
		size_t i = root.start;

		void scan(Node* n)
		in (i == n.start)
		out(; i == n.end)
		{
			foreach (childIndex; 0 .. n.children.length + 1)
			{
				// Process the gap before this child
				// auto gapStart = i;
				auto gapEnd = childIndex < n.children.length ? n.children[childIndex].start : n.end;
				while (i < gapEnd)
					indent[i++] = currentIndent + n.indent;

				// Process the child
				if (childIndex < n.children.length)
				{
					auto child = n.children[childIndex];
					auto childIndent = n.indent;
					if (child.parentIndentOverride != typeof(*child).init.parentIndentOverride)
						childIndent = child.parentIndentOverride;

					currentIndent += childIndent;
					scope(success) currentIndent -= childIndent;

					scan(child);
				}
			}

			// Apply per-token indents at this level
			foreach (tokenIndex, tokenIndent; n.tokenIndent)
				indent[tokenIndex] += tokenIndent - n.indent;
		}
		scan(&root);

		assert(currentIndent == 0);
		assert(i == root.end);
	}

	// Style tweak: remove space on the inside of ( and )
	foreach (i; 0 .. tokens.length)
	{
		if (tokens[i].among(Token(TokenOperator("(")), Token(TokenOperator("["))) && whiteSpace[i + 1] == WhiteSpace.space)
			whiteSpace[i + 1] = WhiteSpace.none;
		if (tokens[i].among(Token(TokenOperator(")")), Token(TokenOperator("]"))) && whiteSpace[i] == WhiteSpace.space)
			whiteSpace[i] = WhiteSpace.none;
	}

	// Make another pass to avoid excessively deep comment indentation
	foreach_reverse (i; 0 .. tokens.length)
		tokens[i].match!(
			(ref const TokenComment t) { indent[i].minimize(indent[i + 1]); },
			(ref const _) {}
		);

	// Final pass: materialize WhiteSpace into TokenWhiteSpace
	Token[] result;
	foreach (i; 0 .. tokens.length)
	{
		if (i)
			final switch (whiteSpace[i])
			{
				case WhiteSpace.none:
					break;
				case WhiteSpace.space:
					result ~= Token(TokenWhiteSpace(" "));
					break;
				case WhiteSpace.blankLine:
					result ~= Token(TokenWhiteSpace("\n"));
					goto case;
				case WhiteSpace.newLine:
					result ~= Token(TokenWhiteSpace("\n" ~ " ".replicate(indent[i].max(0))));
					break;
			}
		else
			result ~= Token(TokenWhiteSpace(" ".replicate(indent[i].max(0)))); // Pedantic - should always be 0
		result ~= tokens[i];
	}

	if (tokens.length)
		result ~= Token(TokenWhiteSpace("\n"));

	return result;
}