module squelch.format;

import std.algorithm.comparison : among, max;
import std.algorithm.iteration : map, filter, each;
import std.algorithm.searching;
import std.array : replicate, split;
import std.exception;
import std.range : retro;
import std.string : strip;
import std.sumtype : match;

static import std.string;

import ae.utils.array : elementIndex;
import ae.utils.math : maximize, minimize;

import squelch.common;

// Break lines in expressions that span more than this many typical characters.
enum maxLineComplexity = 80;

// Indentation width (number of spaces) to use for most constructs.
enum indentationWidth = 2;

Token[] format(const scope Token[] tokens)
{
	// Lexical priorities
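	// Levels are ordered from tightest-binding to loosest: a node is implicitly
	// closed whenever a construct of an equal or looser level begins
	// (see stackPopTo / stackInsert below).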
	enum Level
	{
		// https://cloud.google.com/bigquery/docs/reference/standard-sql/operators
		parensOuter, /// function call, subscript
		// unary,
		// Binary operators, from highest to lowest priority:
		multiplication,
		addition,
		shift,
		bitwiseAnd,
		bitwiseXor,
		bitwiseOr,
		comparison,
		// not,
		and,
		or,

		then,
		when,
		case_,

		as,

		// Technically not a binary operator, but acts like a low-priority one
		comma,

		// Things like SELECT go here
		with_,
		on,
		join,
		select,
		union_,
		statement = union_,

		// A closing paren unambiguously terminates all other lexical constructs
		parensInner,

		// Dbt macros form their own independent hierarchy
		dbt,

		// Lowest priority - terminates all constructs
		file,
	}

	/// Tree node.
	static struct Node
	{
		Level level; /// Nesting / priority level of this node's expression
		string type; /// A string identifying the specific operation

		Node*[] children; /// If null, then this is a leaf

		/// Covered tokens.
		size_t start, end;

		/// Default indentation level.
		sizediff_t indent = indentationWidth;

		/// Don't indent unless this node contains line breaks
		bool conditionalIndent;

		/// If true, and the parent has its soft line breaks applied, do so here too
		bool breakWithParent;

		/// If true, and any sibling has its soft line breaks applied, do so here too
		bool breakWithSibling;

		/// Indentation level overrides for specific tokens
		/// (mainly for tokens that are part of this node's syntax
		/// and should not be indented).
		sizediff_t[size_t] tokenIndent;

		/// If set, overrides the parent's .indent for this node.
		int parentIndentOverride = int.min;

		/// If set, the whiteSpace before the corresponding token may be changed to newLine
		bool[size_t] softLineBreak;

		/// State flag for newline application.
		bool breaksApplied;
	}
	Node root = {
		level : Level.file,
		type : "file",
		indent : 0,
	};

	enum WhiteSpace
	{
		none,
		space,
		newLine,
		blankLine,
	}
	// whiteSpace[i] is what whitespace we should add before tokens[i]
	auto whiteSpace = new WhiteSpace[tokens.length + 1];
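	// (The extra slot holds the whitespace that would follow the last token.)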

	// First pass
	{
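		// Walk the tokens once, building the node tree with an explicit stack
		// and recording the baseline whitespace to emit before each token.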
		Node*[] stack = [&root];
		bool wasWord;

		foreach (ref token; tokens)
		{
			WhiteSpace wsPre, wsPost;
			bool isWord;

			size_t tokenIndex = tokens.elementIndex(token);

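			// Open a new node as a child of the current stack top and make it current.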
			Node* stackPush_(Level level, string type)
			{
				auto n = new Node;
				n.level = level;
				n.type = type;
				n.start = tokenIndex;

				stack[$-1].children ~= n;
				stack ~= n;
				return n;
			}

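			// Close the current node, ending it at the current token
			// (including it if afterCurrent is set).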
			Node* stackPop_(bool afterCurrent)
			{
				auto n = stack[$-1];
				n.end = tokenIndex + afterCurrent;
				stack = stack[0 .. $-1];
				return n;
			}

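			// Close all nodes that bind more tightly than the given level;
			// returns the node at that level if one is now on top, else null.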
			Node* stackPopTo(Level level)
			{
				while (stack[$-1].level < level)
					stackPop_(false);
				return stack[$-1].level == level ? stack[$-1] : null;
			}

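			// Start a construct at the given level; an existing node at the same
			// level is either reused (glueBackwards) or closed and replaced.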
			Node* stackEnter(Level level, string type, bool glueBackwards = false)
			{
				stackPopTo(level);

				if (stack[$-1].level == level)
				{
					if (glueBackwards)
					{
						stack[$-1].type = type;
						return stack[$-1];
					}
					else
						stackPop_(false);
				}

				return stackPush_(level, type);
			}


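			// Close the construct at the given level (consuming the current token),
			// enforcing that such a construct is actually open.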
			Node* stackExit(Level level, string expected)
			{
				stackPopTo(level);
				enforce(stack[$-1].level == level, std.string.format!
					"Found end of %s while looking for end of %s"(
					stack[$-1].type, expected));
				return stackPop_(true);
			}

			// Like stackEnter, but adopts previous nodes that have a higher priority.
			// Suitable for binary operators.
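			// For example (illustrative): in "a * b + c", the "*" node is created
			// first; when "+" is inserted at a looser level, the finished "a * b"
			// node is adopted as its first child.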
			Node* stackInsert(Level level, string type)
			{
				stackPopTo(level);

				if (stack[$-1].level == level)
				{
					// Implicitly glue backwards
					stack[$-1].type = type;
				}
				else
				{
					auto p = stack[$-1];
					auto start = tokenIndex;
					auto childrenStart = p.children.length;

					// Extend the start backwards to adopt non-syntax tokens
					// and adopt higher-priority nodes
					while (start > p.start)
					{
						if (childrenStart && p.children[childrenStart - 1].end == start)
						{
							// Decide if we want to adopt this child
							if (p.children[childrenStart - 1].level < level)
							{
								childrenStart--;
								start = p.children[childrenStart].start;
							}
							else
								break;
						}
						else
						{
							// Extend the start backwards to adopt non-syntax tokens
							if ((start - 1) !in p.tokenIndent && (start) !in p.softLineBreak)
								start--;
							else
								break;
						}
					}

					// Move the previous hierarchy into the new inserted level
					Node*[] children = p.children[childrenStart .. $];
					p.children = p.children[0 .. childrenStart];

					auto n = stackPush_(level, type);
					n.children = children;
					n.start = start;
				}
				return stack[$-1];
			}

			// Common routine for creating nodes for binary operators
			Node* stackInsertBinary(Level level, string type)
			{
				wsPre = wsPost = WhiteSpace.space;
				auto n = stackInsert(level, type);
				n.indent.maximize(type.length + 1);
				n.tokenIndent[tokenIndex] = 0;
				n.conditionalIndent = true;
				n.softLineBreak[tokenIndex] = n.softLineBreak[n.start] = true;
				return n;
			}

			token.match!(
				(ref const TokenWhiteSpace t)
				{
					assert(false);
				},
				(ref const TokenComment t)
				{
					wsPre = wsPost = WhiteSpace.newLine;
				},
				(ref const TokenKeyword t)
				{
					isWord = true;

					switch (t.kind)
					{
						case "AS":
							wsPre = wsPost = WhiteSpace.space;
							if (tokenIndex + 1 < tokens.length
								&& tokens[tokenIndex + 1].match!(
									(ref const TokenIdentifier t) => true,
									(ref const _) => false
								)
								&& stack.retro
								.find!(n => n.level >= Level.select)
								.front
								.level == Level.select
								&& tokenIndex > 0
								&& !tokens[tokenIndex - 1].match!(
									(ref const TokenKeyword t) => t.kind == "WITH OFFSET",
									(ref const _) => false
								)
							)
								stackInsertBinary(Level.as, t.kind);
							break;
						case "NOT":
						case "IS":
						case "OVER":
						case "RETURNS":
						case "IGNORE":
						case "USING":
							wsPre = wsPost = WhiteSpace.space;
							break;
						case "BETWEEN":
							auto n = stackInsertBinary(Level.comparison, t.kind);
							n.softLineBreak.remove(tokenIndex);
							n.softLineBreak.remove(n.start);
							break;
						case "LIKE":
						case "IN":
							stackInsertBinary(Level.comparison, t.kind);
							break;
						case "AND":
							wsPre = wsPost = WhiteSpace.space;
							stackPopTo(Level.comparison);
							if (stack[$-1].type == "BETWEEN")
							{
								auto n = stackInsert(Level.comparison, t.kind);
								n.indent = 0;
								n.tokenIndent[tokenIndex] = 0;
								break;
							}
							stackInsertBinary(Level.and, t.kind);
							return;
						case "OR":
							stackInsertBinary(Level.or, t.kind);
							break;
						case "SELECT":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.select, t.text);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "FROM":
							if (stack[$-1].type == "EXTRACT(")
								return;
							goto case "WHERE";
						case "BY":
							if (stack
								.retro
								.find!(n => n.level >= Level.parensInner)
								.front
								.type
								.among("OVER(", "AS(", "`ARRAY_AGG`("))
							{
								wsPre = wsPost = WhiteSpace.space;
								auto n = stackEnter(Level.select, "BY-inline", true);
								n.tokenIndent[tokenIndex] = 0;
								n.softLineBreak[tokenIndex] = n.softLineBreak[tokenIndex + 1] = true;
								return;
							}
							goto case "WHERE";
						case "WHERE":
						case "HAVING":
						case "QUALIFY":
						case "WINDOW":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.select, t.text);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "JOIN":
							wsPre = WhiteSpace.newLine;
							wsPost = WhiteSpace.space;
							stackPopTo(Level.join);
							auto n = stackEnter(Level.join, t.kind);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "ON":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.on, t.kind);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "ROWS":
							wsPre = WhiteSpace.newLine;
							wsPost = WhiteSpace.space;
							auto n = stackEnter(Level.select, t.kind, true);
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex] = true;
							break;
						case "EXCEPT":
							if (tokenIndex && tokens[tokenIndex - 1] == Token(TokenOperator("*")))
								return;
							goto case;
						case "UNION":
						case "INTERSECT":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.union_, t.text);
							n.indent = 0;
							n.tokenIndent[tokenIndex] = -indentationWidth;
							break;
						case "WITH":
							wsPre = wsPost = WhiteSpace.newLine;
							auto n = stackEnter(Level.select, t.text);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "CREATE":
							wsPre = WhiteSpace.newLine;
							auto n = stackEnter(Level.select, t.text);
							n.indent = 0;
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "CASE":
							wsPre = wsPost = WhiteSpace.space;
							auto n = stackEnter(Level.case_, t.text);
							n.softLineBreak[tokenIndex] = n.softLineBreak[tokenIndex + 1] = true;
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "WHEN":
						case "ELSE":
							wsPre = wsPost = WhiteSpace.space;
							auto p = stackPopTo(Level.case_);
							if (p)
								p.softLineBreak[tokenIndex] = true;
							auto n = stackEnter(Level.when, t.text);
							n.softLineBreak[tokenIndex] = true;
							n.indent = 2 * indentationWidth;
							n.tokenIndent[tokenIndex] = 0;
							n.breakWithSibling = true;
							break;
						case "THEN":
							wsPre = wsPost = WhiteSpace.space;
							auto n = stackEnter(Level.when, t.text, true);
							n.indent = 2 * indentationWidth;
							n.softLineBreak[tokenIndex] = true;
							n.tokenIndent[tokenIndex] = indentationWidth;
							break;
						case "END":
							wsPre = WhiteSpace.space;
							auto n = stackExit(Level.case_, "CASE ... END");
							n.softLineBreak[tokenIndex] = true;
							n.tokenIndent[tokenIndex] = 0;
							break;
						default:
							break;
					}
				},
				(ref const TokenIdentifier t)
				{
					// If this is a Dbt call which we know doesn't produce any output,
					// format it like a statement.
					if (t.text.length == 1)
						t.text[0].match!(
							(ref const DbtExpression e)
							{
								string s = e.expr;
								s.skipOver("-");
								if (s.strip.startsWith("config("))
									wsPre = wsPost = WhiteSpace.blankLine;
							},
							(ref const _) {}
						);

					isWord = true;
				},
				(ref const TokenNamedParameter t)
				{
					isWord = true;
				},
				(ref const TokenOperator t)
				{
					switch (t.text)
					{
						case ".":
							break;
						case "(":
							string context = "(";
							if (tokenIndex)
								tokens[tokenIndex - 1].match!(
									(ref const TokenIdentifier t) { context = '`' ~ t.text.tryToString() ~ "`("; },
									(ref const TokenKeyword t) { context = t.kind ~ "("; },
									(ref const _) {},
								);
							auto n = stackInsert(Level.parensOuter, context);
							n.indent = n.tokenIndent[tokenIndex] = 0;
							if (context.among("JOIN(", "USING(") && stack[$-2].type == "JOIN")
								n.parentIndentOverride = 0;
							if (context == "IN(")
								n.parentIndentOverride = 0;
							n = stackPush_(Level.parensInner, context);
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex + 1] = true;
							break;
						case ")":
							auto n = stackExit(Level.parensInner, "( ... )");
							enforce(n.type.endsWith("("), "Found end of " ~ n.type ~ " while looking for end of ( ... )");
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex] = true;
							stackExit(Level.parensOuter, "( ... )");

							if (tokenIndex + 1 < tokens.length)
								tokens[tokenIndex + 1].match!(
									(ref const TokenOperator t) {},
									(ref const TokenNumber t) {},
									(ref const _) { wsPost = WhiteSpace.space; }
								);

							break;
						case "[":
							auto n = stackInsert(Level.parensOuter, t.text);
							n.indent = n.tokenIndent[tokenIndex] = 0;
							n = stackPush_(Level.parensInner, t.text);
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex + 1] = true;
							break;
						case "]":
							auto n = stackExit(Level.parensInner, "[ ... ]");
							enforce(n.type == "[", "Found end of " ~ n.type ~ " while looking for end of [ ... ]");
							n.tokenIndent[tokenIndex] = 0;
							n.softLineBreak[tokenIndex] = true;
							stackExit(Level.parensOuter, "[ ... ]");
							break;
						case ",":
							auto n = stackInsert(Level.comma, t.text);
							n.indent = 0;
							n.tokenIndent[tokenIndex] = 0;
							n.breakWithParent = true;

							if (stack[$-2].type == "<")
								wsPost = WhiteSpace.space;
							else
							if (stack[$-2].type == "WITH")
								wsPost = WhiteSpace.blankLine;
							else
							if (stack[$-2].level == Level.select && stack[$-2].type != "BY-inline")
								wsPost = WhiteSpace.newLine;
							else
							{
								wsPost = WhiteSpace.space;
								n.softLineBreak[tokenIndex + 1] = true;
							}
							break;
						case ";":
							wsPost = WhiteSpace.blankLine;
							auto n = stackInsert(Level.statement, t.text);
							n.indent = 0;
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "*":
							if (tokenIndex && tokens[tokenIndex - 1].match!(
									(ref const TokenOperator t) => t.text == ".",
									(ref const _) => false
								))
							{
								wsPost = WhiteSpace.space;
								break;
							}
							if (tokenIndex + 1 < tokens.length && tokens[tokenIndex + 1].match!(
									(ref const TokenOperator t) => t.text == ",",
									(ref const _) => false
								))
							{
								wsPre = WhiteSpace.space;
								break;
							}
							goto case "/";

						// Binary operators and others
						case "=":
						case "<":
						case ">":
						case "<=":
						case ">=":
						case "!=":
						case "<>":
							stackInsertBinary(Level.comparison, t.text);
							break;
						case "|":
							stackInsertBinary(Level.bitwiseOr, t.text);
							break;
						case "^":
							stackInsertBinary(Level.bitwiseXor, t.text);
							break;
						case "&":
							stackInsertBinary(Level.bitwiseAnd, t.text);
							break;
						case "<<":
						case ">>":
							stackInsertBinary(Level.shift, t.text);
							break;
						case "-":
						case "+":
							stackInsertBinary(Level.addition, t.text);
							break;
						case "/":
						case "||":
							stackInsertBinary(Level.multiplication, t.text);
							break;
						default:
							wsPre = wsPost = WhiteSpace.space;
							break;
					}
				},
				(ref const TokenAngleBracket t)
				{
					final switch (t.text)
					{
						case "<":
							auto n = stackInsert(Level.parensOuter, t.text);
							n.indent = n.tokenIndent[tokenIndex] = 0;
							n = stackPush_(Level.parensInner, t.text);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case ">":
							auto n = stackExit(Level.parensInner, "< ... >");
							enforce(n.type == "<", "Found end of " ~ n.type ~ " while looking for end of < ... >");
							n.tokenIndent[tokenIndex] = 0;
							stackExit(Level.parensOuter, "< ... >");
							break;
					}
				},
				(ref const TokenString t)
				{
					isWord = true;
				},
				(ref const TokenNumber t)
				{
					isWord = true;
				},
				(ref const TokenDbtStatement t)
				{
					wsPre = wsPost = WhiteSpace.newLine;
					switch (t.kind)
					{
						case "for":
						case "if":
						case "macro":
						case "filter":
							auto n = stackPush_(Level.dbt, "%" ~ t.kind);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "set":
							if (!t.text.canFind('='))
								goto case "for";
							break;
						case "elif":
						case "else":
							stackPopTo(Level.dbt);
							auto n = stack[$ - 1];
							enforce(n.type == "%if" || n.type == "%for",
								"Found " ~ t.kind ~ " but expected end" ~ n.type[1..$]
							);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "endfor":
						case "endif":
							auto n = stackExit(Level.dbt, "%" ~ t.kind[3 .. $]);
							enforce(n.type.endsWith("%" ~ t.kind[3 .. $]),
								"Found " ~ t.kind ~ " but expected end" ~ n.type[1..$]
							);
							n.tokenIndent[tokenIndex] = 0;
							break;
						case "endmacro":
						case "endfilter":
						case "endset":
							wsPost = WhiteSpace.blankLine;
							goto case "endfor";
						default:
							break;
					}
				},
				(ref const TokenDbtComment t)
				{
					wsPre = wsPost = WhiteSpace.newLine;
				},
			);

			whiteSpace[tokenIndex].maximize(wsPre);
			if (!whiteSpace[tokenIndex] && wasWord && isWord)
				whiteSpace[tokenIndex] = WhiteSpace.space;

			tokenIndex++;

			whiteSpace[tokenIndex] = wsPost;
			wasWord = isWord;
		}

		while (stack.length)
		{
			stack[$-1].end = tokens.length;
			stack = stack[0 .. $-1];
		}
	}

	// Comments generally describe the thing below them,
	// so evict them out of blocks if they are on the border.
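	// (e.g. a comment written just above a FROM clause would otherwise end up
	// as the final token of the preceding SELECT node and be indented with it.)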
	{
		void adjust(Node* n)
		{
			alias isComment = tok => tok.match!(
				(ref const TokenComment t) => true,
				(ref const _) => false
			);
			while (n.start < n.end && isComment(tokens[n.start]))
				n.start++;
			while (n.end > n.start && isComment(tokens[n.end - 1]))
				n.end--;
			foreach (child; n.children)
				adjust(child);
		}
		adjust(&root);
	}

	// AND should always have line breaks when nested directly in a clause such as WHERE or ON.
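	// e.g. in "WHERE a AND b AND c", each AND starts a new line even when the
	// whole condition would otherwise fit under maxLineComplexity.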
	{
		void adjust(Node* n, Node* parent)
		{
			if (n.level == Level.and && (parent.level == Level.select || parent.level == Level.on))
				foreach (tokenIndex, _; n.tokenIndent)
					whiteSpace[tokenIndex].maximize(WhiteSpace.newLine);
			foreach (child; n.children)
				adjust(child, n);
		}
		adjust(&root, null);
	}

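	// Estimate how many characters a token typically contributes to a line;
	// a rough heuristic used only to decide where line breaks are needed.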
	size_t typicalLength(size_t tokenIndex)
	{
		return tokens[tokenIndex].match!(
			(ref const TokenWhiteSpace t) => 0,
			(ref const TokenComment t) => 60,
			(ref const TokenKeyword t) => t.text.split(" ").length * (5 + 1),
			(ref const TokenIdentifier t)
			{
				import std.ascii : isAlphaNum;

				bool wasLetter;
				size_t length;

				void handle(dchar c)
				{
					bool isLetter = c >= 0x80 || isAlphaNum(cast(char)c);
					if (!isLetter)
						length++;
					else
						if (!wasLetter)
							length += 5;
					wasLetter = isLetter;
				}

				foreach (e; t.text)
					e.match!(
						(dchar c) { handle(c); },
						(ref const DbtExpression e) { foreach (c; e.expr) handle(c); },
					);

				return length;
			},
			(ref const TokenNamedParameter t) => 10,
			(ref const TokenOperator t) => 1 + 1,
			(ref const TokenAngleBracket t) => 1 + 1,
			(ref const TokenString t) => 10,
			(ref const TokenNumber t) => 3 + 1,
			(ref const TokenDbtStatement t) => 50,
			(ref const TokenDbtComment t) => 100,
		);
	}

	// Convert soft breaks into spaces or newlines, depending on local complexity
	{
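		// scan() returns an estimate of each node's single-line width; once it
		// reaches maxLineComplexity, the node's soft line breaks (and those of
		// children marked breakWithParent) become hard newlines.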
		size_t i = root.start;

		int scan(Node* n)
		in (i == n.start)
		out(; i == n.end)
		{
			int complexity = 0;

			foreach (childIndex; 0 .. n.children.length + 1)
			{
				// Process the gap before this child
				auto gapEnd = childIndex < n.children.length ? n.children[childIndex].start : n.end;
				while (true)
				{
					if (i < gapEnd)
						complexity += typicalLength(i);

					// A newline belongs to the inner-most node which fully contains it
					if (i > n.start && i < n.end)
						if (whiteSpace[i] >= WhiteSpace.newLine)
							complexity = maxLineComplexity;  // Forced by e.g. sub-query

					if (i < gapEnd)
						i++;
					else
						break;
				}

				// Process the child
				if (childIndex < n.children.length)
					complexity += scan(n.children[childIndex]);
			}

			void breakNode(Node* n)
			{
				if (n.breaksApplied)
					return;
				n.breaksApplied = true;

				foreach (i, _; n.softLineBreak)
					whiteSpace[i].maximize(WhiteSpace.newLine);

				foreach (child; n.children)
					if (child.breakWithParent)
						breakNode(child);
			}

			if (complexity >= maxLineComplexity)
			{
				// Do a second pass, converting soft line breaks to hard
				breakNode(n);
			}

			// Propagate breaks to siblings, if requested
			if (n.children.any!(c => c.breaksApplied))
				n.children.filter!(c => c.breakWithSibling).each!breakNode;

			return complexity;
		}
		scan(&root);
	}

	// Decide if we want to apply indentation on a per-node basis.
	{
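		// A node with conditionalIndent keeps its indentation only if it
		// contains a line break and itself starts on a new line.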
		size_t i = root.start;

		bool scan(Node* n)
		in (i == n.start)
		out(; i == n.end)
		{
			bool hasLineBreaks;

			foreach (childIndex; 0 .. n.children.length + 1)
			{
				// Process the gap before this child
				auto gapEnd = childIndex < n.children.length ? n.children[childIndex].start : n.end;
				while (i < gapEnd)
				{
					if (i > n.start)
						hasLineBreaks |= whiteSpace[i] >= WhiteSpace.newLine;
					i++;
				}

				// Process the child
				if (childIndex < n.children.length)
					hasLineBreaks |= scan(n.children[childIndex]);
			}

			if (n.conditionalIndent)
				if (!(hasLineBreaks && whiteSpace[n.start] >= WhiteSpace.newLine))
				{
					// We decided not to indent this node; clear its indentation information
					n.indent = 0;
					n.tokenIndent = null;
				}

			return hasLineBreaks;
		}
		scan(&root);
	}

	// Convert nested hierarchy into flattened indentation.
	auto indent = new sizediff_t[tokens.length + 1];
	{
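		// Accumulate each ancestor's .indent into a running total; for tokens
		// with a tokenIndent override, the owning node's own contribution is
		// replaced by the override value.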
		int currentIndent;
		size_t i = root.start;

		void scan(Node* n)
		in (i == n.start)
		out(; i == n.end)
		{
			foreach (childIndex; 0 .. n.children.length + 1)
			{
				// Process the gap before this child
				// auto gapStart = i;
				auto gapEnd = childIndex < n.children.length ? n.children[childIndex].start : n.end;
				while (i < gapEnd)
					indent[i++] = currentIndent + n.indent;

				// Process the child
				if (childIndex < n.children.length)
				{
					auto child = n.children[childIndex];
					auto childIndent = n.indent;
					if (child.parentIndentOverride != typeof(*child).init.parentIndentOverride)
						childIndent = child.parentIndentOverride;

					currentIndent += childIndent;
					scope(success) currentIndent -= childIndent;

					scan(child);
				}
			}

			// Apply per-token indents at this level
			foreach (tokenIndex, tokenIndent; n.tokenIndent)
				indent[tokenIndex] += tokenIndent - n.indent;
		}
		scan(&root);

		assert(currentIndent == 0);
		assert(i == root.end);
	}

	// Style tweak: remove the space just inside brackets: ( ), [ ]
	foreach (i; 0 .. tokens.length)
	{
		if (tokens[i].among(Token(TokenOperator("(")), Token(TokenOperator("["))) && whiteSpace[i + 1] == WhiteSpace.space)
			whiteSpace[i + 1] = WhiteSpace.none;
		if (tokens[i].among(Token(TokenOperator(")")), Token(TokenOperator("]"))) && whiteSpace[i] == WhiteSpace.space)
			whiteSpace[i] = WhiteSpace.none;
	}

	// Make another pass to avoid excessively deep comment indentation
	foreach_reverse (i; 0 .. tokens.length)
		tokens[i].match!(
			(ref const TokenComment t) { indent[i].minimize(indent[i + 1]); },
			(ref const _) {}
		);

	// Final pass: materialize WhiteSpace into TokenWhiteSpace
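	// (A blankLine is emitted as a bare "\n" plus the newLine case's
	// "\n" and indentation, via the goto case fall-through below.)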
	Token[] result;
	foreach (i; 0 .. tokens.length)
	{
		if (i)
			final switch (whiteSpace[i])
			{
				case WhiteSpace.none:
					break;
				case WhiteSpace.space:
					result ~= Token(TokenWhiteSpace(" "));
					break;
				case WhiteSpace.blankLine:
					result ~= Token(TokenWhiteSpace("\n"));
					goto case;
				case WhiteSpace.newLine:
					result ~= Token(TokenWhiteSpace("\n" ~ " ".replicate(indent[i].max(0))));
					break;
			}
		else
			result ~= Token(TokenWhiteSpace(" ".replicate(indent[i].max(0)))); // Pedantic - should always be 0
		result ~= tokens[i];
	}

	if (tokens.length)
		result ~= Token(TokenWhiteSpace("\n"));

	return result;
}