Improve shortcode indentation handling - hugo - Unnamed repository; edit this file 'description' to name the repository.

commit d2cfaede5be420c7d8b701d97b98bc61b87e46d5
parent 322d19a81fedbf423a047bdf286499d2e25d14be
Author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Date:   Sat, 28 May 2022 13:18:50 +0200

Improve shortcode indentation handling

* Record the leading whitespace (tabs, spaces) before the shortcode when parsing the page.
* Apply that indentation to the rendered result of shortcodes without inner content (for shortcodes with inner content, the user is expected to apply the indentation themselves).

Fixes #9946

Diffstat:
M common/text/transform.go  | 14 ++++++++++++++
M common/text/transform_test.go  | 18 ++++++++++++++++++
M hugolib/shortcode.go  | 27 +++++++++++++++++++++++++++
M hugolib/shortcode_test.go  | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M parser/pageparser/item.go  | 12 +++++++++++-
M parser/pageparser/itemtype_string.go  | 31 +++++++++++++++++++++++++++++--
M parser/pageparser/pagelexer.go  | 29 ++++++++++++++++++++++++++++-
M parser/pageparser/pageparser.go  | 5 +++++
M parser/pageparser/pageparser_shortcode_test.go  | 3 +++

9 files changed, 208 insertions(+), 4 deletions(-)
diff --git a/common/text/transform.go b/common/text/transform.go
@@ -61,3 +61,17 @@ func Puts(s string) string {
 	}
 	return s + "\n"
 }
+
+// VisitLinesAfter calls the given function for each line, including newlines, in the given string.
+func VisitLinesAfter(s string, fn func(line string)) {
+	high := strings.Index(s, "\n")
+	for high != -1 {
+		fn(s[:high+1])
+		s = s[high+1:]
+		high = strings.Index(s, "\n")
+	}
+
+	if s != "" {
+		fn(s)
+	}
+}
diff --git a/common/text/transform_test.go b/common/text/transform_test.go
@@ -41,3 +41,21 @@ func TestPuts(t *testing.T) {
 	c.Assert(Puts("\nA\n"), qt.Equals, "\nA\n")
 	c.Assert(Puts(""), qt.Equals, "")
 }
+
+func TestVisitLinesAfter(t *testing.T) {
+	const lines = `line 1
+line 2
+
+line 3`
+
+	var collected []string
+
+	VisitLinesAfter(lines, func(s string) {
+		collected = append(collected, s)
+	})
+
+	c := qt.New(t)
+
+	c.Assert(collected, qt.DeepEquals, []string{"line 1\n", "line 2\n", "\n", "line 3"})
+
+}
diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go
@@ -170,6 +170,8 @@ type shortcode struct {
 	ordinal   int
 	err       error
 
+	indentation string // indentation from source.
+
 	info   tpl.Info       // One of the output formats (arbitrary)
 	templs []tpl.Template // All output formats
 
@@ -398,6 +400,22 @@ func renderShortcode(
 		return "", false, fe
 	}
 
+	if len(sc.inner) == 0 && len(sc.indentation) > 0 {
+		b := bp.GetBuffer()
+		i := 0
+		text.VisitLinesAfter(result, func(line string) {
+			// The first line is already indented by the whitespace preceding the shortcode in the source.
+			if i > 0 {
+				b.WriteString(sc.indentation)
+			}
+			i++
+			b.WriteString(line)
+		})
+
+		result = b.String()
+		bp.PutBuffer(b)
+	}
+
 	return result, hasVariants, err
 }
 
@@ -447,6 +465,15 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
 	}
 	sc := &shortcode{ordinal: ordinal}
 
+	// Back up one to identify any indentation.
+	if pt.Pos() > 0 {
+		pt.Backup()
+		item := pt.Next()
+		if item.IsIndentation() {
+			sc.indentation = string(item.Val)
+		}
+	}
+
 	cnt := 0
 	nestedOrdinal := 0
 	nextLevel := level + 1
diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go
@@ -942,3 +942,76 @@ title: "p1"
 	`)
 
 }
+
+func TestShortcodePreserveIndentation(t *testing.T) {
+	t.Parallel()
+
+	files := `
+-- config.toml --
+-- content/p1.md --
+---
+title: "p1"
+---
+
+## List With Indented Shortcodes
+
+1. List 1
+    {{% mark1 %}}
+	1. Item Mark1 1
+	1. Item Mark1 2
+	{{% mark2 %}}
+	{{% /mark1 %}}
+-- layouts/shortcodes/mark1.md --
+{{ .Inner }}
+-- layouts/shortcodes/mark2.md --
+1. Item Mark2 1
+1. Item Mark2 2
+   1. Item Mark2 2-1
+1. Item Mark2 3
+-- layouts/_default/single.html --
+{{ .Content }}
+`
+
+	b := NewIntegrationTestBuilder(
+		IntegrationTestConfig{
+			T:           t,
+			TxtarString: files,
+			Running:     true,
+		},
+	).Build()
+
+	b.AssertFileContent("public/p1/index.html", "<ol>\n<li>\n<p>List 1</p>\n<ol>\n<li>Item Mark1 1</li>\n<li>Item Mark1 2</li>\n<li>Item Mark2 1</li>\n<li>Item Mark2 2\n<ol>\n<li>Item Mark2 2-1</li>\n</ol>\n</li>\n<li>Item Mark2 3</li>\n</ol>\n</li>\n</ol>")
+
+}
+
+func TestShortcodeCodeblockIndent(t *testing.T) {
+	t.Parallel()
+
+	files := `
+-- config.toml --
+-- content/p1.md --
+---
+title: "p1"
+---
+
+## Code block
+
+    {{% code %}}
+
+-- layouts/shortcodes/code.md --
+echo "foo";
+-- layouts/_default/single.html --
+{{ .Content }}
+`
+
+	b := NewIntegrationTestBuilder(
+		IntegrationTestConfig{
+			T:           t,
+			TxtarString: files,
+			Running:     true,
+		},
+	).Build()
+
+	b.AssertFileContent("public/p1/index.html", "<pre><code>echo &quot;foo&quot;;\n</code></pre>")
+
+}
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go
@@ -18,6 +18,8 @@ import (
 	"fmt"
 	"regexp"
 	"strconv"
+
+	"github.com/yuin/goldmark/util"
 )
 
 type Item struct {
@@ -64,7 +66,11 @@ func (i Item) ValTyped() any {
 }
 
 func (i Item) IsText() bool {
-	return i.Type == tText
+	return i.Type == tText || i.Type == tIndentation
+}
+
+func (i Item) IsIndentation() bool {
+	return i.Type == tIndentation
 }
 
 func (i Item) IsNonWhitespace() bool {
@@ -125,6 +131,8 @@ func (i Item) String() string {
 		return "EOF"
 	case i.Type == tError:
 		return string(i.Val)
+	case i.Type == tIndentation:
+		return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(i.Val))
 	case i.Type > tKeywordMarker:
 		return fmt.Sprintf("<%s>", i.Val)
 	case len(i.Val) > 50:
@@ -159,6 +167,8 @@ const (
 	tScParam
 	tScParamVal
 
+	tIndentation
+
 	tText // plain text
 
 	// preserved for later - keywords come after this
diff --git a/parser/pageparser/itemtype_string.go b/parser/pageparser/itemtype_string.go
@@ -4,9 +4,36 @@ package pageparser
 
 import "strconv"
 
-const _ItemType_name = "tErrortEOFTypeHTMLStartTypeLeadSummaryDividerTypeFrontMatterYAMLTypeFrontMatterTOMLTypeFrontMatterJSONTypeFrontMatterORGTypeEmojiTypeIgnoretLeftDelimScNoMarkuptRightDelimScNoMarkuptLeftDelimScWithMarkuptRightDelimScWithMarkuptScClosetScNametScNameInlinetScParamtScParamValtTexttKeywordMarker"
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[tError-0]
+	_ = x[tEOF-1]
+	_ = x[TypeLeadSummaryDivider-2]
+	_ = x[TypeFrontMatterYAML-3]
+	_ = x[TypeFrontMatterTOML-4]
+	_ = x[TypeFrontMatterJSON-5]
+	_ = x[TypeFrontMatterORG-6]
+	_ = x[TypeEmoji-7]
+	_ = x[TypeIgnore-8]
+	_ = x[tLeftDelimScNoMarkup-9]
+	_ = x[tRightDelimScNoMarkup-10]
+	_ = x[tLeftDelimScWithMarkup-11]
+	_ = x[tRightDelimScWithMarkup-12]
+	_ = x[tScClose-13]
+	_ = x[tScName-14]
+	_ = x[tScNameInline-15]
+	_ = x[tScParam-16]
+	_ = x[tScParamVal-17]
+	_ = x[tIndentation-18]
+	_ = x[tText-19]
+	_ = x[tKeywordMarker-20]
+}
+
+const _ItemType_name = "tErrortEOFTypeLeadSummaryDividerTypeFrontMatterYAMLTypeFrontMatterTOMLTypeFrontMatterJSONTypeFrontMatterORGTypeEmojiTypeIgnoretLeftDelimScNoMarkuptRightDelimScNoMarkuptLeftDelimScWithMarkuptRightDelimScWithMarkuptScClosetScNametScNameInlinetScParamtScParamValtIndentationtTexttKeywordMarker"
 
-var _ItemType_index = [...]uint16{0, 6, 10, 23, 45, 64, 83, 102, 120, 129, 139, 159, 180, 202, 225, 233, 240, 253, 261, 272, 277, 291}
+var _ItemType_index = [...]uint16{0, 6, 10, 32, 51, 70, 89, 107, 116, 126, 146, 167, 189, 212, 220, 227, 240, 248, 259, 271, 276, 290}
 
 func (i ItemType) String() string {
 	if i < 0 || i >= ItemType(len(_ItemType_index)-1) {
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
@@ -120,6 +120,7 @@ func (l *pageLexer) next() rune {
 	runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:])
 	l.width = runeWidth
 	l.pos += l.width
+
 	return runeValue
 }
 
@@ -137,8 +138,34 @@ func (l *pageLexer) backup() {
 
 // sends an item back to the client.
 func (l *pageLexer) emit(t ItemType) {
+	defer func() {
+		l.start = l.pos
+	}()
+
+	if t == tText {
+		// Identify any trailing whitespace/indentation.
+		// We currently only care about the last one.
+		for i := l.pos - 1; i >= l.start; i-- {
+			b := l.input[i]
+			if b != ' ' && b != '\t' && b != '\r' && b != '\n' {
+				break
+			}
+			if i == l.start && b != '\n' {
+				l.items = append(l.items, Item{tIndentation, l.start, l.input[l.start:l.pos], false})
+				return
+			} else if b == '\n' && i < l.pos-1 {
+				l.items = append(l.items, Item{t, l.start, l.input[l.start : i+1], false})
+				l.items = append(l.items, Item{tIndentation, i + 1, l.input[i+1 : l.pos], false})
+				return
+			} else if b == '\n' && i == l.pos-1 {
+				break
+			}
+
+		}
+	}
+
 	l.items = append(l.items, Item{t, l.start, l.input[l.start:l.pos], false})
-	l.start = l.pos
+
 }
 
 // sends a string item back to the client.
diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go
@@ -149,6 +149,11 @@ func (t *Iterator) Backup() {
 	t.lastPos--
 }
 
+// Pos returns the iterator's current item index (lastPos); it is not a byte offset into the input.
+func (t *Iterator) Pos() int {
+	return t.lastPos
+}
+
 // check for non-error and non-EOF types coming next
 func (t *Iterator) IsValueNext() bool {
 	i := t.Peek()
diff --git a/parser/pageparser/pageparser_shortcode_test.go b/parser/pageparser/pageparser_shortcode_test.go
@@ -51,6 +51,9 @@ var shortCodeLexerTests = []lexerTest{
 
 	{"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
 	{"with spaces", `{{<     sc1     >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+	{"indented on new line", "Hello\n    {{% sc1 %}}", []Item{nti(tText, "Hello\n"), nti(tIndentation, "    "), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
+	{"indented on new line tab", "Hello\n\t{{% sc1 %}}", []Item{nti(tText, "Hello\n"), nti(tIndentation, "\t"), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
+	{"indented on first line", "    {{% sc1 %}}", []Item{nti(tIndentation, "    "), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
 	{"mismatched rightDelim", `{{< sc1 %}}`, []Item{
 		tstLeftNoMD, tstSC1,
 		nti(tError, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"),

M	common/text/transform.go	\|	14	++++++++++++++
M	common/text/transform_test.go	\|	18	++++++++++++++++++
M	hugolib/shortcode.go	\|	27	+++++++++++++++++++++++++++
M	hugolib/shortcode_test.go	\|	73	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	parser/pageparser/item.go	\|	12	+++++++++++-
M	parser/pageparser/itemtype_string.go	\|	31	+++++++++++++++++++++++++++++--
M	parser/pageparser/pagelexer.go	\|	29	++++++++++++++++++++++++++++-
M	parser/pageparser/pageparser.go	\|	5	+++++
M	parser/pageparser/pageparser_shortcode_test.go	\|	3	+++