pagelexer_shortcode.go (9376B)
1 // Copyright 2018 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package pageparser
15
// lexerShortcodeState holds the bookkeeping the lexer needs while it is
// scanning a shortcode: which delimiter pair is active, what kind of
// parameters have been seen so far, and which shortcodes have been opened.
type lexerShortcodeState struct {
	// Item types of the delimiter pair currently being lexed. They select
	// between the "{{%" / "%}}" and "{{<" / ">}}" byte sequences.
	currLeftDelimItem  ItemType
	currRightDelimItem ItemType
	// isInline is set while lexing a shortcode name of the form "name.inline".
	isInline          bool
	currShortcodeName string // is only set when a shortcode is in opened state
	closingState      int    // > 0 = on its way to be closed
	// elementStepNum is reset to 0 at the left delimiter and incremented once
	// the shortcode name has been lexed; > 0 means what follows are parameters.
	elementStepNum int // step number in element
	// paramElements records the parameter style seen first in this shortcode:
	// 0 = none yet, 1 = positional, 2 = named (name + value).
	paramElements  int             // number of elements (name + value = 2) found first
	openShortcodes map[string]bool // set of shortcodes in open state

}
27
// Shortcode syntax: the byte sequences that open and close shortcodes and
// shortcode comments.
var (
	leftDelimSc            = []byte("{{")
	leftDelimScNoMarkup    = []byte("{{<")
	rightDelimScNoMarkup   = []byte(">}}")
	leftDelimScWithMarkup  = []byte("{{%")
	rightDelimScWithMarkup = []byte("%}}")
	leftComment            = []byte("/*") // comments in this context are used to mark shortcodes as "not really a shortcode"
	rightComment           = []byte("*/")
)
38
39 func (l *pageLexer) isShortCodeStart() bool {
40 return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup)
41 }
42
43 func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
44 l.pos += len(l.currentLeftShortcodeDelim())
45 if l.hasPrefix(leftComment) {
46 return lexShortcodeComment
47 }
48 l.emit(l.currentLeftShortcodeDelimItem())
49 l.elementStepNum = 0
50 l.paramElements = 0
51 return lexInsideShortcode
52 }
53
54 func lexShortcodeComment(l *pageLexer) stateFunc {
55 posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...))
56 if posRightComment <= 1 {
57 return l.errorf("comment must be closed")
58 }
59 // we emit all as text, except the comment markers
60 l.emit(tText)
61 l.pos += len(leftComment)
62 l.ignore()
63 l.pos += posRightComment - len(leftComment)
64 l.emit(tText)
65 l.pos += len(rightComment)
66 l.ignore()
67 l.pos += len(l.currentRightShortcodeDelim())
68 l.emit(tText)
69 return lexMainSection
70 }
71
72 func lexShortcodeRightDelim(l *pageLexer) stateFunc {
73 l.closingState = 0
74 l.pos += len(l.currentRightShortcodeDelim())
75 l.emit(l.currentRightShortcodeDelimItem())
76 return lexMainSection
77 }
78
79 // either:
80 // 1. param
81 // 2. "param" or "param\"
82 // 3. param="123" or param="123\"
83 // 4. param="Some \"escaped\" text"
84 // 5. `param`
85 // 6. param=`123`
86 func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc {
87 first := true
88 nextEq := false
89
90 var r rune
91
92 for {
93 r = l.next()
94 if first {
95 if r == '"' || (r == '`' && !escapedQuoteStart) {
96 // a positional param with quotes
97 if l.paramElements == 2 {
98 return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters")
99 }
100 l.paramElements = 1
101 l.backup()
102 if r == '"' {
103 return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam)
104 }
105 return lexShortCodeParamRawStringVal(l, tScParam)
106
107 } else if r == '`' && escapedQuoteStart {
108 return l.errorf("unrecognized escape character")
109 }
110 first = false
111 } else if r == '=' {
112 // a named param
113 l.backup()
114 nextEq = true
115 break
116 }
117
118 if !isAlphaNumericOrHyphen(r) && r != '.' { // Floats have period
119 l.backup()
120 break
121 }
122 }
123
124 if l.paramElements == 0 {
125 l.paramElements++
126
127 if nextEq {
128 l.paramElements++
129 }
130 } else {
131 if nextEq && l.paramElements == 1 {
132 return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current())
133 } else if !nextEq && l.paramElements == 2 {
134 return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current())
135 }
136 }
137
138 l.emit(tScParam)
139 return lexInsideShortcode
140 }
141
// lexShortcodeParamVal lexes an unquoted value of a named parameter: it
// consumes input via consumeToSpace, emits it as a tScParamVal item and
// continues inside the shortcode.
func lexShortcodeParamVal(l *pageLexer) stateFunc {
	l.consumeToSpace()
	l.emit(tScParamVal)
	return lexInsideShortcode
}
147
148 func lexShortCodeParamRawStringVal(l *pageLexer, typ ItemType) stateFunc {
149 openBacktickFound := false
150
151 Loop:
152 for {
153 switch r := l.next(); {
154 case r == '`':
155 if openBacktickFound {
156 l.backup()
157 break Loop
158 } else {
159 openBacktickFound = true
160 l.ignore()
161 }
162 case r == eof:
163 return l.errorf("unterminated raw string in shortcode parameter-argument: '%s'", l.current())
164 }
165 }
166
167 l.emitString(typ)
168 l.next()
169 l.ignore()
170
171 return lexInsideShortcode
172 }
173
174 func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc {
175 openQuoteFound := false
176 escapedInnerQuoteFound := false
177 escapedQuoteState := 0
178
179 Loop:
180 for {
181 switch r := l.next(); {
182 case r == '\\':
183 if l.peek() == '"' {
184 if openQuoteFound && !escapedQuotedValuesAllowed {
185 l.backup()
186 break Loop
187 } else if openQuoteFound {
188 // the coming quote is inside
189 escapedInnerQuoteFound = true
190 escapedQuoteState = 1
191 }
192 } else if l.peek() == '`' {
193 return l.errorf("unrecognized escape character")
194 }
195 case r == eof, r == '\n':
196 return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current())
197 case r == '"':
198 if escapedQuoteState == 0 {
199 if openQuoteFound {
200 l.backup()
201 break Loop
202
203 } else {
204 openQuoteFound = true
205 l.ignore()
206 }
207 } else {
208 escapedQuoteState = 0
209 }
210 }
211 }
212
213 if escapedInnerQuoteFound {
214 l.ignoreEscapesAndEmit(typ, true)
215 } else {
216 l.emitString(typ)
217 }
218
219 r := l.next()
220
221 if r == '\\' {
222 if l.peek() == '"' {
223 // ignore the escaped closing quote
224 l.ignore()
225 l.next()
226 l.ignore()
227 }
228 } else if r == '"' {
229 // ignore closing quote
230 l.ignore()
231 } else {
232 // handled by next state
233 l.backup()
234 }
235
236 return lexInsideShortcode
237 }
238
// Inline shortcodes have the form {{< myshortcode.inline >}}.
// The trailing space is part of the identifier, so "inline" must be followed
// by a space to be recognized.
var inlineIdentifier = []byte("inline ")
241
242 // scans an alphanumeric inside shortcode
243 func lexIdentifierInShortcode(l *pageLexer) stateFunc {
244 lookForEnd := false
245 Loop:
246 for {
247 switch r := l.next(); {
248 case isAlphaNumericOrHyphen(r):
249 // Allow forward slash inside names to make it possible to create namespaces.
250 case r == '/':
251 case r == '.':
252 l.isInline = l.hasPrefix(inlineIdentifier)
253 if !l.isInline {
254 return l.errorf("period in shortcode name only allowed for inline identifiers")
255 }
256 default:
257 l.backup()
258 word := string(l.input[l.start:l.pos])
259 if l.closingState > 0 && !l.openShortcodes[word] {
260 return l.errorf("closing tag for shortcode '%s' does not match start tag", word)
261 } else if l.closingState > 0 {
262 l.openShortcodes[word] = false
263 lookForEnd = true
264 }
265
266 l.closingState = 0
267 l.currShortcodeName = word
268 l.openShortcodes[word] = true
269 l.elementStepNum++
270 if l.isInline {
271 l.emit(tScNameInline)
272 } else {
273 l.emit(tScName)
274 }
275 break Loop
276 }
277 }
278
279 if lookForEnd {
280 return lexEndOfShortcode
281 }
282 return lexInsideShortcode
283 }
284
285 func lexEndOfShortcode(l *pageLexer) stateFunc {
286 l.isInline = false
287 if l.hasPrefix(l.currentRightShortcodeDelim()) {
288 return lexShortcodeRightDelim
289 }
290 switch r := l.next(); {
291 case isSpace(r):
292 l.ignore()
293 default:
294 return l.errorf("unclosed shortcode")
295 }
296 return lexEndOfShortcode
297 }
298
299 // scans the elements inside shortcode tags
300 func lexInsideShortcode(l *pageLexer) stateFunc {
301 if l.hasPrefix(l.currentRightShortcodeDelim()) {
302 return lexShortcodeRightDelim
303 }
304 switch r := l.next(); {
305 case r == eof:
306 // eol is allowed inside shortcodes; this may go to end of document before it fails
307 return l.errorf("unclosed shortcode action")
308 case isSpace(r), isEndOfLine(r):
309 l.ignore()
310 case r == '=':
311 l.consumeSpace()
312 l.ignore()
313 peek := l.peek()
314 if peek == '"' || peek == '\\' {
315 return lexShortcodeQuotedParamVal(l, peek != '\\', tScParamVal)
316 } else if peek == '`' {
317 return lexShortCodeParamRawStringVal(l, tScParamVal)
318 }
319 return lexShortcodeParamVal
320 case r == '/':
321 if l.currShortcodeName == "" {
322 return l.errorf("got closing shortcode, but none is open")
323 }
324 l.closingState++
325 l.isInline = false
326 l.emit(tScClose)
327 case r == '\\':
328 l.ignore()
329 if l.peek() == '"' || l.peek() == '`' {
330 return lexShortcodeParam(l, true)
331 }
332 case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"' || r == '`'): // positional params can have quotes
333 l.backup()
334 return lexShortcodeParam(l, false)
335 case isAlphaNumeric(r):
336 l.backup()
337 return lexIdentifierInShortcode
338 default:
339 return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r)
340 }
341 return lexInsideShortcode
342 }
343
// currentLeftShortcodeDelimItem returns the item type of the left delimiter
// of the shortcode currently being lexed.
func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType {
	return l.currLeftDelimItem
}
347
// currentRightShortcodeDelimItem returns the item type of the right delimiter
// of the shortcode currently being lexed.
func (l *pageLexer) currentRightShortcodeDelimItem() ItemType {
	return l.currRightDelimItem
}
351
352 func (l *pageLexer) currentLeftShortcodeDelim() []byte {
353 if l.currLeftDelimItem == tLeftDelimScWithMarkup {
354 return leftDelimScWithMarkup
355 }
356 return leftDelimScNoMarkup
357 }
358
359 func (l *pageLexer) currentRightShortcodeDelim() []byte {
360 if l.currRightDelimItem == tRightDelimScWithMarkup {
361 return rightDelimScWithMarkup
362 }
363 return rightDelimScNoMarkup
364 }