pagelexer_shortcode.go (9376B)
1 // Copyright 2018 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package pageparser 15 16 type lexerShortcodeState struct { 17 currLeftDelimItem ItemType 18 currRightDelimItem ItemType 19 isInline bool 20 currShortcodeName string // is only set when a shortcode is in opened state 21 closingState int // > 0 = on its way to be closed 22 elementStepNum int // step number in element 23 paramElements int // number of elements (name + value = 2) found first 24 openShortcodes map[string]bool // set of shortcodes in open state 25 26 } 27 28 // Shortcode syntax 29 var ( 30 leftDelimSc = []byte("{{") 31 leftDelimScNoMarkup = []byte("{{<") 32 rightDelimScNoMarkup = []byte(">}}") 33 leftDelimScWithMarkup = []byte("{{%") 34 rightDelimScWithMarkup = []byte("%}}") 35 leftComment = []byte("/*") // comments in this context us used to to mark shortcodes as "not really a shortcode" 36 rightComment = []byte("*/") 37 ) 38 39 func (l *pageLexer) isShortCodeStart() bool { 40 return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup) 41 } 42 43 func lexShortcodeLeftDelim(l *pageLexer) stateFunc { 44 l.pos += len(l.currentLeftShortcodeDelim()) 45 if l.hasPrefix(leftComment) { 46 return lexShortcodeComment 47 } 48 l.emit(l.currentLeftShortcodeDelimItem()) 49 l.elementStepNum = 0 50 l.paramElements = 0 51 return lexInsideShortcode 52 } 53 54 func lexShortcodeComment(l *pageLexer) stateFunc { 55 posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...)) 56 if posRightComment <= 1 { 57 return l.errorf("comment must be closed") 58 } 59 // we emit all as text, except the comment markers 60 l.emit(tText) 61 l.pos += len(leftComment) 62 l.ignore() 63 l.pos += posRightComment - len(leftComment) 64 l.emit(tText) 65 l.pos += len(rightComment) 66 l.ignore() 67 l.pos += len(l.currentRightShortcodeDelim()) 68 l.emit(tText) 69 return lexMainSection 70 } 71 72 func lexShortcodeRightDelim(l *pageLexer) stateFunc { 73 l.closingState = 0 74 l.pos += len(l.currentRightShortcodeDelim()) 75 l.emit(l.currentRightShortcodeDelimItem()) 76 return lexMainSection 77 } 78 79 // either: 80 // 1. param 81 // 2. "param" or "param\" 82 // 3. param="123" or param="123\" 83 // 4. param="Some \"escaped\" text" 84 // 5. `param` 85 // 6. param=`123` 86 func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc { 87 first := true 88 nextEq := false 89 90 var r rune 91 92 for { 93 r = l.next() 94 if first { 95 if r == '"' || (r == '`' && !escapedQuoteStart) { 96 // a positional param with quotes 97 if l.paramElements == 2 { 98 return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters") 99 } 100 l.paramElements = 1 101 l.backup() 102 if r == '"' { 103 return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam) 104 } 105 return lexShortCodeParamRawStringVal(l, tScParam) 106 107 } else if r == '`' && escapedQuoteStart { 108 return l.errorf("unrecognized escape character") 109 } 110 first = false 111 } else if r == '=' { 112 // a named param 113 l.backup() 114 nextEq = true 115 break 116 } 117 118 if !isAlphaNumericOrHyphen(r) && r != '.' { // Floats have period 119 l.backup() 120 break 121 } 122 } 123 124 if l.paramElements == 0 { 125 l.paramElements++ 126 127 if nextEq { 128 l.paramElements++ 129 } 130 } else { 131 if nextEq && l.paramElements == 1 { 132 return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current()) 133 } else if !nextEq && l.paramElements == 2 { 134 return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current()) 135 } 136 } 137 138 l.emit(tScParam) 139 return lexInsideShortcode 140 } 141 142 func lexShortcodeParamVal(l *pageLexer) stateFunc { 143 l.consumeToSpace() 144 l.emit(tScParamVal) 145 return lexInsideShortcode 146 } 147 148 func lexShortCodeParamRawStringVal(l *pageLexer, typ ItemType) stateFunc { 149 openBacktickFound := false 150 151 Loop: 152 for { 153 switch r := l.next(); { 154 case r == '`': 155 if openBacktickFound { 156 l.backup() 157 break Loop 158 } else { 159 openBacktickFound = true 160 l.ignore() 161 } 162 case r == eof: 163 return l.errorf("unterminated raw string in shortcode parameter-argument: '%s'", l.current()) 164 } 165 } 166 167 l.emitString(typ) 168 l.next() 169 l.ignore() 170 171 return lexInsideShortcode 172 } 173 174 func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc { 175 openQuoteFound := false 176 escapedInnerQuoteFound := false 177 escapedQuoteState := 0 178 179 Loop: 180 for { 181 switch r := l.next(); { 182 case r == '\\': 183 if l.peek() == '"' { 184 if openQuoteFound && !escapedQuotedValuesAllowed { 185 l.backup() 186 break Loop 187 } else if openQuoteFound { 188 // the coming quote is inside 189 escapedInnerQuoteFound = true 190 escapedQuoteState = 1 191 } 192 } else if l.peek() == '`' { 193 return l.errorf("unrecognized escape character") 194 } 195 case r == eof, r == '\n': 196 return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current()) 197 case r == '"': 198 if escapedQuoteState == 0 { 199 if openQuoteFound { 200 l.backup() 201 break Loop 202 203 } else { 204 openQuoteFound = true 205 l.ignore() 206 } 207 } else { 208 escapedQuoteState = 0 209 } 210 } 211 } 212 213 if escapedInnerQuoteFound { 214 l.ignoreEscapesAndEmit(typ, true) 215 } else { 216 l.emitString(typ) 217 } 218 219 r := l.next() 220 221 if r == '\\' { 222 if l.peek() == '"' { 223 // ignore the escaped closing quote 224 l.ignore() 225 l.next() 226 l.ignore() 227 } 228 } else if r == '"' { 229 // ignore closing quote 230 l.ignore() 231 } else { 232 // handled by next state 233 l.backup() 234 } 235 236 return lexInsideShortcode 237 } 238 239 // Inline shortcodes has the form {{< myshortcode.inline >}} 240 var inlineIdentifier = []byte("inline ") 241 242 // scans an alphanumeric inside shortcode 243 func lexIdentifierInShortcode(l *pageLexer) stateFunc { 244 lookForEnd := false 245 Loop: 246 for { 247 switch r := l.next(); { 248 case isAlphaNumericOrHyphen(r): 249 // Allow forward slash inside names to make it possible to create namespaces. 250 case r == '/': 251 case r == '.': 252 l.isInline = l.hasPrefix(inlineIdentifier) 253 if !l.isInline { 254 return l.errorf("period in shortcode name only allowed for inline identifiers") 255 } 256 default: 257 l.backup() 258 word := string(l.input[l.start:l.pos]) 259 if l.closingState > 0 && !l.openShortcodes[word] { 260 return l.errorf("closing tag for shortcode '%s' does not match start tag", word) 261 } else if l.closingState > 0 { 262 l.openShortcodes[word] = false 263 lookForEnd = true 264 } 265 266 l.closingState = 0 267 l.currShortcodeName = word 268 l.openShortcodes[word] = true 269 l.elementStepNum++ 270 if l.isInline { 271 l.emit(tScNameInline) 272 } else { 273 l.emit(tScName) 274 } 275 break Loop 276 } 277 } 278 279 if lookForEnd { 280 return lexEndOfShortcode 281 } 282 return lexInsideShortcode 283 } 284 285 func lexEndOfShortcode(l *pageLexer) stateFunc { 286 l.isInline = false 287 if l.hasPrefix(l.currentRightShortcodeDelim()) { 288 return lexShortcodeRightDelim 289 } 290 switch r := l.next(); { 291 case isSpace(r): 292 l.ignore() 293 default: 294 return l.errorf("unclosed shortcode") 295 } 296 return lexEndOfShortcode 297 } 298 299 // scans the elements inside shortcode tags 300 func lexInsideShortcode(l *pageLexer) stateFunc { 301 if l.hasPrefix(l.currentRightShortcodeDelim()) { 302 return lexShortcodeRightDelim 303 } 304 switch r := l.next(); { 305 case r == eof: 306 // eol is allowed inside shortcodes; this may go to end of document before it fails 307 return l.errorf("unclosed shortcode action") 308 case isSpace(r), isEndOfLine(r): 309 l.ignore() 310 case r == '=': 311 l.consumeSpace() 312 l.ignore() 313 peek := l.peek() 314 if peek == '"' || peek == '\\' { 315 return lexShortcodeQuotedParamVal(l, peek != '\\', tScParamVal) 316 } else if peek == '`' { 317 return lexShortCodeParamRawStringVal(l, tScParamVal) 318 } 319 return lexShortcodeParamVal 320 case r == '/': 321 if l.currShortcodeName == "" { 322 return l.errorf("got closing shortcode, but none is open") 323 } 324 l.closingState++ 325 l.isInline = false 326 l.emit(tScClose) 327 case r == '\\': 328 l.ignore() 329 if l.peek() == '"' || l.peek() == '`' { 330 return lexShortcodeParam(l, true) 331 } 332 case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"' || r == '`'): // positional params can have quotes 333 l.backup() 334 return lexShortcodeParam(l, false) 335 case isAlphaNumeric(r): 336 l.backup() 337 return lexIdentifierInShortcode 338 default: 339 return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r) 340 } 341 return lexInsideShortcode 342 } 343 344 func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType { 345 return l.currLeftDelimItem 346 } 347 348 func (l *pageLexer) currentRightShortcodeDelimItem() ItemType { 349 return l.currRightDelimItem 350 } 351 352 func (l *pageLexer) currentLeftShortcodeDelim() []byte { 353 if l.currLeftDelimItem == tLeftDelimScWithMarkup { 354 return leftDelimScWithMarkup 355 } 356 return leftDelimScNoMarkup 357 } 358 359 func (l *pageLexer) currentRightShortcodeDelim() []byte { 360 if l.currRightDelimItem == tRightDelimScWithMarkup { 361 return rightDelimScWithMarkup 362 } 363 return rightDelimScNoMarkup 364 }