transition.go - hugo - Fork of github.com/gohugoio/hugo with reverse pagination support

transition.go (15549B)

    1 // Copyright 2011 The Go Authors. All rights reserved.
    2 // Use of this source code is governed by a BSD-style
    3 // license that can be found in the LICENSE file.
    4 
    5 package template
    6 
    7 import (
    8 	"bytes"
    9 	"strings"
   10 )
   11 
   12 // transitionFunc is the array of context transition functions for text nodes.
   13 // A transition function takes a context and template text input, and returns
   14 // the updated context and the number of bytes consumed from the front of the
   15 // input.
   16 var transitionFunc = [...]func(context, []byte) (context, int){
   17 	stateText:        tText,
   18 	stateTag:         tTag,
   19 	stateAttrName:    tAttrName,
   20 	stateAfterName:   tAfterName,
   21 	stateBeforeValue: tBeforeValue,
   22 	stateHTMLCmt:     tHTMLCmt,
   23 	stateRCDATA:      tSpecialTagEnd,
   24 	stateAttr:        tAttr,
   25 	stateURL:         tURL,
   26 	stateSrcset:      tURL,
   27 	stateJS:          tJS,
   28 	stateJSDqStr:     tJSDelimited,
   29 	stateJSSqStr:     tJSDelimited,
   30 	stateJSRegexp:    tJSDelimited,
   31 	stateJSBlockCmt:  tBlockCmt,
   32 	stateJSLineCmt:   tLineCmt,
   33 	stateCSS:         tCSS,
   34 	stateCSSDqStr:    tCSSStr,
   35 	stateCSSSqStr:    tCSSStr,
   36 	stateCSSDqURL:    tCSSStr,
   37 	stateCSSSqURL:    tCSSStr,
   38 	stateCSSURL:      tCSSStr,
   39 	stateCSSBlockCmt: tBlockCmt,
   40 	stateCSSLineCmt:  tLineCmt,
   41 	stateError:       tError,
   42 }
   43 
   44 var commentStart = []byte("<!--")
   45 var commentEnd = []byte("-->")
   46 
   47 // tText is the context transition function for the text state.
   48 func tText(c context, s []byte) (context, int) {
   49 	k := 0
   50 	for {
   51 		i := k + bytes.IndexByte(s[k:], '<')
   52 		if i < k || i+1 == len(s) {
   53 			return c, len(s)
   54 		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
   55 			return context{state: stateHTMLCmt}, i + 4
   56 		}
   57 		i++
   58 		end := false
   59 		if s[i] == '/' {
   60 			if i+1 == len(s) {
   61 				return c, len(s)
   62 			}
   63 			end, i = true, i+1
   64 		}
   65 		j, e := eatTagName(s, i)
   66 		if j != i {
   67 			if end {
   68 				e = elementNone
   69 			}
   70 			// We've found an HTML tag.
   71 			return context{state: stateTag, element: e}, j
   72 		}
   73 		k = j
   74 	}
   75 }
   76 
   77 var elementContentType = [...]state{
   78 	elementNone:     stateText,
   79 	elementScript:   stateJS,
   80 	elementStyle:    stateCSS,
   81 	elementTextarea: stateRCDATA,
   82 	elementTitle:    stateRCDATA,
   83 }
   84 
   85 // tTag is the context transition function for the tag state.
   86 func tTag(c context, s []byte) (context, int) {
   87 	// Find the attribute name.
   88 	i := eatWhiteSpace(s, 0)
   89 	if i == len(s) {
   90 		return c, len(s)
   91 	}
   92 	if s[i] == '>' {
   93 		return context{
   94 			state:   elementContentType[c.element],
   95 			element: c.element,
   96 		}, i + 1
   97 	}
   98 	j, err := eatAttrName(s, i)
   99 	if err != nil {
  100 		return context{state: stateError, err: err}, len(s)
  101 	}
  102 	state, attr := stateTag, attrNone
  103 	if i == j {
  104 		return context{
  105 			state: stateError,
  106 			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
  107 		}, len(s)
  108 	}
  109 
  110 	attrName := strings.ToLower(string(s[i:j]))
  111 	if c.element == elementScript && attrName == "type" {
  112 		attr = attrScriptType
  113 	} else {
  114 		switch attrType(attrName) {
  115 		case contentTypeURL:
  116 			attr = attrURL
  117 		case contentTypeCSS:
  118 			attr = attrStyle
  119 		case contentTypeJS:
  120 			attr = attrScript
  121 		case contentTypeSrcset:
  122 			attr = attrSrcset
  123 		}
  124 	}
  125 
  126 	if j == len(s) {
  127 		state = stateAttrName
  128 	} else {
  129 		state = stateAfterName
  130 	}
  131 	return context{state: state, element: c.element, attr: attr}, j
  132 }
  133 
  134 // tAttrName is the context transition function for stateAttrName.
  135 func tAttrName(c context, s []byte) (context, int) {
  136 	i, err := eatAttrName(s, 0)
  137 	if err != nil {
  138 		return context{state: stateError, err: err}, len(s)
  139 	} else if i != len(s) {
  140 		c.state = stateAfterName
  141 	}
  142 	return c, i
  143 }
  144 
  145 // tAfterName is the context transition function for stateAfterName.
  146 func tAfterName(c context, s []byte) (context, int) {
  147 	// Look for the start of the value.
  148 	i := eatWhiteSpace(s, 0)
  149 	if i == len(s) {
  150 		return c, len(s)
  151 	} else if s[i] != '=' {
  152 		// Occurs due to tag ending '>', and valueless attribute.
  153 		c.state = stateTag
  154 		return c, i
  155 	}
  156 	c.state = stateBeforeValue
  157 	// Consume the "=".
  158 	return c, i + 1
  159 }
  160 
  161 var attrStartStates = [...]state{
  162 	attrNone:       stateAttr,
  163 	attrScript:     stateJS,
  164 	attrScriptType: stateAttr,
  165 	attrStyle:      stateCSS,
  166 	attrURL:        stateURL,
  167 	attrSrcset:     stateSrcset,
  168 }
  169 
  170 // tBeforeValue is the context transition function for stateBeforeValue.
  171 func tBeforeValue(c context, s []byte) (context, int) {
  172 	i := eatWhiteSpace(s, 0)
  173 	if i == len(s) {
  174 		return c, len(s)
  175 	}
  176 	// Find the attribute delimiter.
  177 	delim := delimSpaceOrTagEnd
  178 	switch s[i] {
  179 	case '\'':
  180 		delim, i = delimSingleQuote, i+1
  181 	case '"':
  182 		delim, i = delimDoubleQuote, i+1
  183 	}
  184 	c.state, c.delim = attrStartStates[c.attr], delim
  185 	return c, i
  186 }
  187 
  188 // tHTMLCmt is the context transition function for stateHTMLCmt.
  189 func tHTMLCmt(c context, s []byte) (context, int) {
  190 	if i := bytes.Index(s, commentEnd); i != -1 {
  191 		return context{}, i + 3
  192 	}
  193 	return c, len(s)
  194 }
  195 
  196 // specialTagEndMarkers maps element types to the character sequence that
  197 // case-insensitively signals the end of the special tag body.
  198 var specialTagEndMarkers = [...][]byte{
  199 	elementScript:   []byte("script"),
  200 	elementStyle:    []byte("style"),
  201 	elementTextarea: []byte("textarea"),
  202 	elementTitle:    []byte("title"),
  203 }
  204 
  205 var (
  206 	specialTagEndPrefix = []byte("</")
  207 	tagEndSeparators    = []byte("> \t\n\f/")
  208 )
  209 
  210 // tSpecialTagEnd is the context transition function for raw text and RCDATA
  211 // element states.
  212 func tSpecialTagEnd(c context, s []byte) (context, int) {
  213 	if c.element != elementNone {
  214 		if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
  215 			return context{}, i
  216 		}
  217 	}
  218 	return c, len(s)
  219 }
  220 
  221 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
  222 func indexTagEnd(s []byte, tag []byte) int {
  223 	res := 0
  224 	plen := len(specialTagEndPrefix)
  225 	for len(s) > 0 {
  226 		// Try to find the tag end prefix first
  227 		i := bytes.Index(s, specialTagEndPrefix)
  228 		if i == -1 {
  229 			return i
  230 		}
  231 		s = s[i+plen:]
  232 		// Try to match the actual tag if there is still space for it
  233 		if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
  234 			s = s[len(tag):]
  235 			// Check the tag is followed by a proper separator
  236 			if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
  237 				return res + i
  238 			}
  239 			res += len(tag)
  240 		}
  241 		res += i + plen
  242 	}
  243 	return -1
  244 }
  245 
  246 // tAttr is the context transition function for the attribute state.
  247 func tAttr(c context, s []byte) (context, int) {
  248 	return c, len(s)
  249 }
  250 
  251 // tURL is the context transition function for the URL state.
  252 func tURL(c context, s []byte) (context, int) {
  253 	if bytes.ContainsAny(s, "#?") {
  254 		c.urlPart = urlPartQueryOrFrag
  255 	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
  256 		// HTML5 uses "Valid URL potentially surrounded by spaces" for
  257 		// attrs: https://www.w3.org/TR/html5/index.html#attributes-1
  258 		c.urlPart = urlPartPreQuery
  259 	}
  260 	return c, len(s)
  261 }
  262 
  263 // tJS is the context transition function for the JS state.
  264 func tJS(c context, s []byte) (context, int) {
  265 	i := bytes.IndexAny(s, `"'/`)
  266 	if i == -1 {
  267 		// Entire input is non string, comment, regexp tokens.
  268 		c.jsCtx = nextJSCtx(s, c.jsCtx)
  269 		return c, len(s)
  270 	}
  271 	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
  272 	switch s[i] {
  273 	case '"':
  274 		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
  275 	case '\'':
  276 		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
  277 	case '/':
  278 		switch {
  279 		case i+1 < len(s) && s[i+1] == '/':
  280 			c.state, i = stateJSLineCmt, i+1
  281 		case i+1 < len(s) && s[i+1] == '*':
  282 			c.state, i = stateJSBlockCmt, i+1
  283 		case c.jsCtx == jsCtxRegexp:
  284 			c.state = stateJSRegexp
  285 		case c.jsCtx == jsCtxDivOp:
  286 			c.jsCtx = jsCtxRegexp
  287 		default:
  288 			return context{
  289 				state: stateError,
  290 				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
  291 			}, len(s)
  292 		}
  293 	default:
  294 		panic("unreachable")
  295 	}
  296 	return c, i + 1
  297 }
  298 
  299 // tJSDelimited is the context transition function for the JS string and regexp
  300 // states.
  301 func tJSDelimited(c context, s []byte) (context, int) {
  302 	specials := `\"`
  303 	switch c.state {
  304 	case stateJSSqStr:
  305 		specials = `\'`
  306 	case stateJSRegexp:
  307 		specials = `\/[]`
  308 	}
  309 
  310 	k, inCharset := 0, false
  311 	for {
  312 		i := k + bytes.IndexAny(s[k:], specials)
  313 		if i < k {
  314 			break
  315 		}
  316 		switch s[i] {
  317 		case '\\':
  318 			i++
  319 			if i == len(s) {
  320 				return context{
  321 					state: stateError,
  322 					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
  323 				}, len(s)
  324 			}
  325 		case '[':
  326 			inCharset = true
  327 		case ']':
  328 			inCharset = false
  329 		default:
  330 			// end delimiter
  331 			if !inCharset {
  332 				c.state, c.jsCtx = stateJS, jsCtxDivOp
  333 				return c, i + 1
  334 			}
  335 		}
  336 		k = i + 1
  337 	}
  338 
  339 	if inCharset {
  340 		// This can be fixed by making context richer if interpolation
  341 		// into charsets is desired.
  342 		return context{
  343 			state: stateError,
  344 			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
  345 		}, len(s)
  346 	}
  347 
  348 	return c, len(s)
  349 }
  350 
  351 var blockCommentEnd = []byte("*/")
  352 
  353 // tBlockCmt is the context transition function for /*comment*/ states.
  354 func tBlockCmt(c context, s []byte) (context, int) {
  355 	i := bytes.Index(s, blockCommentEnd)
  356 	if i == -1 {
  357 		return c, len(s)
  358 	}
  359 	switch c.state {
  360 	case stateJSBlockCmt:
  361 		c.state = stateJS
  362 	case stateCSSBlockCmt:
  363 		c.state = stateCSS
  364 	default:
  365 		panic(c.state.String())
  366 	}
  367 	return c, i + 2
  368 }
  369 
  370 // tLineCmt is the context transition function for //comment states.
  371 func tLineCmt(c context, s []byte) (context, int) {
  372 	var lineTerminators string
  373 	var endState state
  374 	switch c.state {
  375 	case stateJSLineCmt:
  376 		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
  377 	case stateCSSLineCmt:
  378 		lineTerminators, endState = "\n\f\r", stateCSS
  379 		// Line comments are not part of any published CSS standard but
  380 		// are supported by the 4 major browsers.
  381 		// This defines line comments as
  382 		//     LINECOMMENT ::= "//" [^\n\f\d]*
  383 		// since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
  384 		// newlines:
  385 		//     nl ::= #xA | #xD #xA | #xD | #xC
  386 	default:
  387 		panic(c.state.String())
  388 	}
  389 
  390 	i := bytes.IndexAny(s, lineTerminators)
  391 	if i == -1 {
  392 		return c, len(s)
  393 	}
  394 	c.state = endState
  395 	// Per section 7.4 of EcmaScript 5 : https://es5.github.com/#x7.4
  396 	// "However, the LineTerminator at the end of the line is not
  397 	// considered to be part of the single-line comment; it is
  398 	// recognized separately by the lexical grammar and becomes part
  399 	// of the stream of input elements for the syntactic grammar."
  400 	return c, i
  401 }
  402 
  403 // tCSS is the context transition function for the CSS state.
  404 func tCSS(c context, s []byte) (context, int) {
  405 	// CSS quoted strings are almost never used except for:
  406 	// (1) URLs as in background: "/foo.png"
  407 	// (2) Multiword font-names as in font-family: "Times New Roman"
  408 	// (3) List separators in content values as in inline-lists:
  409 	//    <style>
  410 	//    ul.inlineList { list-style: none; padding:0 }
  411 	//    ul.inlineList > li { display: inline }
  412 	//    ul.inlineList > li:before { content: ", " }
  413 	//    ul.inlineList > li:first-child:before { content: "" }
  414 	//    </style>
  415 	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
  416 	// (4) Attribute value selectors as in a[href="http://example.com/"]
  417 	//
  418 	// We conservatively treat all strings as URLs, but make some
  419 	// allowances to avoid confusion.
  420 	//
  421 	// In (1), our conservative assumption is justified.
  422 	// In (2), valid font names do not contain ':', '?', or '#', so our
  423 	// conservative assumption is fine since we will never transition past
  424 	// urlPartPreQuery.
  425 	// In (3), our protocol heuristic should not be tripped, and there
  426 	// should not be non-space content after a '?' or '#', so as long as
  427 	// we only %-encode RFC 3986 reserved characters we are ok.
  428 	// In (4), we should URL escape for URL attributes, and for others we
  429 	// have the attribute name available if our conservative assumption
  430 	// proves problematic for real code.
  431 
  432 	k := 0
  433 	for {
  434 		i := k + bytes.IndexAny(s[k:], `("'/`)
  435 		if i < k {
  436 			return c, len(s)
  437 		}
  438 		switch s[i] {
  439 		case '(':
  440 			// Look for url to the left.
  441 			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
  442 			if endsWithCSSKeyword(p, "url") {
  443 				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
  444 				switch {
  445 				case j != len(s) && s[j] == '"':
  446 					c.state, j = stateCSSDqURL, j+1
  447 				case j != len(s) && s[j] == '\'':
  448 					c.state, j = stateCSSSqURL, j+1
  449 				default:
  450 					c.state = stateCSSURL
  451 				}
  452 				return c, j
  453 			}
  454 		case '/':
  455 			if i+1 < len(s) {
  456 				switch s[i+1] {
  457 				case '/':
  458 					c.state = stateCSSLineCmt
  459 					return c, i + 2
  460 				case '*':
  461 					c.state = stateCSSBlockCmt
  462 					return c, i + 2
  463 				}
  464 			}
  465 		case '"':
  466 			c.state = stateCSSDqStr
  467 			return c, i + 1
  468 		case '\'':
  469 			c.state = stateCSSSqStr
  470 			return c, i + 1
  471 		}
  472 		k = i + 1
  473 	}
  474 }
  475 
  476 // tCSSStr is the context transition function for the CSS string and URL states.
  477 func tCSSStr(c context, s []byte) (context, int) {
  478 	var endAndEsc string
  479 	switch c.state {
  480 	case stateCSSDqStr, stateCSSDqURL:
  481 		endAndEsc = `\"`
  482 	case stateCSSSqStr, stateCSSSqURL:
  483 		endAndEsc = `\'`
  484 	case stateCSSURL:
  485 		// Unquoted URLs end with a newline or close parenthesis.
  486 		// The below includes the wc (whitespace character) and nl.
  487 		endAndEsc = "\\\t\n\f\r )"
  488 	default:
  489 		panic(c.state.String())
  490 	}
  491 
  492 	k := 0
  493 	for {
  494 		i := k + bytes.IndexAny(s[k:], endAndEsc)
  495 		if i < k {
  496 			c, nread := tURL(c, decodeCSS(s[k:]))
  497 			return c, k + nread
  498 		}
  499 		if s[i] == '\\' {
  500 			i++
  501 			if i == len(s) {
  502 				return context{
  503 					state: stateError,
  504 					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
  505 				}, len(s)
  506 			}
  507 		} else {
  508 			c.state = stateCSS
  509 			return c, i + 1
  510 		}
  511 		c, _ = tURL(c, decodeCSS(s[:i+1]))
  512 		k = i + 1
  513 	}
  514 }
  515 
  516 // tError is the context transition function for the error state.
  517 func tError(c context, s []byte) (context, int) {
  518 	return c, len(s)
  519 }
  520 
  521 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
  522 // It returns an error if s[i:] does not look like it begins with an
  523 // attribute name, such as encountering a quote mark without a preceding
  524 // equals sign.
  525 func eatAttrName(s []byte, i int) (int, *Error) {
  526 	for j := i; j < len(s); j++ {
  527 		switch s[j] {
  528 		case ' ', '\t', '\n', '\f', '\r', '=', '>':
  529 			return j, nil
  530 		case '\'', '"', '<':
  531 			// These result in a parse warning in HTML5 and are
  532 			// indicative of serious problems if seen in an attr
  533 			// name in a template.
  534 			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
  535 		default:
  536 			// No-op.
  537 		}
  538 	}
  539 	return len(s), nil
  540 }
  541 
  542 var elementNameMap = map[string]element{
  543 	"script":   elementScript,
  544 	"style":    elementStyle,
  545 	"textarea": elementTextarea,
  546 	"title":    elementTitle,
  547 }
  548 
  549 // asciiAlpha reports whether c is an ASCII letter.
  550 func asciiAlpha(c byte) bool {
  551 	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
  552 }
  553 
  554 // asciiAlphaNum reports whether c is an ASCII letter or digit.
  555 func asciiAlphaNum(c byte) bool {
  556 	return asciiAlpha(c) || '0' <= c && c <= '9'
  557 }
  558 
  559 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
  560 func eatTagName(s []byte, i int) (int, element) {
  561 	if i == len(s) || !asciiAlpha(s[i]) {
  562 		return i, elementNone
  563 	}
  564 	j := i + 1
  565 	for j < len(s) {
  566 		x := s[j]
  567 		if asciiAlphaNum(x) {
  568 			j++
  569 			continue
  570 		}
  571 		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
  572 		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
  573 			j += 2
  574 			continue
  575 		}
  576 		break
  577 	}
  578 	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
  579 }
  580 
  581 // eatWhiteSpace returns the largest j such that s[i:j] is white space.
  582 func eatWhiteSpace(s []byte, i int) int {
  583 	for j := i; j < len(s); j++ {
  584 		switch s[j] {
  585 		case ' ', '\t', '\n', '\f', '\r':
  586 			// No-op.
  587 		default:
  588 			return j
  589 		}
  590 	}
  591 	return len(s)
  592 }