js.go - hugo - Fork of github.com/gohugoio/hugo with reverse pagination support

js.go (12527B)

    1 // Copyright 2011 The Go Authors. All rights reserved.
    2 // Use of this source code is governed by a BSD-style
    3 // license that can be found in the LICENSE file.
    4 
    5 package template
    6 
    7 import (
    8 	"bytes"
    9 	"encoding/json"
   10 	"fmt"
   11 	htmltemplate "html/template"
   12 	"reflect"
   13 	"strings"
   14 	"unicode/utf8"
   15 )
   16 
   17 // nextJSCtx returns the context that determines whether a slash after the
   18 // given run of tokens starts a regular expression instead of a division
   19 // operator: / or /=.
   20 //
   21 // This assumes that the token run does not include any string tokens, comment
   22 // tokens, regular expression literal tokens, or division operators.
   23 //
   24 // This fails on some valid but nonsensical JavaScript programs like
   25 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
   26 // fail on any known useful programs. It is based on the draft
   27 // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
   28 // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
   29 func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
   30 	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
   31 	if len(s) == 0 {
   32 		return preceding
   33 	}
   34 
   35 	// All cases below are in the single-byte UTF-8 group.
   36 	switch c, n := s[len(s)-1], len(s); c {
   37 	case '+', '-':
   38 		// ++ and -- are not regexp preceders, but + and - are whether
   39 		// they are used as infix or prefix operators.
   40 		start := n - 1
   41 		// Count the number of adjacent dashes or pluses.
   42 		for start > 0 && s[start-1] == c {
   43 			start--
   44 		}
   45 		if (n-start)&1 == 1 {
   46 			// Reached for trailing minus signs since "---" is the
   47 			// same as "-- -".
   48 			return jsCtxRegexp
   49 		}
   50 		return jsCtxDivOp
   51 	case '.':
   52 		// Handle "42."
   53 		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
   54 			return jsCtxDivOp
   55 		}
   56 		return jsCtxRegexp
   57 	// Suffixes for all punctuators from section 7.7 of the language spec
   58 	// that only end binary operators not handled above.
   59 	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
   60 		return jsCtxRegexp
   61 	// Suffixes for all punctuators from section 7.7 of the language spec
   62 	// that are prefix operators not handled above.
   63 	case '!', '~':
   64 		return jsCtxRegexp
   65 	// Matches all the punctuators from section 7.7 of the language spec
   66 	// that are open brackets not handled above.
   67 	case '(', '[':
   68 		return jsCtxRegexp
   69 	// Matches all the punctuators from section 7.7 of the language spec
   70 	// that precede expression starts.
   71 	case ':', ';', '{':
   72 		return jsCtxRegexp
   73 	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
   74 	// are handled in the default except for '}' which can precede a
   75 	// division op as in
   76 	//    ({ valueOf: function () { return 42 } } / 2
   77 	// which is valid, but, in practice, developers don't divide object
   78 	// literals, so our heuristic works well for code like
   79 	//    function () { ... }  /foo/.test(x) && sideEffect();
   80 	// The ')' punctuator can precede a regular expression as in
   81 	//     if (b) /foo/.test(x) && ...
   82 	// but this is much less likely than
   83 	//     (a + b) / c
   84 	case '}':
   85 		return jsCtxRegexp
   86 	default:
   87 		// Look for an IdentifierName and see if it is a keyword that
   88 		// can precede a regular expression.
   89 		j := n
   90 		for j > 0 && isJSIdentPart(rune(s[j-1])) {
   91 			j--
   92 		}
   93 		if regexpPrecederKeywords[string(s[j:])] {
   94 			return jsCtxRegexp
   95 		}
   96 	}
   97 	// Otherwise is a punctuator not listed above, or
   98 	// a string which precedes a div op, or an identifier
   99 	// which precedes a div op.
  100 	return jsCtxDivOp
  101 }
  102 
  103 // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
  104 // regular expression in JS source.
  105 var regexpPrecederKeywords = map[string]bool{
  106 	"break":      true,
  107 	"case":       true,
  108 	"continue":   true,
  109 	"delete":     true,
  110 	"do":         true,
  111 	"else":       true,
  112 	"finally":    true,
  113 	"in":         true,
  114 	"instanceof": true,
  115 	"return":     true,
  116 	"throw":      true,
  117 	"try":        true,
  118 	"typeof":     true,
  119 	"void":       true,
  120 }
  121 
  122 var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
  123 
  124 // indirectToJSONMarshaler returns the value, after dereferencing as many times
  125 // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
  126 func indirectToJSONMarshaler(a any) any {
  127 	// text/template now supports passing untyped nil as a func call
  128 	// argument, so we must support it. Otherwise we'd panic below, as one
  129 	// cannot call the Type or Interface methods on an invalid
  130 	// reflect.Value. See golang.org/issue/18716.
  131 	if a == nil {
  132 		return nil
  133 	}
  134 
  135 	v := reflect.ValueOf(a)
  136 	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() {
  137 		v = v.Elem()
  138 	}
  139 	return v.Interface()
  140 }
  141 
  142 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
  143 // neither side-effects nor free variables outside (NaN, Infinity).
  144 func jsValEscaper(args ...any) string {
  145 	var a any
  146 	if len(args) == 1 {
  147 		a = indirectToJSONMarshaler(args[0])
  148 		switch t := a.(type) {
  149 		case htmltemplate.JS:
  150 			return string(t)
  151 		case htmltemplate.JSStr:
  152 			// TODO: normalize quotes.
  153 			return `"` + string(t) + `"`
  154 		case json.Marshaler:
  155 			// Do not treat as a Stringer.
  156 		case fmt.Stringer:
  157 			a = t.String()
  158 		}
  159 	} else {
  160 		for i, arg := range args {
  161 			args[i] = indirectToJSONMarshaler(arg)
  162 		}
  163 		a = fmt.Sprint(args...)
  164 	}
  165 	// TODO: detect cycles before calling Marshal which loops infinitely on
  166 	// cyclic data. This may be an unacceptable DoS risk.
  167 	b, err := json.Marshal(a)
  168 	if err != nil {
  169 		// Put a space before comment so that if it is flush against
  170 		// a division operator it is not turned into a line comment:
  171 		//     x/{{y}}
  172 		// turning into
  173 		//     x//* error marshaling y:
  174 		//          second line of error message */null
  175 		return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /"))
  176 	}
  177 
  178 	// TODO: maybe post-process output to prevent it from containing
  179 	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
  180 	// in case custom marshalers produce output containing those.
  181 	// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
  182 	// supports ld+json content-type.
  183 	if len(b) == 0 {
  184 		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
  185 		// not cause the output `x=y/*z`.
  186 		return " null "
  187 	}
  188 	first, _ := utf8.DecodeRune(b)
  189 	last, _ := utf8.DecodeLastRune(b)
  190 	var buf strings.Builder
  191 	// Prevent IdentifierNames and NumericLiterals from running into
  192 	// keywords: in, instanceof, typeof, void
  193 	pad := isJSIdentPart(first) || isJSIdentPart(last)
  194 	if pad {
  195 		buf.WriteByte(' ')
  196 	}
  197 	written := 0
  198 	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
  199 	// so it falls within the subset of JSON which is valid JS.
  200 	for i := 0; i < len(b); {
  201 		rune, n := utf8.DecodeRune(b[i:])
  202 		repl := ""
  203 		if rune == 0x2028 {
  204 			repl = `\u2028`
  205 		} else if rune == 0x2029 {
  206 			repl = `\u2029`
  207 		}
  208 		if repl != "" {
  209 			buf.Write(b[written:i])
  210 			buf.WriteString(repl)
  211 			written = i + n
  212 		}
  213 		i += n
  214 	}
  215 	if buf.Len() != 0 {
  216 		buf.Write(b[written:])
  217 		if pad {
  218 			buf.WriteByte(' ')
  219 		}
  220 		return buf.String()
  221 	}
  222 	return string(b)
  223 }
  224 
  225 // jsStrEscaper produces a string that can be included between quotes in
  226 // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
  227 // or in an HTML5 event handler attribute such as onclick.
  228 func jsStrEscaper(args ...any) string {
  229 	s, t := stringify(args...)
  230 	if t == contentTypeJSStr {
  231 		return replace(s, jsStrNormReplacementTable)
  232 	}
  233 	return replace(s, jsStrReplacementTable)
  234 }
  235 
  236 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
  237 // specials so the result is treated literally when included in a regular
  238 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
  239 // the literal text of {{.X}} followed by the string "bar".
  240 func jsRegexpEscaper(args ...any) string {
  241 	s, _ := stringify(args...)
  242 	s = replace(s, jsRegexpReplacementTable)
  243 	if s == "" {
  244 		// /{{.X}}/ should not produce a line comment when .X == "".
  245 		return "(?:)"
  246 	}
  247 	return s
  248 }
  249 
  250 // replace replaces each rune r of s with replacementTable[r], provided that
  251 // r < len(replacementTable). If replacementTable[r] is the empty string then
  252 // no replacement is made.
  253 // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
  254 // `\u2029`.
  255 func replace(s string, replacementTable []string) string {
  256 	var b strings.Builder
  257 	r, w, written := rune(0), 0, 0
  258 	for i := 0; i < len(s); i += w {
  259 		// See comment in htmlEscaper.
  260 		r, w = utf8.DecodeRuneInString(s[i:])
  261 		var repl string
  262 		switch {
  263 		case int(r) < len(lowUnicodeReplacementTable):
  264 			repl = lowUnicodeReplacementTable[r]
  265 		case int(r) < len(replacementTable) && replacementTable[r] != "":
  266 			repl = replacementTable[r]
  267 		case r == '\u2028':
  268 			repl = `\u2028`
  269 		case r == '\u2029':
  270 			repl = `\u2029`
  271 		default:
  272 			continue
  273 		}
  274 		if written == 0 {
  275 			b.Grow(len(s))
  276 		}
  277 		b.WriteString(s[written:i])
  278 		b.WriteString(repl)
  279 		written = i + w
  280 	}
  281 	if written == 0 {
  282 		return s
  283 	}
  284 	b.WriteString(s[written:])
  285 	return b.String()
  286 }
  287 
  288 var lowUnicodeReplacementTable = []string{
  289 	0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
  290 	'\a': `\u0007`,
  291 	'\b': `\u0008`,
  292 	'\t': `\t`,
  293 	'\n': `\n`,
  294 	'\v': `\u000b`, // "\v" == "v" on IE 6.
  295 	'\f': `\f`,
  296 	'\r': `\r`,
  297 	0xe:  `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
  298 	0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
  299 	0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
  300 }
  301 
  302 var jsStrReplacementTable = []string{
  303 	0:    `\u0000`,
  304 	'\t': `\t`,
  305 	'\n': `\n`,
  306 	'\v': `\u000b`, // "\v" == "v" on IE 6.
  307 	'\f': `\f`,
  308 	'\r': `\r`,
  309 	// Encode HTML specials as hex so the output can be embedded
  310 	// in HTML attributes without further encoding.
  311 	'"':  `\u0022`,
  312 	'&':  `\u0026`,
  313 	'\'': `\u0027`,
  314 	'+':  `\u002b`,
  315 	'/':  `\/`,
  316 	'<':  `\u003c`,
  317 	'>':  `\u003e`,
  318 	'\\': `\\`,
  319 }
  320 
  321 // jsStrNormReplacementTable is like jsStrReplacementTable but does not
  322 // overencode existing escapes since this table has no entry for `\`.
  323 var jsStrNormReplacementTable = []string{
  324 	0:    `\u0000`,
  325 	'\t': `\t`,
  326 	'\n': `\n`,
  327 	'\v': `\u000b`, // "\v" == "v" on IE 6.
  328 	'\f': `\f`,
  329 	'\r': `\r`,
  330 	// Encode HTML specials as hex so the output can be embedded
  331 	// in HTML attributes without further encoding.
  332 	'"':  `\u0022`,
  333 	'&':  `\u0026`,
  334 	'\'': `\u0027`,
  335 	'+':  `\u002b`,
  336 	'/':  `\/`,
  337 	'<':  `\u003c`,
  338 	'>':  `\u003e`,
  339 }
  340 var jsRegexpReplacementTable = []string{
  341 	0:    `\u0000`,
  342 	'\t': `\t`,
  343 	'\n': `\n`,
  344 	'\v': `\u000b`, // "\v" == "v" on IE 6.
  345 	'\f': `\f`,
  346 	'\r': `\r`,
  347 	// Encode HTML specials as hex so the output can be embedded
  348 	// in HTML attributes without further encoding.
  349 	'"':  `\u0022`,
  350 	'$':  `\$`,
  351 	'&':  `\u0026`,
  352 	'\'': `\u0027`,
  353 	'(':  `\(`,
  354 	')':  `\)`,
  355 	'*':  `\*`,
  356 	'+':  `\u002b`,
  357 	'-':  `\-`,
  358 	'.':  `\.`,
  359 	'/':  `\/`,
  360 	'<':  `\u003c`,
  361 	'>':  `\u003e`,
  362 	'?':  `\?`,
  363 	'[':  `\[`,
  364 	'\\': `\\`,
  365 	']':  `\]`,
  366 	'^':  `\^`,
  367 	'{':  `\{`,
  368 	'|':  `\|`,
  369 	'}':  `\}`,
  370 }
  371 
  372 // isJSIdentPart reports whether the given rune is a JS identifier part.
  373 // It does not handle all the non-Latin letters, joiners, and combining marks,
  374 // but it does handle every codepoint that can occur in a numeric literal or
  375 // a keyword.
  376 func isJSIdentPart(r rune) bool {
  377 	switch {
  378 	case r == '$':
  379 		return true
  380 	case '0' <= r && r <= '9':
  381 		return true
  382 	case 'A' <= r && r <= 'Z':
  383 		return true
  384 	case r == '_':
  385 		return true
  386 	case 'a' <= r && r <= 'z':
  387 		return true
  388 	}
  389 	return false
  390 }
  391 
  392 // isJSType reports whether the given MIME type should be considered JavaScript.
  393 //
  394 // It is used to determine whether a script tag with a type attribute is a javascript container.
  395 func isJSType(mimeType string) bool {
  396 	// per
  397 	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
  398 	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
  399 	//   https://tools.ietf.org/html/rfc4329#section-3
  400 	//   https://www.ietf.org/rfc/rfc4627.txt
  401 	// discard parameters
  402 	mimeType, _, _ = strings.Cut(mimeType, ";")
  403 	mimeType = strings.ToLower(mimeType)
  404 	mimeType = strings.TrimSpace(mimeType)
  405 	switch mimeType {
  406 	case
  407 		"application/ecmascript",
  408 		"application/javascript",
  409 		"application/json",
  410 		"application/ld+json",
  411 		"application/x-ecmascript",
  412 		"application/x-javascript",
  413 		"module",
  414 		"text/ecmascript",
  415 		"text/javascript",
  416 		"text/javascript1.0",
  417 		"text/javascript1.1",
  418 		"text/javascript1.2",
  419 		"text/javascript1.3",
  420 		"text/javascript1.4",
  421 		"text/javascript1.5",
  422 		"text/jscript",
  423 		"text/livescript",
  424 		"text/x-ecmascript",
  425 		"text/x-javascript":
  426 		return true
  427 	default:
  428 		return false
  429 	}
  430 }