js.go (12527B)
1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "encoding/json" 10 "fmt" 11 htmltemplate "html/template" 12 "reflect" 13 "strings" 14 "unicode/utf8" 15 ) 16 17 // nextJSCtx returns the context that determines whether a slash after the 18 // given run of tokens starts a regular expression instead of a division 19 // operator: / or /=. 20 // 21 // This assumes that the token run does not include any string tokens, comment 22 // tokens, regular expression literal tokens, or division operators. 23 // 24 // This fails on some valid but nonsensical JavaScript programs like 25 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to 26 // fail on any known useful programs. It is based on the draft 27 // JavaScript 2.0 lexical grammar and requires one token of lookbehind: 28 // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html 29 func nextJSCtx(s []byte, preceding jsCtx) jsCtx { 30 s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029") 31 if len(s) == 0 { 32 return preceding 33 } 34 35 // All cases below are in the single-byte UTF-8 group. 36 switch c, n := s[len(s)-1], len(s); c { 37 case '+', '-': 38 // ++ and -- are not regexp preceders, but + and - are whether 39 // they are used as infix or prefix operators. 40 start := n - 1 41 // Count the number of adjacent dashes or pluses. 42 for start > 0 && s[start-1] == c { 43 start-- 44 } 45 if (n-start)&1 == 1 { 46 // Reached for trailing minus signs since "---" is the 47 // same as "-- -". 48 return jsCtxRegexp 49 } 50 return jsCtxDivOp 51 case '.': 52 // Handle "42." 53 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { 54 return jsCtxDivOp 55 } 56 return jsCtxRegexp 57 // Suffixes for all punctuators from section 7.7 of the language spec 58 // that only end binary operators not handled above. 
59 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': 60 return jsCtxRegexp 61 // Suffixes for all punctuators from section 7.7 of the language spec 62 // that are prefix operators not handled above. 63 case '!', '~': 64 return jsCtxRegexp 65 // Matches all the punctuators from section 7.7 of the language spec 66 // that are open brackets not handled above. 67 case '(', '[': 68 return jsCtxRegexp 69 // Matches all the punctuators from section 7.7 of the language spec 70 // that precede expression starts. 71 case ':', ';', '{': 72 return jsCtxRegexp 73 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and 74 // are handled in the default except for '}' which can precede a 75 // division op as in 76 // ({ valueOf: function () { return 42 } } / 2 77 // which is valid, but, in practice, developers don't divide object 78 // literals, so our heuristic works well for code like 79 // function () { ... } /foo/.test(x) && sideEffect(); 80 // The ')' punctuator can precede a regular expression as in 81 // if (b) /foo/.test(x) && ... 82 // but this is much less likely than 83 // (a + b) / c 84 case '}': 85 return jsCtxRegexp 86 default: 87 // Look for an IdentifierName and see if it is a keyword that 88 // can precede a regular expression. 89 j := n 90 for j > 0 && isJSIdentPart(rune(s[j-1])) { 91 j-- 92 } 93 if regexpPrecederKeywords[string(s[j:])] { 94 return jsCtxRegexp 95 } 96 } 97 // Otherwise is a punctuator not listed above, or 98 // a string which precedes a div op, or an identifier 99 // which precedes a div op. 100 return jsCtxDivOp 101 } 102 103 // regexpPrecederKeywords is a set of reserved JS keywords that can precede a 104 // regular expression in JS source. 
105 var regexpPrecederKeywords = map[string]bool{ 106 "break": true, 107 "case": true, 108 "continue": true, 109 "delete": true, 110 "do": true, 111 "else": true, 112 "finally": true, 113 "in": true, 114 "instanceof": true, 115 "return": true, 116 "throw": true, 117 "try": true, 118 "typeof": true, 119 "void": true, 120 } 121 122 var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() 123 124 // indirectToJSONMarshaler returns the value, after dereferencing as many times 125 // as necessary to reach the base type (or nil) or an implementation of json.Marshal. 126 func indirectToJSONMarshaler(a any) any { 127 // text/template now supports passing untyped nil as a func call 128 // argument, so we must support it. Otherwise we'd panic below, as one 129 // cannot call the Type or Interface methods on an invalid 130 // reflect.Value. See golang.org/issue/18716. 131 if a == nil { 132 return nil 133 } 134 135 v := reflect.ValueOf(a) 136 for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() { 137 v = v.Elem() 138 } 139 return v.Interface() 140 } 141 142 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has 143 // neither side-effects nor free variables outside (NaN, Infinity). 144 func jsValEscaper(args ...any) string { 145 var a any 146 if len(args) == 1 { 147 a = indirectToJSONMarshaler(args[0]) 148 switch t := a.(type) { 149 case htmltemplate.JS: 150 return string(t) 151 case htmltemplate.JSStr: 152 // TODO: normalize quotes. 153 return `"` + string(t) + `"` 154 case json.Marshaler: 155 // Do not treat as a Stringer. 156 case fmt.Stringer: 157 a = t.String() 158 } 159 } else { 160 for i, arg := range args { 161 args[i] = indirectToJSONMarshaler(arg) 162 } 163 a = fmt.Sprint(args...) 164 } 165 // TODO: detect cycles before calling Marshal which loops infinitely on 166 // cyclic data. This may be an unacceptable DoS risk. 
167 b, err := json.Marshal(a) 168 if err != nil { 169 // Put a space before comment so that if it is flush against 170 // a division operator it is not turned into a line comment: 171 // x/{{y}} 172 // turning into 173 // x//* error marshaling y: 174 // second line of error message */null 175 return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /")) 176 } 177 178 // TODO: maybe post-process output to prevent it from containing 179 // "<!--", "-->", "<![CDATA[", "]]>", or "</script" 180 // in case custom marshalers produce output containing those. 181 // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper 182 // supports ld+json content-type. 183 if len(b) == 0 { 184 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should 185 // not cause the output `x=y/*z`. 186 return " null " 187 } 188 first, _ := utf8.DecodeRune(b) 189 last, _ := utf8.DecodeLastRune(b) 190 var buf strings.Builder 191 // Prevent IdentifierNames and NumericLiterals from running into 192 // keywords: in, instanceof, typeof, void 193 pad := isJSIdentPart(first) || isJSIdentPart(last) 194 if pad { 195 buf.WriteByte(' ') 196 } 197 written := 0 198 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029 199 // so it falls within the subset of JSON which is valid JS. 
200 for i := 0; i < len(b); { 201 rune, n := utf8.DecodeRune(b[i:]) 202 repl := "" 203 if rune == 0x2028 { 204 repl = `\u2028` 205 } else if rune == 0x2029 { 206 repl = `\u2029` 207 } 208 if repl != "" { 209 buf.Write(b[written:i]) 210 buf.WriteString(repl) 211 written = i + n 212 } 213 i += n 214 } 215 if buf.Len() != 0 { 216 buf.Write(b[written:]) 217 if pad { 218 buf.WriteByte(' ') 219 } 220 return buf.String() 221 } 222 return string(b) 223 } 224 225 // jsStrEscaper produces a string that can be included between quotes in 226 // JavaScript source, in JavaScript embedded in an HTML5 <script> element, 227 // or in an HTML5 event handler attribute such as onclick. 228 func jsStrEscaper(args ...any) string { 229 s, t := stringify(args...) 230 if t == contentTypeJSStr { 231 return replace(s, jsStrNormReplacementTable) 232 } 233 return replace(s, jsStrReplacementTable) 234 } 235 236 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression 237 // specials so the result is treated literally when included in a regular 238 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by 239 // the literal text of {{.X}} followed by the string "bar". 240 func jsRegexpEscaper(args ...any) string { 241 s, _ := stringify(args...) 242 s = replace(s, jsRegexpReplacementTable) 243 if s == "" { 244 // /{{.X}}/ should not produce a line comment when .X == "". 245 return "(?:)" 246 } 247 return s 248 } 249 250 // replace replaces each rune r of s with replacementTable[r], provided that 251 // r < len(replacementTable). If replacementTable[r] is the empty string then 252 // no replacement is made. 253 // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and 254 // `\u2029`. 255 func replace(s string, replacementTable []string) string { 256 var b strings.Builder 257 r, w, written := rune(0), 0, 0 258 for i := 0; i < len(s); i += w { 259 // See comment in htmlEscaper. 
260 r, w = utf8.DecodeRuneInString(s[i:]) 261 var repl string 262 switch { 263 case int(r) < len(lowUnicodeReplacementTable): 264 repl = lowUnicodeReplacementTable[r] 265 case int(r) < len(replacementTable) && replacementTable[r] != "": 266 repl = replacementTable[r] 267 case r == '\u2028': 268 repl = `\u2028` 269 case r == '\u2029': 270 repl = `\u2029` 271 default: 272 continue 273 } 274 if written == 0 { 275 b.Grow(len(s)) 276 } 277 b.WriteString(s[written:i]) 278 b.WriteString(repl) 279 written = i + w 280 } 281 if written == 0 { 282 return s 283 } 284 b.WriteString(s[written:]) 285 return b.String() 286 } 287 288 var lowUnicodeReplacementTable = []string{ 289 0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`, 290 '\a': `\u0007`, 291 '\b': `\u0008`, 292 '\t': `\t`, 293 '\n': `\n`, 294 '\v': `\u000b`, // "\v" == "v" on IE 6. 295 '\f': `\f`, 296 '\r': `\r`, 297 0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`, 298 0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`, 299 0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`, 300 } 301 302 var jsStrReplacementTable = []string{ 303 0: `\u0000`, 304 '\t': `\t`, 305 '\n': `\n`, 306 '\v': `\u000b`, // "\v" == "v" on IE 6. 307 '\f': `\f`, 308 '\r': `\r`, 309 // Encode HTML specials as hex so the output can be embedded 310 // in HTML attributes without further encoding. 311 '"': `\u0022`, 312 '&': `\u0026`, 313 '\'': `\u0027`, 314 '+': `\u002b`, 315 '/': `\/`, 316 '<': `\u003c`, 317 '>': `\u003e`, 318 '\\': `\\`, 319 } 320 321 // jsStrNormReplacementTable is like jsStrReplacementTable but does not 322 // overencode existing escapes since this table has no entry for `\`. 323 var jsStrNormReplacementTable = []string{ 324 0: `\u0000`, 325 '\t': `\t`, 326 '\n': `\n`, 327 '\v': `\u000b`, // "\v" == "v" on IE 6. 
328 '\f': `\f`, 329 '\r': `\r`, 330 // Encode HTML specials as hex so the output can be embedded 331 // in HTML attributes without further encoding. 332 '"': `\u0022`, 333 '&': `\u0026`, 334 '\'': `\u0027`, 335 '+': `\u002b`, 336 '/': `\/`, 337 '<': `\u003c`, 338 '>': `\u003e`, 339 } 340 var jsRegexpReplacementTable = []string{ 341 0: `\u0000`, 342 '\t': `\t`, 343 '\n': `\n`, 344 '\v': `\u000b`, // "\v" == "v" on IE 6. 345 '\f': `\f`, 346 '\r': `\r`, 347 // Encode HTML specials as hex so the output can be embedded 348 // in HTML attributes without further encoding. 349 '"': `\u0022`, 350 '$': `\$`, 351 '&': `\u0026`, 352 '\'': `\u0027`, 353 '(': `\(`, 354 ')': `\)`, 355 '*': `\*`, 356 '+': `\u002b`, 357 '-': `\-`, 358 '.': `\.`, 359 '/': `\/`, 360 '<': `\u003c`, 361 '>': `\u003e`, 362 '?': `\?`, 363 '[': `\[`, 364 '\\': `\\`, 365 ']': `\]`, 366 '^': `\^`, 367 '{': `\{`, 368 '|': `\|`, 369 '}': `\}`, 370 } 371 372 // isJSIdentPart reports whether the given rune is a JS identifier part. 373 // It does not handle all the non-Latin letters, joiners, and combining marks, 374 // but it does handle every codepoint that can occur in a numeric literal or 375 // a keyword. 376 func isJSIdentPart(r rune) bool { 377 switch { 378 case r == '$': 379 return true 380 case '0' <= r && r <= '9': 381 return true 382 case 'A' <= r && r <= 'Z': 383 return true 384 case r == '_': 385 return true 386 case 'a' <= r && r <= 'z': 387 return true 388 } 389 return false 390 } 391 392 // isJSType reports whether the given MIME type should be considered JavaScript. 393 // 394 // It is used to determine whether a script tag with a type attribute is a javascript container. 
func isJSType(mimeType string) bool {
	// Background:
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt

	// Parameters (everything from the first ';' onward) are ignored; the
	// remainder is compared case-insensitively with surrounding whitespace
	// removed.
	essence, _, _ := strings.Cut(mimeType, ";")
	essence = strings.TrimSpace(strings.ToLower(essence))

	jsTypes := [...]string{
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript",
	}
	for _, candidate := range jsTypes {
		if essence == candidate {
			return true
		}
	}
	return false
}