js.go (12527B)
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package template
6
7 import (
8 "bytes"
9 "encoding/json"
10 "fmt"
11 htmltemplate "html/template"
12 "reflect"
13 "strings"
14 "unicode/utf8"
15 )
16
17 // nextJSCtx returns the context that determines whether a slash after the
18 // given run of tokens starts a regular expression instead of a division
19 // operator: / or /=.
20 //
21 // This assumes that the token run does not include any string tokens, comment
22 // tokens, regular expression literal tokens, or division operators.
23 //
24 // This fails on some valid but nonsensical JavaScript programs like
25 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
26 // fail on any known useful programs. It is based on the draft
27 // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
28 // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
29 func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
30 s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
31 if len(s) == 0 {
32 return preceding
33 }
34
35 // All cases below are in the single-byte UTF-8 group.
36 switch c, n := s[len(s)-1], len(s); c {
37 case '+', '-':
38 // ++ and -- are not regexp preceders, but + and - are whether
39 // they are used as infix or prefix operators.
40 start := n - 1
41 // Count the number of adjacent dashes or pluses.
42 for start > 0 && s[start-1] == c {
43 start--
44 }
45 if (n-start)&1 == 1 {
46 // Reached for trailing minus signs since "---" is the
47 // same as "-- -".
48 return jsCtxRegexp
49 }
50 return jsCtxDivOp
51 case '.':
52 // Handle "42."
53 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
54 return jsCtxDivOp
55 }
56 return jsCtxRegexp
57 // Suffixes for all punctuators from section 7.7 of the language spec
58 // that only end binary operators not handled above.
59 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
60 return jsCtxRegexp
61 // Suffixes for all punctuators from section 7.7 of the language spec
62 // that are prefix operators not handled above.
63 case '!', '~':
64 return jsCtxRegexp
65 // Matches all the punctuators from section 7.7 of the language spec
66 // that are open brackets not handled above.
67 case '(', '[':
68 return jsCtxRegexp
69 // Matches all the punctuators from section 7.7 of the language spec
70 // that precede expression starts.
71 case ':', ';', '{':
72 return jsCtxRegexp
73 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
74 // are handled in the default except for '}' which can precede a
75 // division op as in
76 // ({ valueOf: function () { return 42 } } / 2
77 // which is valid, but, in practice, developers don't divide object
78 // literals, so our heuristic works well for code like
79 // function () { ... } /foo/.test(x) && sideEffect();
80 // The ')' punctuator can precede a regular expression as in
81 // if (b) /foo/.test(x) && ...
82 // but this is much less likely than
83 // (a + b) / c
84 case '}':
85 return jsCtxRegexp
86 default:
87 // Look for an IdentifierName and see if it is a keyword that
88 // can precede a regular expression.
89 j := n
90 for j > 0 && isJSIdentPart(rune(s[j-1])) {
91 j--
92 }
93 if regexpPrecederKeywords[string(s[j:])] {
94 return jsCtxRegexp
95 }
96 }
97 // Otherwise is a punctuator not listed above, or
98 // a string which precedes a div op, or an identifier
99 // which precedes a div op.
100 return jsCtxDivOp
101 }
102
// regexpPrecederKeywords is the set of reserved JS keywords after which a
// regular expression may appear in JS source. Looking up any other word
// yields false.
var regexpPrecederKeywords = map[string]bool{
	"break": true, "case": true, "continue": true,
	"delete": true, "do": true,
	"else": true,
	"finally": true,
	"in": true, "instanceof": true,
	"return": true,
	"throw": true, "try": true, "typeof": true,
	"void": true,
}
121
// jsonMarshalType is the reflect.Type of the json.Marshaler interface.
var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()

// indirectToJSONMarshaler follows pointers in a until it reaches a value whose
// type implements json.Marshaler, a nil pointer, or a non-pointer value, and
// returns what it found.
func indirectToJSONMarshaler(a any) any {
	// An untyped nil can be passed as a func call argument by text/template.
	// reflect.ValueOf(nil) is an invalid Value on which Type and Interface
	// cannot be called, so it is answered up front.
	// See golang.org/issue/18716.
	if a == nil {
		return nil
	}

	v := reflect.ValueOf(a)
	for {
		if v.Type().Implements(jsonMarshalType) {
			// Stop at the outermost level that can marshal itself.
			break
		}
		if v.Kind() != reflect.Pointer || v.IsNil() {
			// Nothing further to dereference.
			break
		}
		v = v.Elem()
	}
	return v.Interface()
}
141
142 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
143 // neither side-effects nor free variables outside (NaN, Infinity).
144 func jsValEscaper(args ...any) string {
145 var a any
146 if len(args) == 1 {
147 a = indirectToJSONMarshaler(args[0])
148 switch t := a.(type) {
149 case htmltemplate.JS:
150 return string(t)
151 case htmltemplate.JSStr:
152 // TODO: normalize quotes.
153 return `"` + string(t) + `"`
154 case json.Marshaler:
155 // Do not treat as a Stringer.
156 case fmt.Stringer:
157 a = t.String()
158 }
159 } else {
160 for i, arg := range args {
161 args[i] = indirectToJSONMarshaler(arg)
162 }
163 a = fmt.Sprint(args...)
164 }
165 // TODO: detect cycles before calling Marshal which loops infinitely on
166 // cyclic data. This may be an unacceptable DoS risk.
167 b, err := json.Marshal(a)
168 if err != nil {
169 // Put a space before comment so that if it is flush against
170 // a division operator it is not turned into a line comment:
171 // x/{{y}}
172 // turning into
173 // x//* error marshaling y:
174 // second line of error message */null
175 return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /"))
176 }
177
178 // TODO: maybe post-process output to prevent it from containing
179 // "<!--", "-->", "<![CDATA[", "]]>", or "</script"
180 // in case custom marshalers produce output containing those.
181 // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
182 // supports ld+json content-type.
183 if len(b) == 0 {
184 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
185 // not cause the output `x=y/*z`.
186 return " null "
187 }
188 first, _ := utf8.DecodeRune(b)
189 last, _ := utf8.DecodeLastRune(b)
190 var buf strings.Builder
191 // Prevent IdentifierNames and NumericLiterals from running into
192 // keywords: in, instanceof, typeof, void
193 pad := isJSIdentPart(first) || isJSIdentPart(last)
194 if pad {
195 buf.WriteByte(' ')
196 }
197 written := 0
198 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029
199 // so it falls within the subset of JSON which is valid JS.
200 for i := 0; i < len(b); {
201 rune, n := utf8.DecodeRune(b[i:])
202 repl := ""
203 if rune == 0x2028 {
204 repl = `\u2028`
205 } else if rune == 0x2029 {
206 repl = `\u2029`
207 }
208 if repl != "" {
209 buf.Write(b[written:i])
210 buf.WriteString(repl)
211 written = i + n
212 }
213 i += n
214 }
215 if buf.Len() != 0 {
216 buf.Write(b[written:])
217 if pad {
218 buf.WriteByte(' ')
219 }
220 return buf.String()
221 }
222 return string(b)
223 }
224
225 // jsStrEscaper produces a string that can be included between quotes in
226 // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
227 // or in an HTML5 event handler attribute such as onclick.
228 func jsStrEscaper(args ...any) string {
229 s, t := stringify(args...)
230 if t == contentTypeJSStr {
231 return replace(s, jsStrNormReplacementTable)
232 }
233 return replace(s, jsStrReplacementTable)
234 }
235
236 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
237 // specials so the result is treated literally when included in a regular
238 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
239 // the literal text of {{.X}} followed by the string "bar".
240 func jsRegexpEscaper(args ...any) string {
241 s, _ := stringify(args...)
242 s = replace(s, jsRegexpReplacementTable)
243 if s == "" {
244 // /{{.X}}/ should not produce a line comment when .X == "".
245 return "(?:)"
246 }
247 return s
248 }
249
250 // replace replaces each rune r of s with replacementTable[r], provided that
251 // r < len(replacementTable). If replacementTable[r] is the empty string then
252 // no replacement is made.
253 // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
254 // `\u2029`.
255 func replace(s string, replacementTable []string) string {
256 var b strings.Builder
257 r, w, written := rune(0), 0, 0
258 for i := 0; i < len(s); i += w {
259 // See comment in htmlEscaper.
260 r, w = utf8.DecodeRuneInString(s[i:])
261 var repl string
262 switch {
263 case int(r) < len(lowUnicodeReplacementTable):
264 repl = lowUnicodeReplacementTable[r]
265 case int(r) < len(replacementTable) && replacementTable[r] != "":
266 repl = replacementTable[r]
267 case r == '\u2028':
268 repl = `\u2028`
269 case r == '\u2029':
270 repl = `\u2029`
271 default:
272 continue
273 }
274 if written == 0 {
275 b.Grow(len(s))
276 }
277 b.WriteString(s[written:i])
278 b.WriteString(repl)
279 written = i + w
280 }
281 if written == 0 {
282 return s
283 }
284 b.WriteString(s[written:])
285 return b.String()
286 }
287
// lowUnicodeReplacementTable maps each code point from U+0000 to U+001F,
// used as the index, to the escape sequence that replaces it. Tab, line
// feed, form feed and carriage return use their short escapes; every other
// entry is a \u00XX escape.
var lowUnicodeReplacementTable = []string{
	// U+0000 - U+0007
	`\u0000`, `\u0001`, `\u0002`, `\u0003`, `\u0004`, `\u0005`, `\u0006`, `\u0007`,
	// U+0008 - U+000F. U+000B is spelled out because "\v" == "v" on IE 6.
	`\u0008`, `\t`, `\n`, `\u000b`, `\f`, `\r`, `\u000e`, `\u000f`,
	// U+0010 - U+0017
	`\u0010`, `\u0011`, `\u0012`, `\u0013`, `\u0014`, `\u0015`, `\u0016`, `\u0017`,
	// U+0018 - U+001F
	`\u0018`, `\u0019`, `\u001a`, `\u001b`, `\u001c`, `\u001d`, `\u001e`, `\u001f`,
}
301
// jsStrReplacementTable maps a rune, used as the index, to the escape
// sequence that replaces it inside a JavaScript string literal. Runes whose
// entry is the empty string have no replacement in this table.
var jsStrReplacementTable = []string{
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'\\': `\\`,
	// The backtick delimits JS template literals. Encoding it keeps escaped
	// text from terminating a template literal it ends up inside of.
	'`': `\u0060`,
}
320
// jsStrNormReplacementTable is like jsStrReplacementTable but does not
// overencode existing escapes since this table has no entry for `\`.
var jsStrNormReplacementTable = []string{
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	// The backtick delimits JS template literals. Encoding it keeps escaped
	// text from terminating a template literal it ends up inside of.
	'`': `\u0060`,
}
// jsRegexpReplacementTable maps a rune, used as the index, to the escape
// sequence that replaces it inside a JavaScript regular expression literal.
// Runes whose entry is the empty string have no replacement in this table.
var jsRegexpReplacementTable = []string{
	// NUL and whitespace controls.
	0: `\u0000`, '\t': `\t`, '\n': `\n`, '\f': `\f`, '\r': `\r`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.

	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"': `\u0022`, '&': `\u0026`, '\'': `\u0027`, '+': `\u002b`,
	'<': `\u003c`, '>': `\u003e`,

	// Characters escaped with a backslash.
	'$': `\$`, '(': `\(`, ')': `\)`, '*': `\*`, '-': `\-`, '.': `\.`,
	'/': `\/`, '?': `\?`, '[': `\[`, '\\': `\\`, ']': `\]`, '^': `\^`,
	'{': `\{`, '|': `\|`, '}': `\}`,
}
371
// isJSIdentPart reports whether r is a JS identifier part: '$', '_', an
// ASCII digit or an ASCII letter.
// It does not handle all the non-Latin letters, joiners, and combining marks,
// but it does handle every codepoint that can occur in a numeric literal or
// a keyword.
func isJSIdentPart(r rune) bool {
	if r == '$' || r == '_' {
		return true
	}
	isDigit := '0' <= r && r <= '9'
	isUpper := 'A' <= r && r <= 'Z'
	isLower := 'a' <= r && r <= 'z'
	return isDigit || isUpper || isLower
}
391
// isJSType reports whether the given MIME type should be considered JavaScript.
//
// It is used to determine whether a script tag with a type attribute is a
// javascript container. Anything from the first ';' on (the parameters) is
// discarded, and the remainder is compared case-insensitively with the
// surrounding whitespace removed.
//
// References:
//
//	https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
//	https://tools.ietf.org/html/rfc7231#section-3.1.1
//	https://tools.ietf.org/html/rfc4329#section-3
//	https://www.ietf.org/rfc/rfc4627.txt
func isJSType(mimeType string) bool {
	essence, _, _ := strings.Cut(mimeType, ";")
	switch strings.TrimSpace(strings.ToLower(essence)) {
	case "module":
		return true
	case "application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript":
		return true
	case "text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	}
	return false
}
430 }