escape.go (31859B)
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package template
6
7 import (
8 "bytes"
9 "fmt"
10 "html"
11 "io"
12
13 template "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
14 "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
15 )
16
17 // escapeTemplate rewrites the named template, which must be
18 // associated with t, to guarantee that the output of any of the named
19 // templates is properly escaped. If no error is returned, then the named templates have
20 // been modified. Otherwise the named templates have been rendered
21 // unusable.
22 func escapeTemplate(tmpl *Template, node parse.Node, name string) error {
23 c, _ := tmpl.esc.escapeTree(context{}, node, name, 0)
24 var err error
25 if c.err != nil {
26 err, c.err.Name = c.err, name
27 } else if c.state != stateText {
28 err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)}
29 }
30 if err != nil {
31 // Prevent execution of unsafe templates.
32 if t := tmpl.set[name]; t != nil {
33 t.escapeErr = err
34 t.text.Tree = nil
35 t.Tree = nil
36 }
37 return err
38 }
39 tmpl.esc.commit()
40 if t := tmpl.set[name]; t != nil {
41 t.escapeErr = escapeOK
42 t.Tree = t.text.Tree
43 }
44 return nil
45 }
46
47 // evalArgs formats the list of arguments into a string. It is equivalent to
48 // fmt.Sprint(args...), except that it dereferences all pointers.
49 func evalArgs(args ...any) string {
50 // Optimization for simple common case of a single string argument.
51 if len(args) == 1 {
52 if s, ok := args[0].(string); ok {
53 return s
54 }
55 }
56 for i, arg := range args {
57 args[i] = indirectToStringerOrError(arg)
58 }
59 return fmt.Sprint(args...)
60 }
61
62 // funcMap maps command names to functions that render their inputs safe.
63 var funcMap = template.FuncMap{
64 "_html_template_attrescaper": attrEscaper,
65 "_html_template_commentescaper": commentEscaper,
66 "_html_template_cssescaper": cssEscaper,
67 "_html_template_cssvaluefilter": cssValueFilter,
68 "_html_template_htmlnamefilter": htmlNameFilter,
69 "_html_template_htmlescaper": htmlEscaper,
70 "_html_template_jsregexpescaper": jsRegexpEscaper,
71 "_html_template_jsstrescaper": jsStrEscaper,
72 "_html_template_jsvalescaper": jsValEscaper,
73 "_html_template_nospaceescaper": htmlNospaceEscaper,
74 "_html_template_rcdataescaper": rcdataEscaper,
75 "_html_template_srcsetescaper": srcsetFilterAndEscaper,
76 "_html_template_urlescaper": urlEscaper,
77 "_html_template_urlfilter": urlFilter,
78 "_html_template_urlnormalizer": urlNormalizer,
79 "_eval_args_": evalArgs,
80 }
81
// escaper collects type inferences about templates and changes needed to make
// templates injection safe. Edits are accumulated in the xxxNodeEdits maps and
// only applied to the parse trees by commit.
type escaper struct {
	// ns is the nameSpace that this escaper is associated with.
	ns *nameSpace
	// output[templateName] is the output context for a templateName that
	// has been mangled to include its input context.
	output map[string]context
	// derived[c.mangle(name)] maps to a template derived from the template
	// called name for the start context c.
	derived map[string]*template.Template
	// called[templateName] is a set of called mangled template names.
	called map[string]bool
	// xxxNodeEdits are the accumulated edits to apply during commit.
	// Such edits are not applied immediately in case a template set
	// executes a given template in different escaping contexts.
	actionNodeEdits   map[*parse.ActionNode][]string
	templateNodeEdits map[*parse.TemplateNode]string
	textNodeEdits     map[*parse.TextNode][]byte
	// rangeContext holds context about the current range loop.
	// It is nil when no range loop is being escaped.
	rangeContext *rangeContext
}
104
// rangeContext holds information about the current range loop. Instances are
// chained through outer, forming a stack with one entry per range loop that
// is currently being escaped.
type rangeContext struct {
	outer     *rangeContext // outer loop
	breaks    []context     // context at each break action
	continues []context     // context at each continue action
}
111
112 // makeEscaper creates a blank escaper for the given set.
113 func makeEscaper(n *nameSpace) escaper {
114 return escaper{
115 n,
116 map[string]context{},
117 map[string]*template.Template{},
118 map[string]bool{},
119 map[*parse.ActionNode][]string{},
120 map[*parse.TemplateNode]string{},
121 map[*parse.TextNode][]byte{},
122 nil,
123 }
124 }
125
// filterFailsafe is an innocuous word that is emitted in place of unsafe values
// by sanitizer functions. It is not a keyword in any programming language,
// contains no special characters, is not empty, and when it appears in output
// it is distinct enough that a developer can find the source of the problem
// via a search engine.
const filterFailsafe = "ZgotmplZ"
132
133 // escape escapes a template node.
134 func (e *escaper) escape(c context, n parse.Node) context {
135 switch n := n.(type) {
136 case *parse.ActionNode:
137 return e.escapeAction(c, n)
138 case *parse.BreakNode:
139 c.n = n
140 e.rangeContext.breaks = append(e.rangeContext.breaks, c)
141 return context{state: stateDead}
142 case *parse.CommentNode:
143 return c
144 case *parse.ContinueNode:
145 c.n = n
146 e.rangeContext.continues = append(e.rangeContext.breaks, c)
147 return context{state: stateDead}
148 case *parse.IfNode:
149 return e.escapeBranch(c, &n.BranchNode, "if")
150 case *parse.ListNode:
151 return e.escapeList(c, n)
152 case *parse.RangeNode:
153 return e.escapeBranch(c, &n.BranchNode, "range")
154 case *parse.TemplateNode:
155 return e.escapeTemplate(c, n)
156 case *parse.TextNode:
157 return e.escapeText(c, n)
158 case *parse.WithNode:
159 return e.escapeBranch(c, &n.BranchNode, "with")
160 }
161 panic("escaping " + n.String() + " is unimplemented")
162 }
163
// escapeAction escapes an action template node.
//
// Actions that declare variables are left alone. Otherwise the context is
// nudged, the pipeline is checked for disallowed uses of predefined escapers,
// and the list of escaper names required by the (nudged) context is recorded
// with editActionNode for a later commit. It returns the context after the
// action, or an error context if the pipeline or the context is unacceptable.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
	if len(n.Pipe.Decl) != 0 {
		// A local variable assignment, not an interpolation.
		return c
	}
	c = nudge(c)
	// Check for disallowed use of predefined escapers in the pipeline.
	for pos, idNode := range n.Pipe.Cmds {
		node, ok := idNode.Args[0].(*parse.IdentifierNode)
		if !ok {
			// A predefined escaper "esc" will never be found as an identifier in a
			// Chain or Field node, since:
			// - "esc.x ..." is invalid, since predefined escapers return strings, and
			//   strings do not have methods, keys or fields.
			// - "... .esc" is invalid, since predefined escapers are global functions,
			//   not methods or fields of any types.
			// Therefore, it is safe to ignore these two node types.
			continue
		}
		ident := node.Ident
		if _, ok := predefinedEscapers[ident]; ok {
			// A predefined escaper is rejected unless it is the last command,
			// and "html" is additionally rejected in an unquoted attribute value.
			if pos < len(n.Pipe.Cmds)-1 ||
				c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" {
				return context{
					state: stateError,
					err:   errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident),
				}
			}
		}
	}
	// s collects, in order, the escaper names to append to the pipeline.
	s := make([]string, 0, 3)
	switch c.state {
	case stateError:
		return c
	case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
		switch c.urlPart {
		case urlPartNone:
			s = append(s, "_html_template_urlfilter")
			// After filtering, the value is also escaped or normalized as in
			// the urlPartPreQuery case.
			fallthrough
		case urlPartPreQuery:
			switch c.state {
			case stateCSSDqStr, stateCSSSqStr:
				s = append(s, "_html_template_cssescaper")
			default:
				s = append(s, "_html_template_urlnormalizer")
			}
		case urlPartQueryOrFrag:
			s = append(s, "_html_template_urlescaper")
		case urlPartUnknown:
			return context{
				state: stateError,
				err:   errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n),
			}
		default:
			panic(c.urlPart.String())
		}
	case stateJS:
		s = append(s, "_html_template_jsvalescaper")
		// A slash after a value starts a div operator.
		c.jsCtx = jsCtxDivOp
	case stateJSDqStr, stateJSSqStr:
		s = append(s, "_html_template_jsstrescaper")
	case stateJSRegexp:
		s = append(s, "_html_template_jsregexpescaper")
	case stateCSS:
		s = append(s, "_html_template_cssvaluefilter")
	case stateText:
		s = append(s, "_html_template_htmlescaper")
	case stateRCDATA:
		s = append(s, "_html_template_rcdataescaper")
	case stateAttr:
		// Handled below in delim check.
	case stateAttrName, stateTag:
		c.state = stateAttrName
		s = append(s, "_html_template_htmlnamefilter")
	case stateSrcset:
		s = append(s, "_html_template_srcsetescaper")
	default:
		if isComment(c.state) {
			s = append(s, "_html_template_commentescaper")
		} else {
			panic("unexpected state " + c.state.String())
		}
	}
	// Add the escaper required by the attribute delimiter, if any.
	switch c.delim {
	case delimNone:
		// No extra-escaping needed for raw text content.
	case delimSpaceOrTagEnd:
		s = append(s, "_html_template_nospaceescaper")
	default:
		s = append(s, "_html_template_attrescaper")
	}
	e.editActionNode(n, s)
	return c
}
260
// ensurePipelineContains ensures that the pipeline ends with the commands with
// the identifiers in s in order. If the pipeline ends with a predefined escaper
// (i.e. "html" or "urlquery"), merge it with the identifiers in s.
//
// The pipeline p is rewritten in place (p.Cmds is replaced). Entries of s that
// are equivalent to a trailing predefined escaper are overwritten in place
// with that escaper's name. Nothing happens when s is empty.
func ensurePipelineContains(p *parse.PipeNode, s []string) {
	if len(s) == 0 {
		// Do not rewrite pipeline if we have no escapers to insert.
		return
	}
	// Precondition: p.Cmds contains at most one predefined escaper and the
	// escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is
	// always true because of the checks in escapeAction.
	// pipelineLen is the number of leading commands of p.Cmds that are copied
	// verbatim into the rewritten pipeline.
	pipelineLen := len(p.Cmds)
	if pipelineLen > 0 {
		lastCmd := p.Cmds[pipelineLen-1]
		if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok {
			if esc := idNode.Ident; predefinedEscapers[esc] {
				// Pipeline ends with a predefined escaper.
				if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 {
					// Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }},
					// where esc is the predefined escaper, and arg1...argN are its arguments.
					// Convert this into the equivalent form
					// {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily
					// merged with the escapers in s.
					lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position())
					p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position()))
					pipelineLen++
				}
				// If any of the commands in s that we are about to insert is equivalent
				// to the predefined escaper, use the predefined escaper instead.
				dup := false
				for i, escaper := range s {
					if escFnsEq(esc, escaper) {
						s[i] = idNode.Ident
						dup = true
					}
				}
				if dup {
					// The predefined escaper will already be inserted along with the
					// escapers in s, so do not copy it to the rewritten pipeline.
					pipelineLen--
				}
			}
		}
	}
	// Rewrite the pipeline, creating the escapers in s at the end of the pipeline.
	newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s))
	// insertedIdents holds the normalized names of the identifier commands
	// that were copied over from the existing pipeline.
	insertedIdents := make(map[string]bool)
	for i := 0; i < pipelineLen; i++ {
		cmd := p.Cmds[i]
		newCmds[i] = cmd
		if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
			insertedIdents[normalizeEscFn(idNode.Ident)] = true
		}
	}
	for _, name := range s {
		if !insertedIdents[normalizeEscFn(name)] {
			// When two templates share an underlying parse tree via the use of
			// AddParseTree and one template is executed after the other, this check
			// ensures that escapers that were already inserted into the pipeline on
			// the first escaping pass do not get inserted again.
			newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position()))
		}
	}
	p.Cmds = newCmds
}
326
// predefinedEscapers is the set of predefined template escaper names that
// have an equivalent contextual escaper. Keep in sync with equivEscapers.
var predefinedEscapers = map[string]bool{
	"urlquery": true,
	"html":     true,
}
333
// equivEscapers maps each contextual escaper name to the predefined template
// escaper it is treated as equivalent to.
var equivEscapers = map[string]string{
	// Both URL escapers produce URLs safe for embedding in a URL query by
	// percent-encoding all the reserved characters specified in RFC 3986
	// Section 2.2.
	"_html_template_urlescaper": "urlquery",

	// These two functions are not actually equivalent; urlquery is stricter as
	// it escapes reserved characters (e.g. '#'), while
	// _html_template_urlnormalizer does not. It is therefore only safe to
	// replace _html_template_urlnormalizer with urlquery (this happens in
	// ensurePipelineContains), but not the other way around. We keep this
	// entry around to preserve the behavior of templates written before
	// Go 1.9, which might depend on this substitution taking place.
	"_html_template_urlnormalizer": "urlquery",

	// The following HTML escapers provide equivalent security guarantees to
	// html, since they all escape '\000', '\'', '"', '&', '<', and '>'.
	"_html_template_rcdataescaper": "html",
	"_html_template_htmlescaper":   "html",
	"_html_template_attrescaper":   "html",
}
354
355 // escFnsEq reports whether the two escaping functions are equivalent.
356 func escFnsEq(a, b string) bool {
357 return normalizeEscFn(a) == normalizeEscFn(b)
358 }
359
360 // normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of
361 // escaper functions a and b that are equivalent.
362 func normalizeEscFn(e string) string {
363 if norm := equivEscapers[e]; norm != "" {
364 return norm
365 }
366 return e
367 }
368
// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x. appendCmd consults it to avoid appending b directly after a.
var redundantFuncs = map[string]map[string]bool{
	"_html_template_urlescaper": {
		"_html_template_urlnormalizer": true,
	},
	"_html_template_jsstrescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsregexpescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_cssescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_commentescaper": {
		"_html_template_htmlescaper":    true,
		"_html_template_nospaceescaper": true,
		"_html_template_attrescaper":    true,
	},
}
390
391 // appendCmd appends the given command to the end of the command pipeline
392 // unless it is redundant with the last command.
393 func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
394 if n := len(cmds); n != 0 {
395 last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
396 next, okNext := cmd.Args[0].(*parse.IdentifierNode)
397 if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
398 return cmds
399 }
400 }
401 return append(cmds, cmd)
402 }
403
404 // newIdentCmd produces a command containing a single identifier node.
405 func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode {
406 return &parse.CommandNode{
407 NodeType: parse.NodeCommand,
408 Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree.
409 }
410 }
411
412 // nudge returns the context that would result from following empty string
413 // transitions from the input context.
414 // For example, parsing:
415 // `<a href=`
416 // will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
417 // `<a href=x`
418 // will end in context{stateURL, delimSpaceOrTagEnd, ...}.
419 // There are two transitions that happen when the 'x' is seen:
420 // (1) Transition from a before-value state to a start-of-value state without
421 // consuming any character.
422 // (2) Consume 'x' and transition past the first value character.
423 // In this case, nudging produces the context after (1) happens.
424 func nudge(c context) context {
425 switch c.state {
426 case stateTag:
427 // In `<foo {{.}}`, the action should emit an attribute.
428 c.state = stateAttrName
429 case stateBeforeValue:
430 // In `<foo bar={{.}}`, the action is an undelimited value.
431 c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
432 case stateAfterName:
433 // In `<foo bar {{.}}`, the action is an attribute name.
434 c.state, c.attr = stateAttrName, attrNone
435 }
436 return c
437 }
438
439 // join joins the two contexts of a branch template node. The result is an
440 // error context if either of the input contexts are error contexts, or if the
441 // input contexts differ.
442 func join(a, b context, node parse.Node, nodeName string) context {
443 if a.state == stateError {
444 return a
445 }
446 if b.state == stateError {
447 return b
448 }
449 if a.state == stateDead {
450 return b
451 }
452 if b.state == stateDead {
453 return a
454 }
455 if a.eq(b) {
456 return a
457 }
458
459 c := a
460 c.urlPart = b.urlPart
461 if c.eq(b) {
462 // The contexts differ only by urlPart.
463 c.urlPart = urlPartUnknown
464 return c
465 }
466
467 c = a
468 c.jsCtx = b.jsCtx
469 if c.eq(b) {
470 // The contexts differ only by jsCtx.
471 c.jsCtx = jsCtxUnknown
472 return c
473 }
474
475 // Allow a nudged context to join with an unnudged one.
476 // This means that
477 // <p title={{if .C}}{{.}}{{end}}
478 // ends in an unquoted value state even though the else branch
479 // ends in stateBeforeValue.
480 if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
481 if e := join(c, d, node, nodeName); e.state != stateError {
482 return e
483 }
484 }
485
486 return context{
487 state: stateError,
488 err: errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
489 }
490 }
491
// escapeBranch escapes a branch template node: "if", "range" and "with".
//
// The main list and the else list are both escaped starting from c and their
// resulting contexts are joined. For "range", a fresh rangeContext is pushed
// while the body is escaped so that break and continue contexts can be merged
// in with joinRange, and the body is escaped a second time (without keeping
// its edits) to check that re-entering the loop ends in the same context.
func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
	if nodeName == "range" {
		// Push a range context for the first pass over the body.
		e.rangeContext = &rangeContext{outer: e.rangeContext}
	}
	c0 := e.escapeList(c, n.List)
	if nodeName == "range" {
		if c0.state != stateError {
			c0 = joinRange(c0, e.rangeContext)
		}
		// Pop the range context of the first pass.
		e.rangeContext = e.rangeContext.outer
		if c0.state == stateError {
			return c0
		}

		// The "true" branch of a "range" node can execute multiple times.
		// We check that executing n.List once results in the same context
		// as executing n.List twice.
		e.rangeContext = &rangeContext{outer: e.rangeContext}
		// A nil filter means the edits of this second pass are discarded.
		c1, _ := e.escapeListConditionally(c0, n.List, nil)
		c0 = join(c0, c1, n, nodeName)
		if c0.state == stateError {
			e.rangeContext = e.rangeContext.outer
			// Make clear that this is a problem on loop re-entry
			// since developers tend to overlook that branch when
			// debugging templates.
			c0.err.Line = n.Line
			c0.err.Description = "on range loop re-entry: " + c0.err.Description
			return c0
		}
		c0 = joinRange(c0, e.rangeContext)
		e.rangeContext = e.rangeContext.outer
		if c0.state == stateError {
			return c0
		}
	}
	c1 := e.escapeList(c, n.ElseList)
	return join(c0, c1, n, nodeName)
}
531
532 func joinRange(c0 context, rc *rangeContext) context {
533 // Merge contexts at break and continue statements into overall body context.
534 // In theory we could treat breaks differently from continues, but for now it is
535 // enough to treat them both as going back to the start of the loop (which may then stop).
536 for _, c := range rc.breaks {
537 c0 = join(c0, c, c.n, "range")
538 if c0.state == stateError {
539 c0.err.Line = c.n.(*parse.BreakNode).Line
540 c0.err.Description = "at range loop break: " + c0.err.Description
541 return c0
542 }
543 }
544 for _, c := range rc.continues {
545 c0 = join(c0, c, c.n, "range")
546 if c0.state == stateError {
547 c0.err.Line = c.n.(*parse.ContinueNode).Line
548 c0.err.Description = "at range loop continue: " + c0.err.Description
549 return c0
550 }
551 }
552 return c0
553 }
554
555 // escapeList escapes a list template node.
556 func (e *escaper) escapeList(c context, n *parse.ListNode) context {
557 if n == nil {
558 return c
559 }
560 for _, m := range n.Nodes {
561 c = e.escape(c, m)
562 if c.state == stateDead {
563 break
564 }
565 }
566 return c
567 }
568
569 // escapeListConditionally escapes a list node but only preserves edits and
570 // inferences in e if the inferences and output context satisfy filter.
571 // It returns the best guess at an output context, and the result of the filter
572 // which is the same as whether e was updated.
573 func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
574 e1 := makeEscaper(e.ns)
575 e1.rangeContext = e.rangeContext
576 // Make type inferences available to f.
577 for k, v := range e.output {
578 e1.output[k] = v
579 }
580 c = e1.escapeList(c, n)
581 ok := filter != nil && filter(&e1, c)
582 if ok {
583 // Copy inferences and edits from e1 back into e.
584 for k, v := range e1.output {
585 e.output[k] = v
586 }
587 for k, v := range e1.derived {
588 e.derived[k] = v
589 }
590 for k, v := range e1.called {
591 e.called[k] = v
592 }
593 for k, v := range e1.actionNodeEdits {
594 e.editActionNode(k, v)
595 }
596 for k, v := range e1.templateNodeEdits {
597 e.editTemplateNode(k, v)
598 }
599 for k, v := range e1.textNodeEdits {
600 e.editTextNode(k, v)
601 }
602 }
603 return c, ok
604 }
605
606 // escapeTemplate escapes a {{template}} call node.
607 func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
608 c, name := e.escapeTree(c, n, n.Name, n.Line)
609 if name != n.Name {
610 e.editTemplateNode(n, name)
611 }
612 return c
613 }
614
615 // escapeTree escapes the named template starting in the given context as
616 // necessary and returns its output context.
617 func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) {
618 // Mangle the template name with the input context to produce a reliable
619 // identifier.
620 dname := c.mangle(name)
621 e.called[dname] = true
622 if out, ok := e.output[dname]; ok {
623 // Already escaped.
624 return out, dname
625 }
626 t := e.template(name)
627 if t == nil {
628 // Two cases: The template exists but is empty, or has never been mentioned at
629 // all. Distinguish the cases in the error messages.
630 if e.ns.set[name] != nil {
631 return context{
632 state: stateError,
633 err: errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name),
634 }, dname
635 }
636 return context{
637 state: stateError,
638 err: errorf(ErrNoSuchTemplate, node, line, "no such template %q", name),
639 }, dname
640 }
641 if dname != name {
642 // Use any template derived during an earlier call to escapeTemplate
643 // with different top level templates, or clone if necessary.
644 dt := e.template(dname)
645 if dt == nil {
646 dt = template.New(dname)
647 dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()}
648 e.derived[dname] = dt
649 }
650 t = dt
651 }
652 return e.computeOutCtx(c, t), dname
653 }
654
655 // computeOutCtx takes a template and its start context and computes the output
656 // context while storing any inferences in e.
657 func (e *escaper) computeOutCtx(c context, t *template.Template) context {
658 // Propagate context over the body.
659 c1, ok := e.escapeTemplateBody(c, t)
660 if !ok {
661 // Look for a fixed point by assuming c1 as the output context.
662 if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 {
663 c1, ok = c2, true
664 }
665 // Use c1 as the error context if neither assumption worked.
666 }
667 if !ok && c1.state != stateError {
668 return context{
669 state: stateError,
670 err: errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()),
671 }
672 }
673 return c1
674 }
675
676 // escapeTemplateBody escapes the given template assuming the given output
677 // context, and returns the best guess at the output context and whether the
678 // assumption was correct.
679 func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) {
680 filter := func(e1 *escaper, c1 context) bool {
681 if c1.state == stateError {
682 // Do not update the input escaper, e.
683 return false
684 }
685 if !e1.called[t.Name()] {
686 // If t is not recursively called, then c1 is an
687 // accurate output context.
688 return true
689 }
690 // c1 is accurate if it matches our assumed output context.
691 return c.eq(c1)
692 }
693 // We need to assume an output context so that recursive template calls
694 // take the fast path out of escapeTree instead of infinitely recurring.
695 // Naively assuming that the input context is the same as the output
696 // works >90% of the time.
697 e.output[t.Name()] = c
698 return e.escapeListConditionally(c, t.Tree.Root, filter)
699 }
700
// delimEnds maps each delim to a string of characters that terminate it.
// contextAfterText passes the entry for the current delimiter to
// bytes.IndexAny to locate the end of an attribute value.
var delimEnds = [...]string{
	delimDoubleQuote: `"`,
	delimSingleQuote: "'",
	// Determined empirically by running the below in various browsers.
	// var div = document.createElement("DIV");
	// for (var i = 0; i < 0x10000; ++i) {
	//   div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
	//   if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
	//     document.write("<p>U+" + i.toString(16));
	// }
	delimSpaceOrTagEnd: " \t\n\f\r>",
}
714
// doctypeBytes is the upper-case prefix "<!DOCTYPE". escapeText compares it
// against upper-cased text so that a '<' that opens a doctype is left alone.
var doctypeBytes = []byte("<!DOCTYPE")
716
717 // escapeText escapes a text template node.
718 func (e *escaper) escapeText(c context, n *parse.TextNode) context {
719 s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
720 for i != len(s) {
721 c1, nread := contextAfterText(c, s[i:])
722 i1 := i + nread
723 if c.state == stateText || c.state == stateRCDATA {
724 end := i1
725 if c1.state != c.state {
726 for j := end - 1; j >= i; j-- {
727 if s[j] == '<' {
728 end = j
729 break
730 }
731 }
732 }
733 for j := i; j < end; j++ {
734 if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) {
735 b.Write(s[written:j])
736 b.WriteString("<")
737 written = j + 1
738 }
739 }
740 } else if isComment(c.state) && c.delim == delimNone {
741 switch c.state {
742 case stateJSBlockCmt:
743 // https://es5.github.com/#x7.4:
744 // "Comments behave like white space and are
745 // discarded except that, if a MultiLineComment
746 // contains a line terminator character, then
747 // the entire comment is considered to be a
748 // LineTerminator for purposes of parsing by
749 // the syntactic grammar."
750 if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") {
751 b.WriteByte('\n')
752 } else {
753 b.WriteByte(' ')
754 }
755 case stateCSSBlockCmt:
756 b.WriteByte(' ')
757 }
758 written = i1
759 }
760 if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
761 // Preserve the portion between written and the comment start.
762 cs := i1 - 2
763 if c1.state == stateHTMLCmt {
764 // "<!--" instead of "/*" or "//"
765 cs -= 2
766 }
767 b.Write(s[written:cs])
768 written = i1
769 }
770 if i == i1 && c.state == c1.state {
771 panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
772 }
773 c, i = c1, i1
774 }
775
776 if written != 0 && c.state != stateError {
777 if !isComment(c.state) || c.delim != delimNone {
778 b.Write(n.Text[written:])
779 }
780 e.editTextNode(n, b.Bytes())
781 }
782 return c
783 }
784
// contextAfterText starts in context c, consumes some tokens from the front of
// s, then returns the context after those tokens and the number of bytes of s
// that were consumed.
//
// Outside attribute values (delimNone) the work is delegated to the
// transition function for c.state, stopping at any special end tag found by
// tSpecialTagEnd. Inside an attribute value, the value's end is located using
// delimEnds; if the value does not end within s, the entity-decoded text is
// run through the transition functions and all of s is consumed.
func contextAfterText(c context, s []byte) (context, int) {
	if c.delim == delimNone {
		c1, i := tSpecialTagEnd(c, s)
		if i == 0 {
			// A special end tag (`</script>`) has been seen and
			// all content preceding it has been consumed.
			return c1, 0
		}
		// Consider all content up to any end tag.
		return transitionFunc[c.state](c, s[:i])
	}

	// We are at the beginning of an attribute value.

	// i is the index of the first character that terminates the value, or
	// len(s) if the value continues past the end of s.
	i := bytes.IndexAny(s, delimEnds[c.delim])
	if i == -1 {
		i = len(s)
	}
	if c.delim == delimSpaceOrTagEnd {
		// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
		// lists the runes below as error characters.
		// Error out because HTML parsers may differ on whether
		// "<a id= onclick=f("     ends inside id's or onclick's value,
		// "<a class=`foo "        ends inside a value,
		// "<a style=font:'Arial'" needs open-quote fixup.
		// IE treats '`' as a quotation character.
		if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 {
			return context{
				state: stateError,
				err:   errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]),
			}, len(s)
		}
	}
	if i == len(s) {
		// Remain inside the attribute.
		// Decode the value so non-HTML rules can easily handle
		//     <button onclick="alert(&quot;Hi!&quot;)">
		// without having to entity decode token boundaries.
		for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
			c1, i1 := transitionFunc[c.state](c, u)
			c, u = c1, u[i1:]
		}
		return c, len(s)
	}

	element := c.element

	// If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS.
	if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) {
		element = elementNone
	}

	if c.delim != delimSpaceOrTagEnd {
		// Consume any quote.
		i++
	}
	// On exiting an attribute, we discard all state information
	// except the state and element.
	return context{state: stateTag, element: element}, i
}
847
848 // editActionNode records a change to an action pipeline for later commit.
849 func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) {
850 if _, ok := e.actionNodeEdits[n]; ok {
851 panic(fmt.Sprintf("node %s shared between templates", n))
852 }
853 e.actionNodeEdits[n] = cmds
854 }
855
856 // editTemplateNode records a change to a {{template}} callee for later commit.
857 func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) {
858 if _, ok := e.templateNodeEdits[n]; ok {
859 panic(fmt.Sprintf("node %s shared between templates", n))
860 }
861 e.templateNodeEdits[n] = callee
862 }
863
864 // editTextNode records a change to a text node for later commit.
865 func (e *escaper) editTextNode(n *parse.TextNode, text []byte) {
866 if _, ok := e.textNodeEdits[n]; ok {
867 panic(fmt.Sprintf("node %s shared between templates", n))
868 }
869 e.textNodeEdits[n] = text
870 }
871
872 // commit applies changes to actions and template calls needed to contextually
873 // autoescape content and adds any derived templates to the set.
874 func (e *escaper) commit() {
875 for name := range e.output {
876 e.template(name).Funcs(funcMap)
877 }
878 // Any template from the name space associated with this escaper can be used
879 // to add derived templates to the underlying text/template name space.
880 tmpl := e.arbitraryTemplate()
881 for _, t := range e.derived {
882 if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil {
883 panic("error adding derived template")
884 }
885 }
886 for n, s := range e.actionNodeEdits {
887 ensurePipelineContains(n.Pipe, s)
888 }
889 for n, name := range e.templateNodeEdits {
890 n.Name = name
891 }
892 for n, s := range e.textNodeEdits {
893 n.Text = s
894 }
895 // Reset state that is specific to this commit so that the same changes are
896 // not re-applied to the template on subsequent calls to commit.
897 e.called = make(map[string]bool)
898 e.actionNodeEdits = make(map[*parse.ActionNode][]string)
899 e.templateNodeEdits = make(map[*parse.TemplateNode]string)
900 e.textNodeEdits = make(map[*parse.TextNode][]byte)
901 }
902
903 // template returns the named template given a mangled template name.
904 func (e *escaper) template(name string) *template.Template {
905 // Any template from the name space associated with this escaper can be used
906 // to look up templates in the underlying text/template name space.
907 t := e.arbitraryTemplate().text.Lookup(name)
908 if t == nil {
909 t = e.derived[name]
910 }
911 return t
912 }
913
914 // arbitraryTemplate returns an arbitrary template from the name space
915 // associated with e and panics if no templates are found.
916 func (e *escaper) arbitraryTemplate() *Template {
917 for _, t := range e.ns.set {
918 return t
919 }
920 panic("no templates in name space")
921 }
922
923 // Forwarding functions so that clients need only import this package
924 // to reach the general escaping functions of text/template.
925
// HTMLEscape writes to w the escaped HTML equivalent of the plain text data b.
// It forwards to HTMLEscape in the underlying text template package.
func HTMLEscape(w io.Writer, b []byte) {
	template.HTMLEscape(w, b)
}
930
// HTMLEscapeString returns the escaped HTML equivalent of the plain text data s.
// It forwards to HTMLEscapeString in the underlying text template package.
func HTMLEscapeString(s string) string {
	return template.HTMLEscapeString(s)
}
935
// HTMLEscaper returns the escaped HTML equivalent of the textual
// representation of its arguments.
// It forwards to HTMLEscaper in the underlying text template package.
func HTMLEscaper(args ...any) string {
	return template.HTMLEscaper(args...)
}
941
// JSEscape writes to w the escaped JavaScript equivalent of the plain text data b.
// It forwards to JSEscape in the underlying text template package.
func JSEscape(w io.Writer, b []byte) {
	template.JSEscape(w, b)
}
946
// JSEscapeString returns the escaped JavaScript equivalent of the plain text data s.
// It forwards to JSEscapeString in the underlying text template package.
func JSEscapeString(s string) string {
	return template.JSEscapeString(s)
}
951
// JSEscaper returns the escaped JavaScript equivalent of the textual
// representation of its arguments.
// It forwards to JSEscaper in the underlying text template package.
func JSEscaper(args ...any) string {
	return template.JSEscaper(args...)
}
957
// URLQueryEscaper returns the escaped value of the textual representation of
// its arguments in a form suitable for embedding in a URL query.
// It forwards to URLQueryEscaper in the underlying text template package.
func URLQueryEscaper(args ...any) string {
	return template.URLQueryEscaper(args...)
}