url.go (6800B)
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package template
6
7 import (
8 "bytes"
9 "fmt"
10 "strings"
11 )
12
13 // urlFilter returns its input unless it contains an unsafe scheme in which
14 // case it defangs the entire URL.
15 //
16 // Schemes that cause unintended side effects that are irreversible without user
17 // interaction are considered unsafe. For example, clicking on a "javascript:"
18 // link can immediately trigger JavaScript code execution.
19 //
20 // This filter conservatively assumes that all schemes other than the following
21 // are unsafe:
22 // * http: Navigates to a new website, and may open a new window or tab.
23 // These side effects can be reversed by navigating back to the
24 // previous website, or closing the window or tab. No irreversible
25 // changes will take place without further user interaction with
26 // the new website.
27 // * https: Same as http.
28 // * mailto: Opens an email program and starts a new draft. This side effect
29 // is not irreversible until the user explicitly clicks send; it
30 // can be undone by closing the email program.
31 //
32 // To allow URLs containing other schemes to bypass this filter, developers must
33 // explicitly indicate that such a URL is expected and safe by encapsulating it
34 // in a template.URL value.
35 func urlFilter(args ...any) string {
36 s, t := stringify(args...)
37 if t == contentTypeURL {
38 return s
39 }
40 if !isSafeURL(s) {
41 return "#" + filterFailsafe
42 }
43 return s
44 }
45
// isSafeURL reports whether s is acceptable to the URL filter.
//
// The text before the first ':' is treated as the scheme. s is accepted when
// it has no ':' at all, when a '/' appears before the first ':' (so the colon
// cannot be terminating a scheme), or when the scheme is http, https, or
// mailto, compared case-insensitively. Anything else is rejected.
func isSafeURL(s string) bool {
	scheme, _, found := strings.Cut(s, ":")
	if !found || strings.Contains(scheme, "/") {
		// No scheme present: nothing to reject.
		return true
	}
	for _, allowed := range [...]string{"http", "https", "mailto"} {
		if strings.EqualFold(scheme, allowed) {
			return true
		}
	}
	return false
}
56
57 // urlEscaper produces an output that can be embedded in a URL query.
58 // The output can be embedded in an HTML attribute without further escaping.
59 func urlEscaper(args ...any) string {
60 return urlProcessor(false, args...)
61 }
62
63 // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
64 // string or parenthesis delimited url(...).
65 // The normalizer does not encode all HTML specials. Specifically, it does not
66 // encode '&' so correct embedding in an HTML attribute requires escaping of
67 // '&' to '&'.
68 func urlNormalizer(args ...any) string {
69 return urlProcessor(true, args...)
70 }
71
72 // urlProcessor normalizes (when norm is true) or escapes its input to produce
73 // a valid hierarchical or opaque URL part.
74 func urlProcessor(norm bool, args ...any) string {
75 s, t := stringify(args...)
76 if t == contentTypeURL {
77 norm = true
78 }
79 var b bytes.Buffer
80 if processURLOnto(s, norm, &b) {
81 return b.String()
82 }
83 return s
84 }
85
86 // processURLOnto appends a normalized URL corresponding to its input to b
87 // and reports whether the appended content differs from s.
88 func processURLOnto(s string, norm bool, b *bytes.Buffer) bool {
89 b.Grow(len(s) + 16)
90 written := 0
91 // The byte loop below assumes that all URLs use UTF-8 as the
92 // content-encoding. This is similar to the URI to IRI encoding scheme
93 // defined in section 3.1 of RFC 3987, and behaves the same as the
94 // EcmaScript builtin encodeURIComponent.
95 // It should not cause any misencoding of URLs in pages with
96 // Content-type: text/html;charset=UTF-8.
97 for i, n := 0, len(s); i < n; i++ {
98 c := s[i]
99 switch c {
100 // Single quote and parens are sub-delims in RFC 3986, but we
101 // escape them so the output can be embedded in single
102 // quoted attributes and unquoted CSS url(...) constructs.
103 // Single quotes are reserved in URLs, but are only used in
104 // the obsolete "mark" rule in an appendix in RFC 3986
105 // so can be safely encoded.
106 case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
107 if norm {
108 continue
109 }
110 // Unreserved according to RFC 3986 sec 2.3
111 // "For consistency, percent-encoded octets in the ranges of
112 // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
113 // period (%2E), underscore (%5F), or tilde (%7E) should not be
114 // created by URI producers
115 case '-', '.', '_', '~':
116 continue
117 case '%':
118 // When normalizing do not re-encode valid escapes.
119 if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
120 continue
121 }
122 default:
123 // Unreserved according to RFC 3986 sec 2.3
124 if 'a' <= c && c <= 'z' {
125 continue
126 }
127 if 'A' <= c && c <= 'Z' {
128 continue
129 }
130 if '0' <= c && c <= '9' {
131 continue
132 }
133 }
134 b.WriteString(s[written:i])
135 fmt.Fprintf(b, "%%%02x", c)
136 written = i + 1
137 }
138 b.WriteString(s[written:])
139 return written != 0
140 }
141
142 // Filters and normalizes srcset values which are comma separated
143 // URLs followed by metadata.
144 func srcsetFilterAndEscaper(args ...any) string {
145 s, t := stringify(args...)
146 switch t {
147 case contentTypeSrcset:
148 return s
149 case contentTypeURL:
150 // Normalizing gets rid of all HTML whitespace
151 // which separate the image URL from its metadata.
152 var b bytes.Buffer
153 if processURLOnto(s, true, &b) {
154 s = b.String()
155 }
156 // Additionally, commas separate one source from another.
157 return strings.ReplaceAll(s, ",", "%2c")
158 }
159
160 var b bytes.Buffer
161 written := 0
162 for i := 0; i < len(s); i++ {
163 if s[i] == ',' {
164 filterSrcsetElement(s, written, i, &b)
165 b.WriteString(",")
166 written = i + 1
167 }
168 }
169 filterSrcsetElement(s, written, len(s), &b)
170 return b.String()
171 }
172
// htmlSpaceAndASCIIAlnumBytes is a 128-bit set, one bit per ASCII code point:
// bit (c & 7) of byte (c >> 3) is set when c is HTML whitespace or an ASCII
// letter or digit.
//
// Derived from https://play.golang.org/p/Dhmj7FORT5
const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"

// isHTMLSpace is true iff c is a whitespace character per
// https://infra.spec.whatwg.org/#ascii-whitespace
func isHTMLSpace(c byte) bool {
	// Every member of the set at or below 0x20 is whitespace.
	if c > 0x20 {
		return false
	}
	bit := byte(1) << (c & 7)
	return htmlSpaceAndASCIIAlnumBytes[c>>3]&bit != 0
}

// isHTMLSpaceOrASCIIAlnum is true iff c is HTML whitespace or an ASCII
// letter or digit.
func isHTMLSpaceOrASCIIAlnum(c byte) bool {
	// The table only covers ASCII; larger bytes are never members.
	if c >= 0x80 {
		return false
	}
	bit := byte(1) << (c & 7)
	return htmlSpaceAndASCIIAlnumBytes[c>>3]&bit != 0
}
185
186 func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) {
187 start := left
188 for start < right && isHTMLSpace(s[start]) {
189 start++
190 }
191 end := right
192 for i := start; i < right; i++ {
193 if isHTMLSpace(s[i]) {
194 end = i
195 break
196 }
197 }
198 if url := s[start:end]; isSafeURL(url) {
199 // If image metadata is only spaces or alnums then
200 // we don't need to URL normalize it.
201 metadataOk := true
202 for i := end; i < right; i++ {
203 if !isHTMLSpaceOrASCIIAlnum(s[i]) {
204 metadataOk = false
205 break
206 }
207 }
208 if metadataOk {
209 b.WriteString(s[left:start])
210 processURLOnto(url, true, b)
211 b.WriteString(s[end:right])
212 return
213 }
214 }
215 b.WriteString("#")
216 b.WriteString(filterFailsafe)
217 }