css.go (7157B)
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package template
6
7 import (
8 "bytes"
9 "fmt"
10 "strings"
11 "unicode"
12 "unicode/utf8"
13 )
14
15 // endsWithCSSKeyword reports whether b ends with an ident that
16 // case-insensitively matches the lower-case kw.
17 func endsWithCSSKeyword(b []byte, kw string) bool {
18 i := len(b) - len(kw)
19 if i < 0 {
20 // Too short.
21 return false
22 }
23 if i != 0 {
24 r, _ := utf8.DecodeLastRune(b[:i])
25 if isCSSNmchar(r) {
26 // Too long.
27 return false
28 }
29 }
30 // Many CSS keywords, such as "!important" can have characters encoded,
31 // but the URI production does not allow that according to
32 // https://www.w3.org/TR/css3-syntax/#TOK-URI
33 // This does not attempt to recognize encoded keywords. For example,
34 // given "\75\72\6c" and "url" this return false.
35 return string(bytes.ToLower(b[i:])) == kw
36 }
37
// isCSSNmchar reports whether r may appear anywhere in a CSS identifier.
//
// It follows the CSS3 nmchar production, except that multi-rune escape
// sequences are not considered.
// https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		// ASCII letters and digits.
		return true
	case r == '-', r == '_':
		return true
	case 0x80 <= r && r <= 0xd7ff,
		0xe000 <= r && r <= 0xfffd,
		0x10000 <= r && r <= 0x10ffff:
		// Non-ASCII ranges; the gaps exclude the surrogate block and
		// U+FFFE/U+FFFF.
		return true
	}
	return false
}
53
54 // decodeCSS decodes CSS3 escapes given a sequence of stringchars.
55 // If there is no change, it returns the input, otherwise it returns a slice
56 // backed by a new array.
57 // https://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
58 func decodeCSS(s []byte) []byte {
59 i := bytes.IndexByte(s, '\\')
60 if i == -1 {
61 return s
62 }
63 // The UTF-8 sequence for a codepoint is never longer than 1 + the
64 // number hex digits need to represent that codepoint, so len(s) is an
65 // upper bound on the output length.
66 b := make([]byte, 0, len(s))
67 for len(s) != 0 {
68 i := bytes.IndexByte(s, '\\')
69 if i == -1 {
70 i = len(s)
71 }
72 b, s = append(b, s[:i]...), s[i:]
73 if len(s) < 2 {
74 break
75 }
76 // https://www.w3.org/TR/css3-syntax/#SUBTOK-escape
77 // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
78 if isHex(s[1]) {
79 // https://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
80 // unicode ::= '\' [0-9a-fA-F]{1,6} wc?
81 j := 2
82 for j < len(s) && j < 7 && isHex(s[j]) {
83 j++
84 }
85 r := hexDecode(s[1:j])
86 if r > unicode.MaxRune {
87 r, j = r/16, j-1
88 }
89 n := utf8.EncodeRune(b[len(b):cap(b)], r)
90 // The optional space at the end allows a hex
91 // sequence to be followed by a literal hex.
92 // string(decodeCSS([]byte(`\A B`))) == "\nB"
93 b, s = b[:len(b)+n], skipCSSSpace(s[j:])
94 } else {
95 // `\\` decodes to `\` and `\"` to `"`.
96 _, n := utf8.DecodeRune(s[1:])
97 b, s = append(b, s[1:1+n]...), s[1+n:]
98 }
99 }
100 return b
101 }
102
// isHex reports whether c is an ASCII hexadecimal digit, in either case.
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
107
// hexDecode returns the value of the short hexadecimal digit sequence s,
// for example "10" -> 16. An empty sequence decodes to 0.
// It panics if s contains a byte that is not a hex digit.
func hexDecode(s []byte) rune {
	var n rune
	for _, c := range s {
		var digit byte
		switch {
		case '0' <= c && c <= '9':
			digit = c - '0'
		case 'a' <= c && c <= 'f':
			digit = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			digit = c - 'A' + 10
		default:
			panic(fmt.Sprintf("Bad hex digit in %q", s))
		}
		// Shift in the next nibble.
		n = n<<4 | rune(digit)
	}
	return n
}
126
// skipCSSSpace returns the suffix of c that remains after skipping at most
// one leading CSS space. A CRLF pair counts as a single space.
func skipCSSSpace(c []byte) []byte {
	skip := 0
	if len(c) != 0 {
		// wc ::= #x9 | #xA | #xC | #xD | #x20
		switch c[0] {
		case '\t', '\n', '\f', ' ':
			skip = 1
		case '\r':
			// This differs from CSS3's wc production because it
			// contains a probable spec error whereby wc contains all
			// the single byte sequences in nl (newline) but not CRLF.
			skip = 1
			if len(c) >= 2 && c[1] == '\n' {
				skip = 2
			}
		}
	}
	return c[skip:]
}
147
// isCSSSpace reports whether b is one of the CSS space characters of the wc
// production: tab, line feed, form feed, carriage return or space.
func isCSSSpace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
156
157 // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
158 func cssEscaper(args ...any) string {
159 s, _ := stringify(args...)
160 var b strings.Builder
161 r, w, written := rune(0), 0, 0
162 for i := 0; i < len(s); i += w {
163 // See comment in htmlEscaper.
164 r, w = utf8.DecodeRuneInString(s[i:])
165 var repl string
166 switch {
167 case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
168 repl = cssReplacementTable[r]
169 default:
170 continue
171 }
172 if written == 0 {
173 b.Grow(len(s))
174 }
175 b.WriteString(s[written:i])
176 b.WriteString(repl)
177 written = i + w
178 if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
179 b.WriteByte(' ')
180 }
181 }
182 if written == 0 {
183 return s
184 }
185 b.WriteString(s[written:])
186 return b.String()
187 }
188
// cssReplacementTable maps a code point, used as the slice index, to the
// escape sequence cssEscaper writes in its place. Code points whose entry is
// empty, or that lie beyond the end of the table, are left as they are.
var cssReplacementTable = []string{
	// NUL and the white-space control characters.
	'\x00': `\0`,
	'\t':   `\9`,
	'\n':   `\a`,
	'\f':   `\c`,
	'\r':   `\d`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\22`,
	'&':  `\26`,
	'\'': `\27`,
	'(':  `\28`,
	')':  `\29`,
	'+':  `\2b`,
	'/':  `\2f`,
	':':  `\3a`,
	';':  `\3b`,
	'<':  `\3c`,
	'>':  `\3e`,
	// The escape character itself is doubled rather than hex-encoded.
	'\\': `\\`,
	// Curly braces.
	'{': `\7b`,
	'}': `\7d`,
}
212
// Lower-case identifier fragments that cssValueFilter refuses to emit.
// They are matched against the value's identifier characters with every
// '-' left out, which is why "mozbinding" is spelled without dashes.
var (
	expressionBytes = []byte("expression")
	mozBindingBytes = []byte("mozbinding")
)
215
216 // cssValueFilter allows innocuous CSS values in the output including CSS
217 // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
218 // (inherit, blue), and colors (#888).
219 // It filters out unsafe values, such as those that affect token boundaries,
220 // and anything that might execute scripts.
221 func cssValueFilter(args ...any) string {
222 s, t := stringify(args...)
223 if t == contentTypeCSS {
224 return s
225 }
226 b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
227
228 // CSS3 error handling is specified as honoring string boundaries per
229 // https://www.w3.org/TR/css3-syntax/#error-handling :
230 // Malformed declarations. User agents must handle unexpected
231 // tokens encountered while parsing a declaration by reading until
232 // the end of the declaration, while observing the rules for
233 // matching pairs of (), [], {}, "", and '', and correctly handling
234 // escapes. For example, a malformed declaration may be missing a
235 // property, colon (:) or value.
236 // So we need to make sure that values do not have mismatched bracket
237 // or quote characters to prevent the browser from restarting parsing
238 // inside a string that might embed JavaScript source.
239 for i, c := range b {
240 switch c {
241 case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
242 return filterFailsafe
243 case '-':
244 // Disallow <!-- or -->.
245 // -- should not appear in valid identifiers.
246 if i != 0 && b[i-1] == '-' {
247 return filterFailsafe
248 }
249 default:
250 if c < utf8.RuneSelf && isCSSNmchar(rune(c)) {
251 id = append(id, c)
252 }
253 }
254 }
255 id = bytes.ToLower(id)
256 if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) {
257 return filterFailsafe
258 }
259 return string(b)
260 }