css.go (7157B)
// endsWithCSSKeyword reports whether the tail of b is an identifier that
// matches kw when compared case-insensitively. kw must already be lower-case.
func endsWithCSSKeyword(b []byte, kw string) bool {
	start := len(b) - len(kw)
	switch {
	case start < 0:
		// b is shorter than kw, so it cannot end with it.
		return false
	case start > 0:
		// If the rune just before the candidate suffix may appear in an
		// identifier, the identifier ending b is longer than kw.
		if prev, _ := utf8.DecodeLastRune(b[:start]); isCSSNmchar(prev) {
			return false
		}
	}
	// Escaped spellings are deliberately not recognized. Many CSS keywords,
	// such as "!important", can have characters encoded, but the URI
	// production does not allow that according to
	// https://www.w3.org/TR/css3-syntax/#TOK-URI
	// For example, given `\75\72\6c` and "url" this returns false.
	suffix := bytes.ToLower(b[start:])
	return string(suffix) == kw
}

// isCSSNmchar reports whether r is allowed anywhere in a CSS identifier.
//
// It follows the CSS3 nmchar production but ignores multi-rune escape
// sequences.
// https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		// ASCII letters and digits.
		return true
	case r == '-', r == '_':
		return true
	case 0x80 <= r && r <= 0xd7ff, 0xe000 <= r && r <= 0xfffd, 0x10000 <= r && r <= 0x10ffff:
		// Non-ASCII ranges; the gaps exclude U+D800-U+DFFF and
		// U+FFFE-U+FFFF.
		return true
	}
	return false
}
58 func decodeCSS(s []byte) []byte { 59 i := bytes.IndexByte(s, '\\') 60 if i == -1 { 61 return s 62 } 63 // The UTF-8 sequence for a codepoint is never longer than 1 + the 64 // number hex digits need to represent that codepoint, so len(s) is an 65 // upper bound on the output length. 66 b := make([]byte, 0, len(s)) 67 for len(s) != 0 { 68 i := bytes.IndexByte(s, '\\') 69 if i == -1 { 70 i = len(s) 71 } 72 b, s = append(b, s[:i]...), s[i:] 73 if len(s) < 2 { 74 break 75 } 76 // https://www.w3.org/TR/css3-syntax/#SUBTOK-escape 77 // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] 78 if isHex(s[1]) { 79 // https://www.w3.org/TR/css3-syntax/#SUBTOK-unicode 80 // unicode ::= '\' [0-9a-fA-F]{1,6} wc? 81 j := 2 82 for j < len(s) && j < 7 && isHex(s[j]) { 83 j++ 84 } 85 r := hexDecode(s[1:j]) 86 if r > unicode.MaxRune { 87 r, j = r/16, j-1 88 } 89 n := utf8.EncodeRune(b[len(b):cap(b)], r) 90 // The optional space at the end allows a hex 91 // sequence to be followed by a literal hex. 92 // string(decodeCSS([]byte(`\A B`))) == "\nB" 93 b, s = b[:len(b)+n], skipCSSSpace(s[j:]) 94 } else { 95 // `\\` decodes to `\` and `\"` to `"`. 96 _, n := utf8.DecodeRune(s[1:]) 97 b, s = append(b, s[1:1+n]...), s[1+n:] 98 } 99 } 100 return b 101 } 102 103 // isHex reports whether the given character is a hex digit. 104 func isHex(c byte) bool { 105 return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' 106 } 107 108 // hexDecode decodes a short hex digit sequence: "10" -> 16. 109 func hexDecode(s []byte) rune { 110 n := '\x00' 111 for _, c := range s { 112 n <<= 4 113 switch { 114 case '0' <= c && c <= '9': 115 n |= rune(c - '0') 116 case 'a' <= c && c <= 'f': 117 n |= rune(c-'a') + 10 118 case 'A' <= c && c <= 'F': 119 n |= rune(c-'A') + 10 120 default: 121 panic(fmt.Sprintf("Bad hex digit in %q", s)) 122 } 123 } 124 return n 125 } 126 127 // skipCSSSpace returns a suffix of c, skipping over a single space. 
128 func skipCSSSpace(c []byte) []byte { 129 if len(c) == 0 { 130 return c 131 } 132 // wc ::= #x9 | #xA | #xC | #xD | #x20 133 switch c[0] { 134 case '\t', '\n', '\f', ' ': 135 return c[1:] 136 case '\r': 137 // This differs from CSS3's wc production because it contains a 138 // probable spec error whereby wc contains all the single byte 139 // sequences in nl (newline) but not CRLF. 140 if len(c) >= 2 && c[1] == '\n' { 141 return c[2:] 142 } 143 return c[1:] 144 } 145 return c 146 } 147 148 // isCSSSpace reports whether b is a CSS space char as defined in wc. 149 func isCSSSpace(b byte) bool { 150 switch b { 151 case '\t', '\n', '\f', '\r', ' ': 152 return true 153 } 154 return false 155 } 156 157 // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes. 158 func cssEscaper(args ...any) string { 159 s, _ := stringify(args...) 160 var b strings.Builder 161 r, w, written := rune(0), 0, 0 162 for i := 0; i < len(s); i += w { 163 // See comment in htmlEscaper. 164 r, w = utf8.DecodeRuneInString(s[i:]) 165 var repl string 166 switch { 167 case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "": 168 repl = cssReplacementTable[r] 169 default: 170 continue 171 } 172 if written == 0 { 173 b.Grow(len(s)) 174 } 175 b.WriteString(s[written:i]) 176 b.WriteString(repl) 177 written = i + w 178 if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) { 179 b.WriteByte(' ') 180 } 181 } 182 if written == 0 { 183 return s 184 } 185 b.WriteString(s[written:]) 186 return b.String() 187 } 188 189 var cssReplacementTable = []string{ 190 0: `\0`, 191 '\t': `\9`, 192 '\n': `\a`, 193 '\f': `\c`, 194 '\r': `\d`, 195 // Encode HTML specials as hex so the output can be embedded 196 // in HTML attributes without further encoding. 
197 '"': `\22`, 198 '&': `\26`, 199 '\'': `\27`, 200 '(': `\28`, 201 ')': `\29`, 202 '+': `\2b`, 203 '/': `\2f`, 204 ':': `\3a`, 205 ';': `\3b`, 206 '<': `\3c`, 207 '>': `\3e`, 208 '\\': `\\`, 209 '{': `\7b`, 210 '}': `\7d`, 211 } 212 213 var expressionBytes = []byte("expression") 214 var mozBindingBytes = []byte("mozbinding") 215 216 // cssValueFilter allows innocuous CSS values in the output including CSS 217 // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values 218 // (inherit, blue), and colors (#888). 219 // It filters out unsafe values, such as those that affect token boundaries, 220 // and anything that might execute scripts. 221 func cssValueFilter(args ...any) string { 222 s, t := stringify(args...) 223 if t == contentTypeCSS { 224 return s 225 } 226 b, id := decodeCSS([]byte(s)), make([]byte, 0, 64) 227 228 // CSS3 error handling is specified as honoring string boundaries per 229 // https://www.w3.org/TR/css3-syntax/#error-handling : 230 // Malformed declarations. User agents must handle unexpected 231 // tokens encountered while parsing a declaration by reading until 232 // the end of the declaration, while observing the rules for 233 // matching pairs of (), [], {}, "", and '', and correctly handling 234 // escapes. For example, a malformed declaration may be missing a 235 // property, colon (:) or value. 236 // So we need to make sure that values do not have mismatched bracket 237 // or quote characters to prevent the browser from restarting parsing 238 // inside a string that might embed JavaScript source. 239 for i, c := range b { 240 switch c { 241 case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}': 242 return filterFailsafe 243 case '-': 244 // Disallow <!-- or -->. 245 // -- should not appear in valid identifiers. 
246 if i != 0 && b[i-1] == '-' { 247 return filterFailsafe 248 } 249 default: 250 if c < utf8.RuneSelf && isCSSNmchar(rune(c)) { 251 id = append(id, c) 252 } 253 } 254 } 255 id = bytes.ToLower(id) 256 if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) { 257 return filterFailsafe 258 } 259 return string(b) 260 }