html.go - hugo - Fork of github.com/gohugoio/hugo with reverse pagination support

html.go (7437B)

    1 // Copyright 2011 The Go Authors. All rights reserved.
    2 // Use of this source code is governed by a BSD-style
    3 // license that can be found in the LICENSE file.
    4 
    5 package template
    6 
    7 import (
    8 	"bytes"
    9 	"fmt"
   10 	"strings"
   11 	"unicode/utf8"
   12 )
   13 
   14 // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
   15 func htmlNospaceEscaper(args ...any) string {
   16 	s, t := stringify(args...)
   17 	if t == contentTypeHTML {
   18 		return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
   19 	}
   20 	return htmlReplacer(s, htmlNospaceReplacementTable, false)
   21 }
   22 
   23 // attrEscaper escapes for inclusion in quoted attribute values.
   24 func attrEscaper(args ...any) string {
   25 	s, t := stringify(args...)
   26 	if t == contentTypeHTML {
   27 		return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
   28 	}
   29 	return htmlReplacer(s, htmlReplacementTable, true)
   30 }
   31 
   32 // rcdataEscaper escapes for inclusion in an RCDATA element body.
   33 func rcdataEscaper(args ...any) string {
   34 	s, t := stringify(args...)
   35 	if t == contentTypeHTML {
   36 		return htmlReplacer(s, htmlNormReplacementTable, true)
   37 	}
   38 	return htmlReplacer(s, htmlReplacementTable, true)
   39 }
   40 
   41 // htmlEscaper escapes for inclusion in HTML text.
   42 func htmlEscaper(args ...any) string {
   43 	s, t := stringify(args...)
   44 	if t == contentTypeHTML {
   45 		return s
   46 	}
   47 	return htmlReplacer(s, htmlReplacementTable, true)
   48 }
   49 
   50 // htmlReplacementTable contains the runes that need to be escaped
   51 // inside a quoted attribute value or in a text node.
   52 var htmlReplacementTable = []string{
   53 	// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
   54 	// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
   55 	// CHARACTER character to the current attribute's value.
   56 	// "
   57 	// and similarly
   58 	// https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
   59 	0:    "\uFFFD",
   60 	'"':  "&#34;",
   61 	'&':  "&amp;",
   62 	'\'': "&#39;",
   63 	'+':  "&#43;",
   64 	'<':  "&lt;",
   65 	'>':  "&gt;",
   66 }
   67 
   68 // htmlNormReplacementTable is like htmlReplacementTable but without '&' to
   69 // avoid over-encoding existing entities.
   70 var htmlNormReplacementTable = []string{
   71 	0:    "\uFFFD",
   72 	'"':  "&#34;",
   73 	'\'': "&#39;",
   74 	'+':  "&#43;",
   75 	'<':  "&lt;",
   76 	'>':  "&gt;",
   77 }
   78 
   79 // htmlNospaceReplacementTable contains the runes that need to be escaped
   80 // inside an unquoted attribute value.
   81 // The set of runes escaped is the union of the HTML specials and
   82 // those determined by running the JS below in browsers:
   83 // <div id=d></div>
   84 // <script>(function () {
   85 // var a = [], d = document.getElementById("d"), i, c, s;
   86 // for (i = 0; i < 0x10000; ++i) {
   87 //   c = String.fromCharCode(i);
   88 //   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
   89 //   s = d.getElementsByTagName("SPAN")[0];
   90 //   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
   91 // }
   92 // document.write(a.join(", "));
   93 // })()</script>
   94 var htmlNospaceReplacementTable = []string{
   95 	0:    "&#xfffd;",
   96 	'\t': "&#9;",
   97 	'\n': "&#10;",
   98 	'\v': "&#11;",
   99 	'\f': "&#12;",
  100 	'\r': "&#13;",
  101 	' ':  "&#32;",
  102 	'"':  "&#34;",
  103 	'&':  "&amp;",
  104 	'\'': "&#39;",
  105 	'+':  "&#43;",
  106 	'<':  "&lt;",
  107 	'=':  "&#61;",
  108 	'>':  "&gt;",
  109 	// A parse error in the attribute value (unquoted) and
  110 	// before attribute value states.
  111 	// Treated as a quoting character by IE.
  112 	'`': "&#96;",
  113 }
  114 
  115 // htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
  116 // without '&' to avoid over-encoding existing entities.
  117 var htmlNospaceNormReplacementTable = []string{
  118 	0:    "&#xfffd;",
  119 	'\t': "&#9;",
  120 	'\n': "&#10;",
  121 	'\v': "&#11;",
  122 	'\f': "&#12;",
  123 	'\r': "&#13;",
  124 	' ':  "&#32;",
  125 	'"':  "&#34;",
  126 	'\'': "&#39;",
  127 	'+':  "&#43;",
  128 	'<':  "&lt;",
  129 	'=':  "&#61;",
  130 	'>':  "&gt;",
  131 	// A parse error in the attribute value (unquoted) and
  132 	// before attribute value states.
  133 	// Treated as a quoting character by IE.
  134 	'`': "&#96;",
  135 }
  136 
  137 // htmlReplacer returns s with runes replaced according to replacementTable
  138 // and when badRunes is true, certain bad runes are allowed through unescaped.
  139 func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
  140 	written, b := 0, new(strings.Builder)
  141 	r, w := rune(0), 0
  142 	for i := 0; i < len(s); i += w {
  143 		// Cannot use 'for range s' because we need to preserve the width
  144 		// of the runes in the input. If we see a decoding error, the input
  145 		// width will not be utf8.Runelen(r) and we will overrun the buffer.
  146 		r, w = utf8.DecodeRuneInString(s[i:])
  147 		if int(r) < len(replacementTable) {
  148 			if repl := replacementTable[r]; len(repl) != 0 {
  149 				if written == 0 {
  150 					b.Grow(len(s))
  151 				}
  152 				b.WriteString(s[written:i])
  153 				b.WriteString(repl)
  154 				written = i + w
  155 			}
  156 		} else if badRunes {
  157 			// No-op.
  158 			// IE does not allow these ranges in unquoted attrs.
  159 		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
  160 			if written == 0 {
  161 				b.Grow(len(s))
  162 			}
  163 			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
  164 			written = i + w
  165 		}
  166 	}
  167 	if written == 0 {
  168 		return s
  169 	}
  170 	b.WriteString(s[written:])
  171 	return b.String()
  172 }
  173 
  174 // stripTags takes a snippet of HTML and returns only the text content.
  175 // For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
  176 func stripTags(html string) string {
  177 	var b bytes.Buffer
  178 	s, c, i, allText := []byte(html), context{}, 0, true
  179 	// Using the transition funcs helps us avoid mangling
  180 	// `<div title="1>2">` or `I <3 Ponies!`.
  181 	for i != len(s) {
  182 		if c.delim == delimNone {
  183 			st := c.state
  184 			// Use RCDATA instead of parsing into JS or CSS styles.
  185 			if c.element != elementNone && !isInTag(st) {
  186 				st = stateRCDATA
  187 			}
  188 			d, nread := transitionFunc[st](c, s[i:])
  189 			i1 := i + nread
  190 			if c.state == stateText || c.state == stateRCDATA {
  191 				// Emit text up to the start of the tag or comment.
  192 				j := i1
  193 				if d.state != c.state {
  194 					for j1 := j - 1; j1 >= i; j1-- {
  195 						if s[j1] == '<' {
  196 							j = j1
  197 							break
  198 						}
  199 					}
  200 				}
  201 				b.Write(s[i:j])
  202 			} else {
  203 				allText = false
  204 			}
  205 			c, i = d, i1
  206 			continue
  207 		}
  208 		i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
  209 		if i1 < i {
  210 			break
  211 		}
  212 		if c.delim != delimSpaceOrTagEnd {
  213 			// Consume any quote.
  214 			i1++
  215 		}
  216 		c, i = context{state: stateTag, element: c.element}, i1
  217 	}
  218 	if allText {
  219 		return html
  220 	} else if c.state == stateText || c.state == stateRCDATA {
  221 		b.Write(s[i:])
  222 	}
  223 	return b.String()
  224 }
  225 
  226 // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
  227 // a known-safe HTML attribute.
  228 func htmlNameFilter(args ...any) string {
  229 	s, t := stringify(args...)
  230 	if t == contentTypeHTMLAttr {
  231 		return s
  232 	}
  233 	if len(s) == 0 {
  234 		// Avoid violation of structure preservation.
  235 		// <input checked {{.K}}={{.V}}>.
  236 		// Without this, if .K is empty then .V is the value of
  237 		// checked, but otherwise .V is the value of the attribute
  238 		// named .K.
  239 		return filterFailsafe
  240 	}
  241 	s = strings.ToLower(s)
  242 	if t := attrType(s); t != contentTypePlain {
  243 		// TODO: Split attr and element name part filters so we can recognize known attributes.
  244 		return filterFailsafe
  245 	}
  246 	for _, r := range s {
  247 		switch {
  248 		case '0' <= r && r <= '9':
  249 		case 'a' <= r && r <= 'z':
  250 		default:
  251 			return filterFailsafe
  252 		}
  253 	}
  254 	return s
  255 }
  256 
  257 // commentEscaper returns the empty string regardless of input.
  258 // Comment content does not correspond to any parsed structure or
  259 // human-readable content, so the simplest and most secure policy is to drop
  260 // content interpolated into comments.
  261 // This approach is equally valid whether or not static comment content is
  262 // removed from the template.
  263 func commentEscaper(args ...any) string {
  264 	return ""
  265 }