hugo

Fork of github.com/gohugoio/hugo with reverse pagination support

git clone git://git.shimmy1996.com/hugo.git

url.go (6800B)

    1 // Copyright 2011 The Go Authors. All rights reserved.
    2 // Use of this source code is governed by a BSD-style
    3 // license that can be found in the LICENSE file.
    4 
    5 package template
    6 
    7 import (
    8 	"bytes"
    9 	"fmt"
   10 	"strings"
   11 )
   12 
   13 // urlFilter returns its input unless it contains an unsafe scheme in which
   14 // case it defangs the entire URL.
   15 //
   16 // Schemes that cause unintended side effects that are irreversible without user
   17 // interaction are considered unsafe. For example, clicking on a "javascript:"
   18 // link can immediately trigger JavaScript code execution.
   19 //
   20 // This filter conservatively assumes that all schemes other than the following
   21 // are unsafe:
   22 //    * http:   Navigates to a new website, and may open a new window or tab.
   23 //              These side effects can be reversed by navigating back to the
   24 //              previous website, or closing the window or tab. No irreversible
   25 //              changes will take place without further user interaction with
   26 //              the new website.
   27 //    * https:  Same as http.
   28 //    * mailto: Opens an email program and starts a new draft. This side effect
   29 //              is not irreversible until the user explicitly clicks send; it
   30 //              can be undone by closing the email program.
   31 //
   32 // To allow URLs containing other schemes to bypass this filter, developers must
   33 // explicitly indicate that such a URL is expected and safe by encapsulating it
   34 // in a template.URL value.
   35 func urlFilter(args ...any) string {
   36 	s, t := stringify(args...)
   37 	if t == contentTypeURL {
   38 		return s
   39 	}
   40 	if !isSafeURL(s) {
   41 		return "#" + filterFailsafe
   42 	}
   43 	return s
   44 }
   45 
   46 // isSafeURL is true if s is a relative URL or if URL has a protocol in
   47 // (http, https, mailto).
   48 func isSafeURL(s string) bool {
   49 	if protocol, _, ok := strings.Cut(s, ":"); ok && !strings.Contains(protocol, "/") {
   50 		if !strings.EqualFold(protocol, "http") && !strings.EqualFold(protocol, "https") && !strings.EqualFold(protocol, "mailto") {
   51 			return false
   52 		}
   53 	}
   54 	return true
   55 }
   56 
   57 // urlEscaper produces an output that can be embedded in a URL query.
   58 // The output can be embedded in an HTML attribute without further escaping.
   59 func urlEscaper(args ...any) string {
   60 	return urlProcessor(false, args...)
   61 }
   62 
   63 // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
   64 // string or parenthesis delimited url(...).
   65 // The normalizer does not encode all HTML specials. Specifically, it does not
   66 // encode '&' so correct embedding in an HTML attribute requires escaping of
   67 // '&' to '&'.
   68 func urlNormalizer(args ...any) string {
   69 	return urlProcessor(true, args...)
   70 }
   71 
   72 // urlProcessor normalizes (when norm is true) or escapes its input to produce
   73 // a valid hierarchical or opaque URL part.
   74 func urlProcessor(norm bool, args ...any) string {
   75 	s, t := stringify(args...)
   76 	if t == contentTypeURL {
   77 		norm = true
   78 	}
   79 	var b bytes.Buffer
   80 	if processURLOnto(s, norm, &b) {
   81 		return b.String()
   82 	}
   83 	return s
   84 }
   85 
   86 // processURLOnto appends a normalized URL corresponding to its input to b
   87 // and reports whether the appended content differs from s.
   88 func processURLOnto(s string, norm bool, b *bytes.Buffer) bool {
   89 	b.Grow(len(s) + 16)
   90 	written := 0
   91 	// The byte loop below assumes that all URLs use UTF-8 as the
   92 	// content-encoding. This is similar to the URI to IRI encoding scheme
   93 	// defined in section 3.1 of  RFC 3987, and behaves the same as the
   94 	// EcmaScript builtin encodeURIComponent.
   95 	// It should not cause any misencoding of URLs in pages with
   96 	// Content-type: text/html;charset=UTF-8.
   97 	for i, n := 0, len(s); i < n; i++ {
   98 		c := s[i]
   99 		switch c {
  100 		// Single quote and parens are sub-delims in RFC 3986, but we
  101 		// escape them so the output can be embedded in single
  102 		// quoted attributes and unquoted CSS url(...) constructs.
  103 		// Single quotes are reserved in URLs, but are only used in
  104 		// the obsolete "mark" rule in an appendix in RFC 3986
  105 		// so can be safely encoded.
  106 		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
  107 			if norm {
  108 				continue
  109 			}
  110 		// Unreserved according to RFC 3986 sec 2.3
  111 		// "For consistency, percent-encoded octets in the ranges of
  112 		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
  113 		// period (%2E), underscore (%5F), or tilde (%7E) should not be
  114 		// created by URI producers
  115 		case '-', '.', '_', '~':
  116 			continue
  117 		case '%':
  118 			// When normalizing do not re-encode valid escapes.
  119 			if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
  120 				continue
  121 			}
  122 		default:
  123 			// Unreserved according to RFC 3986 sec 2.3
  124 			if 'a' <= c && c <= 'z' {
  125 				continue
  126 			}
  127 			if 'A' <= c && c <= 'Z' {
  128 				continue
  129 			}
  130 			if '0' <= c && c <= '9' {
  131 				continue
  132 			}
  133 		}
  134 		b.WriteString(s[written:i])
  135 		fmt.Fprintf(b, "%%%02x", c)
  136 		written = i + 1
  137 	}
  138 	b.WriteString(s[written:])
  139 	return written != 0
  140 }
  141 
  142 // Filters and normalizes srcset values which are comma separated
  143 // URLs followed by metadata.
  144 func srcsetFilterAndEscaper(args ...any) string {
  145 	s, t := stringify(args...)
  146 	switch t {
  147 	case contentTypeSrcset:
  148 		return s
  149 	case contentTypeURL:
  150 		// Normalizing gets rid of all HTML whitespace
  151 		// which separate the image URL from its metadata.
  152 		var b bytes.Buffer
  153 		if processURLOnto(s, true, &b) {
  154 			s = b.String()
  155 		}
  156 		// Additionally, commas separate one source from another.
  157 		return strings.ReplaceAll(s, ",", "%2c")
  158 	}
  159 
  160 	var b bytes.Buffer
  161 	written := 0
  162 	for i := 0; i < len(s); i++ {
  163 		if s[i] == ',' {
  164 			filterSrcsetElement(s, written, i, &b)
  165 			b.WriteString(",")
  166 			written = i + 1
  167 		}
  168 	}
  169 	filterSrcsetElement(s, written, len(s), &b)
  170 	return b.String()
  171 }
  172 
// Derived from https://play.golang.org/p/Dhmj7FORT5
//
// A 128-bit little-endian bitmask (16 bytes x 8 bits): bit c of the mask is
// set iff ASCII code point c is either HTML whitespace (TAB, LF, FF, CR,
// SPACE per https://infra.spec.whatwg.org/#ascii-whitespace) or an ASCII
// alphanumeric. Tested as table[c>>3] & (1 << (c & 0x7)) by the helpers below.
const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"
  175 
  176 // isHTMLSpace is true iff c is a whitespace character per
  177 // https://infra.spec.whatwg.org/#ascii-whitespace
  178 func isHTMLSpace(c byte) bool {
  179 	return (c <= 0x20) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)))
  180 }
  181 
  182 func isHTMLSpaceOrASCIIAlnum(c byte) bool {
  183 	return (c < 0x80) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)))
  184 }
  185 
  186 func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) {
  187 	start := left
  188 	for start < right && isHTMLSpace(s[start]) {
  189 		start++
  190 	}
  191 	end := right
  192 	for i := start; i < right; i++ {
  193 		if isHTMLSpace(s[i]) {
  194 			end = i
  195 			break
  196 		}
  197 	}
  198 	if url := s[start:end]; isSafeURL(url) {
  199 		// If image metadata is only spaces or alnums then
  200 		// we don't need to URL normalize it.
  201 		metadataOk := true
  202 		for i := end; i < right; i++ {
  203 			if !isHTMLSpaceOrASCIIAlnum(s[i]) {
  204 				metadataOk = false
  205 				break
  206 			}
  207 		}
  208 		if metadataOk {
  209 			b.WriteString(s[left:start])
  210 			processURLOnto(url, true, b)
  211 			b.WriteString(s[end:right])
  212 			return
  213 		}
  214 	}
  215 	b.WriteString("#")
  216 	b.WriteString(filterFailsafe)
  217 }