absurlreplacer.go - hugo - Fork of github.com/gohugoio/hugo with reverse pagination support

absurlreplacer.go (4892B)

    1 // Copyright 2018 The Hugo Authors. All rights reserved.
    2 //
    3 // Licensed under the Apache License, Version 2.0 (the "License");
    4 // you may not use this file except in compliance with the License.
    5 // You may obtain a copy of the License at
    6 // http://www.apache.org/licenses/LICENSE-2.0
    7 //
    8 // Unless required by applicable law or agreed to in writing, software
    9 // distributed under the License is distributed on an "AS IS" BASIS,
   10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   11 // See the License for the specific language governing permissions and
   12 // limitations under the License.
   13 
   14 package urlreplacers
   15 
   16 import (
   17 	"bytes"
   18 	"io"
   19 	"unicode"
   20 	"unicode/utf8"
   21 
   22 	"github.com/gohugoio/hugo/transform"
   23 )
   24 
   25 type absurllexer struct {
   26 	// the source to absurlify
   27 	content []byte
   28 	// the target for the new absurlified content
   29 	w io.Writer
   30 
   31 	// path may be set to a "." relative path
   32 	path []byte
   33 
   34 	pos   int // input position
   35 	start int // item start position
   36 
   37 	quotes [][]byte
   38 }
   39 
   40 type prefix struct {
   41 	disabled bool
   42 	b        []byte
   43 	f        func(l *absurllexer)
   44 
   45 	nextPos int
   46 }
   47 
   48 func (p *prefix) find(bs []byte, start int) bool {
   49 	if p.disabled {
   50 		return false
   51 	}
   52 
   53 	if p.nextPos == -1 {
   54 		idx := bytes.Index(bs[start:], p.b)
   55 
   56 		if idx == -1 {
   57 			p.disabled = true
   58 			// Find the closest match
   59 			return false
   60 		}
   61 
   62 		p.nextPos = start + idx + len(p.b)
   63 	}
   64 
   65 	return true
   66 }
   67 
   68 func newPrefixState() []*prefix {
   69 	return []*prefix{
   70 		{b: []byte("src="), f: checkCandidateBase},
   71 		{b: []byte("href="), f: checkCandidateBase},
   72 		{b: []byte("url="), f: checkCandidateBase},
   73 		{b: []byte("action="), f: checkCandidateBase},
   74 		{b: []byte("srcset="), f: checkCandidateSrcset},
   75 	}
   76 }
   77 
   78 func (l *absurllexer) emit() {
   79 	l.w.Write(l.content[l.start:l.pos])
   80 	l.start = l.pos
   81 }
   82 
   83 var (
   84 	relURLPrefix    = []byte("/")
   85 	relURLPrefixLen = len(relURLPrefix)
   86 )
   87 
   88 func (l *absurllexer) consumeQuote() []byte {
   89 	for _, q := range l.quotes {
   90 		if bytes.HasPrefix(l.content[l.pos:], q) {
   91 			l.pos += len(q)
   92 			l.emit()
   93 			return q
   94 		}
   95 	}
   96 	return nil
   97 }
   98 
   99 // handle URLs in src and href.
  100 func checkCandidateBase(l *absurllexer) {
  101 	l.consumeQuote()
  102 
  103 	if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
  104 		return
  105 	}
  106 
  107 	// check for schemaless URLs
  108 	posAfter := l.pos + relURLPrefixLen
  109 	if posAfter >= len(l.content) {
  110 		return
  111 	}
  112 	r, _ := utf8.DecodeRune(l.content[posAfter:])
  113 	if r == '/' {
  114 		// schemaless: skip
  115 		return
  116 	}
  117 	if l.pos > l.start {
  118 		l.emit()
  119 	}
  120 	l.pos += relURLPrefixLen
  121 	l.w.Write(l.path)
  122 	l.start = l.pos
  123 }
  124 
  125 func (l *absurllexer) posAfterURL(q []byte) int {
  126 	if len(q) > 0 {
  127 		// look for end quote
  128 		return bytes.Index(l.content[l.pos:], q)
  129 	}
  130 
  131 	return bytes.IndexFunc(l.content[l.pos:], func(r rune) bool {
  132 		return r == '>' || unicode.IsSpace(r)
  133 	})
  134 }
  135 
  136 // handle URLs in srcset.
  137 func checkCandidateSrcset(l *absurllexer) {
  138 	q := l.consumeQuote()
  139 	if q == nil {
  140 		// srcset needs to be quoted.
  141 		return
  142 	}
  143 
  144 	// special case, not frequent (me think)
  145 	if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
  146 		return
  147 	}
  148 
  149 	// check for schemaless URLs
  150 	posAfter := l.pos + relURLPrefixLen
  151 	if posAfter >= len(l.content) {
  152 		return
  153 	}
  154 	r, _ := utf8.DecodeRune(l.content[posAfter:])
  155 	if r == '/' {
  156 		// schemaless: skip
  157 		return
  158 	}
  159 
  160 	posEnd := l.posAfterURL(q)
  161 
  162 	// safe guard
  163 	if posEnd < 0 || posEnd > 2000 {
  164 		return
  165 	}
  166 
  167 	if l.pos > l.start {
  168 		l.emit()
  169 	}
  170 
  171 	section := l.content[l.pos : l.pos+posEnd+1]
  172 
  173 	fields := bytes.Fields(section)
  174 	for i, f := range fields {
  175 		if f[0] == '/' {
  176 			l.w.Write(l.path)
  177 			l.w.Write(f[1:])
  178 
  179 		} else {
  180 			l.w.Write(f)
  181 		}
  182 
  183 		if i < len(fields)-1 {
  184 			l.w.Write([]byte(" "))
  185 		}
  186 	}
  187 
  188 	l.pos += len(section)
  189 	l.start = l.pos
  190 }
  191 
  192 // main loop
  193 func (l *absurllexer) replace() {
  194 	contentLength := len(l.content)
  195 
  196 	prefixes := newPrefixState()
  197 
  198 	for {
  199 		if l.pos >= contentLength {
  200 			break
  201 		}
  202 
  203 		var match *prefix
  204 
  205 		for _, p := range prefixes {
  206 			if !p.find(l.content, l.pos) {
  207 				continue
  208 			}
  209 
  210 			if match == nil || p.nextPos < match.nextPos {
  211 				match = p
  212 			}
  213 		}
  214 
  215 		if match == nil {
  216 			// Done!
  217 			l.pos = contentLength
  218 			break
  219 		} else {
  220 			l.pos = match.nextPos
  221 			match.nextPos = -1
  222 			match.f(l)
  223 		}
  224 	}
  225 	// Done!
  226 	if l.pos > l.start {
  227 		l.emit()
  228 	}
  229 }
  230 
  231 func doReplace(path string, ct transform.FromTo, quotes [][]byte) {
  232 	lexer := &absurllexer{
  233 		content: ct.From().Bytes(),
  234 		w:       ct.To(),
  235 		path:    []byte(path),
  236 		quotes:  quotes,
  237 	}
  238 
  239 	lexer.replace()
  240 }
  241 
  242 type absURLReplacer struct {
  243 	htmlQuotes [][]byte
  244 	xmlQuotes  [][]byte
  245 }
  246 
  247 func newAbsURLReplacer() *absURLReplacer {
  248 	return &absURLReplacer{
  249 		htmlQuotes: [][]byte{[]byte("\""), []byte("'")},
  250 		xmlQuotes:  [][]byte{[]byte("&#34;"), []byte("&#39;")},
  251 	}
  252 }
  253 
  254 func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) {
  255 	doReplace(path, ct, au.htmlQuotes)
  256 }
  257 
  258 func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) {
  259 	doReplace(path, ct, au.xmlQuotes)
  260 }