absurlreplacer.go (4892B)
1 // Copyright 2018 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package urlreplacers 15 16 import ( 17 "bytes" 18 "io" 19 "unicode" 20 "unicode/utf8" 21 22 "github.com/gohugoio/hugo/transform" 23 ) 24 25 type absurllexer struct { 26 // the source to absurlify 27 content []byte 28 // the target for the new absurlified content 29 w io.Writer 30 31 // path may be set to a "." relative path 32 path []byte 33 34 pos int // input position 35 start int // item start position 36 37 quotes [][]byte 38 } 39 40 type prefix struct { 41 disabled bool 42 b []byte 43 f func(l *absurllexer) 44 45 nextPos int 46 } 47 48 func (p *prefix) find(bs []byte, start int) bool { 49 if p.disabled { 50 return false 51 } 52 53 if p.nextPos == -1 { 54 idx := bytes.Index(bs[start:], p.b) 55 56 if idx == -1 { 57 p.disabled = true 58 // Find the closest match 59 return false 60 } 61 62 p.nextPos = start + idx + len(p.b) 63 } 64 65 return true 66 } 67 68 func newPrefixState() []*prefix { 69 return []*prefix{ 70 {b: []byte("src="), f: checkCandidateBase}, 71 {b: []byte("href="), f: checkCandidateBase}, 72 {b: []byte("url="), f: checkCandidateBase}, 73 {b: []byte("action="), f: checkCandidateBase}, 74 {b: []byte("srcset="), f: checkCandidateSrcset}, 75 } 76 } 77 78 func (l *absurllexer) emit() { 79 l.w.Write(l.content[l.start:l.pos]) 80 l.start = l.pos 81 } 82 83 var ( 84 relURLPrefix = []byte("/") 85 relURLPrefixLen = len(relURLPrefix) 86 ) 87 88 func (l *absurllexer) consumeQuote() []byte { 89 for _, q := range l.quotes { 90 if bytes.HasPrefix(l.content[l.pos:], q) { 91 l.pos += len(q) 92 l.emit() 93 return q 94 } 95 } 96 return nil 97 } 98 99 // handle URLs in src and href. 100 func checkCandidateBase(l *absurllexer) { 101 l.consumeQuote() 102 103 if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) { 104 return 105 } 106 107 // check for schemaless URLs 108 posAfter := l.pos + relURLPrefixLen 109 if posAfter >= len(l.content) { 110 return 111 } 112 r, _ := utf8.DecodeRune(l.content[posAfter:]) 113 if r == '/' { 114 // schemaless: skip 115 return 116 } 117 if l.pos > l.start { 118 l.emit() 119 } 120 l.pos += relURLPrefixLen 121 l.w.Write(l.path) 122 l.start = l.pos 123 } 124 125 func (l *absurllexer) posAfterURL(q []byte) int { 126 if len(q) > 0 { 127 // look for end quote 128 return bytes.Index(l.content[l.pos:], q) 129 } 130 131 return bytes.IndexFunc(l.content[l.pos:], func(r rune) bool { 132 return r == '>' || unicode.IsSpace(r) 133 }) 134 } 135 136 // handle URLs in srcset. 137 func checkCandidateSrcset(l *absurllexer) { 138 q := l.consumeQuote() 139 if q == nil { 140 // srcset needs to be quoted. 141 return 142 } 143 144 // special case, not frequent (me think) 145 if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) { 146 return 147 } 148 149 // check for schemaless URLs 150 posAfter := l.pos + relURLPrefixLen 151 if posAfter >= len(l.content) { 152 return 153 } 154 r, _ := utf8.DecodeRune(l.content[posAfter:]) 155 if r == '/' { 156 // schemaless: skip 157 return 158 } 159 160 posEnd := l.posAfterURL(q) 161 162 // safe guard 163 if posEnd < 0 || posEnd > 2000 { 164 return 165 } 166 167 if l.pos > l.start { 168 l.emit() 169 } 170 171 section := l.content[l.pos : l.pos+posEnd+1] 172 173 fields := bytes.Fields(section) 174 for i, f := range fields { 175 if f[0] == '/' { 176 l.w.Write(l.path) 177 l.w.Write(f[1:]) 178 179 } else { 180 l.w.Write(f) 181 } 182 183 if i < len(fields)-1 { 184 l.w.Write([]byte(" ")) 185 } 186 } 187 188 l.pos += len(section) 189 l.start = l.pos 190 } 191 192 // main loop 193 func (l *absurllexer) replace() { 194 contentLength := len(l.content) 195 196 prefixes := newPrefixState() 197 198 for { 199 if l.pos >= contentLength { 200 break 201 } 202 203 var match *prefix 204 205 for _, p := range prefixes { 206 if !p.find(l.content, l.pos) { 207 continue 208 } 209 210 if match == nil || p.nextPos < match.nextPos { 211 match = p 212 } 213 } 214 215 if match == nil { 216 // Done! 217 l.pos = contentLength 218 break 219 } else { 220 l.pos = match.nextPos 221 match.nextPos = -1 222 match.f(l) 223 } 224 } 225 // Done! 226 if l.pos > l.start { 227 l.emit() 228 } 229 } 230 231 func doReplace(path string, ct transform.FromTo, quotes [][]byte) { 232 lexer := &absurllexer{ 233 content: ct.From().Bytes(), 234 w: ct.To(), 235 path: []byte(path), 236 quotes: quotes, 237 } 238 239 lexer.replace() 240 } 241 242 type absURLReplacer struct { 243 htmlQuotes [][]byte 244 xmlQuotes [][]byte 245 } 246 247 func newAbsURLReplacer() *absURLReplacer { 248 return &absURLReplacer{ 249 htmlQuotes: [][]byte{[]byte("\""), []byte("'")}, 250 xmlQuotes: [][]byte{[]byte("""), []byte("'")}, 251 } 252 } 253 254 func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) { 255 doReplace(path, ct, au.htmlQuotes) 256 } 257 258 func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) { 259 doReplace(path, ct, au.xmlQuotes) 260 }