pageparser.go (5057B)
1 // Copyright 2019 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package pageparser 15 16 import ( 17 "bytes" 18 "fmt" 19 "io" 20 "io/ioutil" 21 22 "github.com/gohugoio/hugo/parser/metadecoders" 23 ) 24 25 // Result holds the parse result. 26 type Result interface { 27 // Iterator returns a new Iterator positioned at the beginning of the parse tree. 28 Iterator() *Iterator 29 // Input returns the input to Parse. 30 Input() []byte 31 } 32 33 var _ Result = (*pageLexer)(nil) 34 35 // Parse parses the page in the given reader according to the given Config. 36 // TODO(bep) now that we have improved the "lazy order" init, it *may* be 37 // some potential saving in doing a buffered approach where the first pass does 38 // the frontmatter only. 39 func Parse(r io.Reader, cfg Config) (Result, error) { 40 return parseSection(r, cfg, lexIntroSection) 41 } 42 43 type ContentFrontMatter struct { 44 Content []byte 45 FrontMatter map[string]any 46 FrontMatterFormat metadecoders.Format 47 } 48 49 // ParseFrontMatterAndContent is a convenience method to extract front matter 50 // and content from a content page. 51 func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) { 52 var cf ContentFrontMatter 53 54 psr, err := Parse(r, Config{}) 55 if err != nil { 56 return cf, err 57 } 58 59 var frontMatterSource []byte 60 61 iter := psr.Iterator() 62 63 walkFn := func(item Item) bool { 64 if frontMatterSource != nil { 65 // The rest is content. 66 cf.Content = psr.Input()[item.Pos:] 67 // Done 68 return false 69 } else if item.IsFrontMatter() { 70 cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type) 71 frontMatterSource = item.Val 72 } 73 return true 74 } 75 76 iter.PeekWalk(walkFn) 77 78 cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat) 79 return cf, err 80 } 81 82 func FormatFromFrontMatterType(typ ItemType) metadecoders.Format { 83 switch typ { 84 case TypeFrontMatterJSON: 85 return metadecoders.JSON 86 case TypeFrontMatterORG: 87 return metadecoders.ORG 88 case TypeFrontMatterTOML: 89 return metadecoders.TOML 90 case TypeFrontMatterYAML: 91 return metadecoders.YAML 92 default: 93 return "" 94 } 95 } 96 97 // ParseMain parses starting with the main section. Used in tests. 98 func ParseMain(r io.Reader, cfg Config) (Result, error) { 99 return parseSection(r, cfg, lexMainSection) 100 } 101 102 func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) { 103 b, err := ioutil.ReadAll(r) 104 if err != nil { 105 return nil, fmt.Errorf("failed to read page content: %w", err) 106 } 107 return parseBytes(b, cfg, start) 108 } 109 110 func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) { 111 lexer := newPageLexer(b, start, cfg) 112 lexer.run() 113 return lexer, nil 114 } 115 116 // An Iterator has methods to iterate a parsed page with support going back 117 // if needed. 118 type Iterator struct { 119 l *pageLexer 120 lastPos int // position of the last item returned by nextItem 121 } 122 123 // consumes and returns the next item 124 func (t *Iterator) Next() Item { 125 t.lastPos++ 126 return t.Current() 127 } 128 129 // Input returns the input source. 130 func (t *Iterator) Input() []byte { 131 return t.l.Input() 132 } 133 134 var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens"), true} 135 136 // Current will repeatably return the current item. 137 func (t *Iterator) Current() Item { 138 if t.lastPos >= len(t.l.items) { 139 return errIndexOutOfBounds 140 } 141 return t.l.items[t.lastPos] 142 } 143 144 // backs up one token. 145 func (t *Iterator) Backup() { 146 if t.lastPos < 0 { 147 panic("need to go forward before going back") 148 } 149 t.lastPos-- 150 } 151 152 // Pos returns the current position in the input. 153 func (t *Iterator) Pos() int { 154 return t.lastPos 155 } 156 157 // check for non-error and non-EOF types coming next 158 func (t *Iterator) IsValueNext() bool { 159 i := t.Peek() 160 return i.Type != tError && i.Type != tEOF 161 } 162 163 // look at, but do not consume, the next item 164 // repeated, sequential calls will return the same item 165 func (t *Iterator) Peek() Item { 166 return t.l.items[t.lastPos+1] 167 } 168 169 // PeekWalk will feed the next items in the iterator to walkFn 170 // until it returns false. 171 func (t *Iterator) PeekWalk(walkFn func(item Item) bool) { 172 for i := t.lastPos + 1; i < len(t.l.items); i++ { 173 item := t.l.items[i] 174 if !walkFn(item) { 175 break 176 } 177 } 178 } 179 180 // Consume is a convenience method to consume the next n tokens, 181 // but back off Errors and EOF. 182 func (t *Iterator) Consume(cnt int) { 183 for i := 0; i < cnt; i++ { 184 token := t.Next() 185 if token.Type == tError || token.Type == tEOF { 186 t.Backup() 187 break 188 } 189 } 190 } 191 192 // LineNumber returns the current line number. Used for logging. 193 func (t *Iterator) LineNumber() int { 194 return bytes.Count(t.l.input[:t.Current().Pos], lf) + 1 195 }