hugo

Fork of github.com/gohugoio/hugo with reverse pagination support

git clone git://git.shimmy1996.com/hugo.git

pageparser.go (5057B)

    1 // Copyright 2019 The Hugo Authors. All rights reserved.
    2 //
    3 // Licensed under the Apache License, Version 2.0 (the "License");
    4 // you may not use this file except in compliance with the License.
    5 // You may obtain a copy of the License at
    6 // http://www.apache.org/licenses/LICENSE-2.0
    7 //
    8 // Unless required by applicable law or agreed to in writing, software
    9 // distributed under the License is distributed on an "AS IS" BASIS,
   10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   11 // See the License for the specific language governing permissions and
   12 // limitations under the License.
   13 
   14 package pageparser
   15 
   16 import (
   17 	"bytes"
   18 	"fmt"
   19 	"io"
   20 	"io/ioutil"
   21 
   22 	"github.com/gohugoio/hugo/parser/metadecoders"
   23 )
   24 
   25 // Result holds the parse result.
   26 type Result interface {
   27 	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
   28 	Iterator() *Iterator
   29 	// Input returns the input to Parse.
   30 	Input() []byte
   31 }
   32 
   33 var _ Result = (*pageLexer)(nil)
   34 
   35 // Parse parses the page in the given reader according to the given Config.
   36 // TODO(bep) now that we have improved the "lazy order" init, it *may* be
   37 // some potential saving in doing a buffered approach where the first pass does
   38 // the frontmatter only.
   39 func Parse(r io.Reader, cfg Config) (Result, error) {
   40 	return parseSection(r, cfg, lexIntroSection)
   41 }
   42 
   43 type ContentFrontMatter struct {
   44 	Content           []byte
   45 	FrontMatter       map[string]any
   46 	FrontMatterFormat metadecoders.Format
   47 }
   48 
   49 // ParseFrontMatterAndContent is a convenience method to extract front matter
   50 // and content from a content page.
   51 func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
   52 	var cf ContentFrontMatter
   53 
   54 	psr, err := Parse(r, Config{})
   55 	if err != nil {
   56 		return cf, err
   57 	}
   58 
   59 	var frontMatterSource []byte
   60 
   61 	iter := psr.Iterator()
   62 
   63 	walkFn := func(item Item) bool {
   64 		if frontMatterSource != nil {
   65 			// The rest is content.
   66 			cf.Content = psr.Input()[item.Pos:]
   67 			// Done
   68 			return false
   69 		} else if item.IsFrontMatter() {
   70 			cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
   71 			frontMatterSource = item.Val
   72 		}
   73 		return true
   74 	}
   75 
   76 	iter.PeekWalk(walkFn)
   77 
   78 	cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat)
   79 	return cf, err
   80 }
   81 
   82 func FormatFromFrontMatterType(typ ItemType) metadecoders.Format {
   83 	switch typ {
   84 	case TypeFrontMatterJSON:
   85 		return metadecoders.JSON
   86 	case TypeFrontMatterORG:
   87 		return metadecoders.ORG
   88 	case TypeFrontMatterTOML:
   89 		return metadecoders.TOML
   90 	case TypeFrontMatterYAML:
   91 		return metadecoders.YAML
   92 	default:
   93 		return ""
   94 	}
   95 }
   96 
   97 // ParseMain parses starting with the main section. Used in tests.
   98 func ParseMain(r io.Reader, cfg Config) (Result, error) {
   99 	return parseSection(r, cfg, lexMainSection)
  100 }
  101 
  102 func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) {
  103 	b, err := ioutil.ReadAll(r)
  104 	if err != nil {
  105 		return nil, fmt.Errorf("failed to read page content: %w", err)
  106 	}
  107 	return parseBytes(b, cfg, start)
  108 }
  109 
  110 func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) {
  111 	lexer := newPageLexer(b, start, cfg)
  112 	lexer.run()
  113 	return lexer, nil
  114 }
  115 
  116 // An Iterator has methods to iterate a parsed page with support going back
  117 // if needed.
  118 type Iterator struct {
  119 	l       *pageLexer
  120 	lastPos int // position of the last item returned by nextItem
  121 }
  122 
  123 // consumes and returns the next item
  124 func (t *Iterator) Next() Item {
  125 	t.lastPos++
  126 	return t.Current()
  127 }
  128 
  129 // Input returns the input source.
  130 func (t *Iterator) Input() []byte {
  131 	return t.l.Input()
  132 }
  133 
  134 var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens"), true}
  135 
  136 // Current will repeatably return the current item.
  137 func (t *Iterator) Current() Item {
  138 	if t.lastPos >= len(t.l.items) {
  139 		return errIndexOutOfBounds
  140 	}
  141 	return t.l.items[t.lastPos]
  142 }
  143 
  144 // backs up one token.
  145 func (t *Iterator) Backup() {
  146 	if t.lastPos < 0 {
  147 		panic("need to go forward before going back")
  148 	}
  149 	t.lastPos--
  150 }
  151 
  152 // Pos returns the current position in the input.
  153 func (t *Iterator) Pos() int {
  154 	return t.lastPos
  155 }
  156 
  157 // check for non-error and non-EOF types coming next
  158 func (t *Iterator) IsValueNext() bool {
  159 	i := t.Peek()
  160 	return i.Type != tError && i.Type != tEOF
  161 }
  162 
  163 // look at, but do not consume, the next item
  164 // repeated, sequential calls will return the same item
  165 func (t *Iterator) Peek() Item {
  166 	return t.l.items[t.lastPos+1]
  167 }
  168 
  169 // PeekWalk will feed the next items in the iterator to walkFn
  170 // until it returns false.
  171 func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
  172 	for i := t.lastPos + 1; i < len(t.l.items); i++ {
  173 		item := t.l.items[i]
  174 		if !walkFn(item) {
  175 			break
  176 		}
  177 	}
  178 }
  179 
  180 // Consume is a convenience method to consume the next n tokens,
  181 // but back off Errors and EOF.
  182 func (t *Iterator) Consume(cnt int) {
  183 	for i := 0; i < cnt; i++ {
  184 		token := t.Next()
  185 		if token.Type == tError || token.Type == tEOF {
  186 			t.Backup()
  187 			break
  188 		}
  189 	}
  190 }
  191 
  192 // LineNumber returns the current line number. Used for logging.
  193 func (t *Iterator) LineNumber() int {
  194 	return bytes.Count(t.l.input[:t.Current().Pos], lf) + 1
  195 }