hugo

Fork of github.com/gohugoio/hugo with reverse pagination support

git clone git://git.shimmy1996.com/hugo.git

import_jekyll.go (15674B)

    1 // Copyright 2019 The Hugo Authors. All rights reserved.
    2 //
    3 // Licensed under the Apache License, Version 2.0 (the "License");
    4 // you may not use this file except in compliance with the License.
    5 // You may obtain a copy of the License at
    6 // http://www.apache.org/licenses/LICENSE-2.0
    7 //
    8 // Unless required by applicable law or agreed to in writing, software
    9 // distributed under the License is distributed on an "AS IS" BASIS,
   10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   11 // See the License for the specific language governing permissions and
   12 // limitations under the License.
   13 
   14 package commands
   15 
   16 import (
   17 	"bytes"
   18 	"errors"
   19 	"fmt"
   20 	"io/ioutil"
   21 	"os"
   22 	"path/filepath"
   23 	"regexp"
   24 	"strconv"
   25 	"strings"
   26 	"time"
   27 	"unicode"
   28 
   29 	"github.com/gohugoio/hugo/parser/pageparser"
   30 
   31 	"github.com/gohugoio/hugo/common/htime"
   32 	"github.com/gohugoio/hugo/common/hugio"
   33 
   34 	"github.com/gohugoio/hugo/parser/metadecoders"
   35 
   36 	"github.com/gohugoio/hugo/common/maps"
   37 	"github.com/gohugoio/hugo/helpers"
   38 	"github.com/gohugoio/hugo/hugofs"
   39 	"github.com/gohugoio/hugo/parser"
   40 	"github.com/spf13/afero"
   41 	"github.com/spf13/cobra"
   42 	jww "github.com/spf13/jwalterweatherman"
   43 )
   44 
// Compile-time check that *importCmd satisfies the cmder interface.
var _ cmder = (*importCmd)(nil)

// importCmd backs the "hugo import" command. It embeds *baseCmd, which
// newImportCmd fills in with the cobra command the subcommands hang off.
type importCmd struct {
	*baseCmd
}
   50 
   51 func newImportCmd() *importCmd {
   52 	cc := &importCmd{}
   53 
   54 	cc.baseCmd = newBaseCmd(&cobra.Command{
   55 		Use:   "import",
   56 		Short: "Import your site from others.",
   57 		Long: `Import your site from other web site generators like Jekyll.
   58 
   59 Import requires a subcommand, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.",
   60 		RunE: nil,
   61 	})
   62 
   63 	importJekyllCmd := &cobra.Command{
   64 		Use:   "jekyll",
   65 		Short: "hugo import from Jekyll",
   66 		Long: `hugo import from Jekyll.
   67 
   68 Import from Jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.",
   69 		RunE: cc.importFromJekyll,
   70 	}
   71 
   72 	importJekyllCmd.Flags().Bool("force", false, "allow import into non-empty target directory")
   73 
   74 	cc.cmd.AddCommand(importJekyllCmd)
   75 
   76 	return cc
   77 }
   78 
   79 func (i *importCmd) importFromJekyll(cmd *cobra.Command, args []string) error {
   80 	if len(args) < 2 {
   81 		return newUserError(`import from jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.")
   82 	}
   83 
   84 	jekyllRoot, err := filepath.Abs(filepath.Clean(args[0]))
   85 	if err != nil {
   86 		return newUserError("path error:", args[0])
   87 	}
   88 
   89 	targetDir, err := filepath.Abs(filepath.Clean(args[1]))
   90 	if err != nil {
   91 		return newUserError("path error:", args[1])
   92 	}
   93 
   94 	jww.INFO.Println("Import Jekyll from:", jekyllRoot, "to:", targetDir)
   95 
   96 	if strings.HasPrefix(filepath.Dir(targetDir), jekyllRoot) {
   97 		return newUserError("abort: target path should not be inside the Jekyll root")
   98 	}
   99 
  100 	forceImport, _ := cmd.Flags().GetBool("force")
  101 
  102 	fs := afero.NewOsFs()
  103 	jekyllPostDirs, hasAnyPost := i.getJekyllDirInfo(fs, jekyllRoot)
  104 	if !hasAnyPost {
  105 		return errors.New("abort: jekyll root contains neither posts nor drafts")
  106 	}
  107 
  108 	err = i.createSiteFromJekyll(jekyllRoot, targetDir, jekyllPostDirs, forceImport)
  109 
  110 	if err != nil {
  111 		return newUserError(err)
  112 	}
  113 
  114 	jww.FEEDBACK.Println("Importing...")
  115 
  116 	fileCount := 0
  117 	callback := func(path string, fi hugofs.FileMetaInfo, err error) error {
  118 		if err != nil {
  119 			return err
  120 		}
  121 
  122 		if fi.IsDir() {
  123 			return nil
  124 		}
  125 
  126 		relPath, err := filepath.Rel(jekyllRoot, path)
  127 		if err != nil {
  128 			return newUserError("get rel path error:", path)
  129 		}
  130 
  131 		relPath = filepath.ToSlash(relPath)
  132 		draft := false
  133 
  134 		switch {
  135 		case strings.Contains(relPath, "_posts/"):
  136 			relPath = filepath.Join("content/post", strings.Replace(relPath, "_posts/", "", -1))
  137 		case strings.Contains(relPath, "_drafts/"):
  138 			relPath = filepath.Join("content/draft", strings.Replace(relPath, "_drafts/", "", -1))
  139 			draft = true
  140 		default:
  141 			return nil
  142 		}
  143 
  144 		fileCount++
  145 		return convertJekyllPost(path, relPath, targetDir, draft)
  146 	}
  147 
  148 	for jekyllPostDir, hasAnyPostInDir := range jekyllPostDirs {
  149 		if hasAnyPostInDir {
  150 			if err = helpers.SymbolicWalk(hugofs.Os, filepath.Join(jekyllRoot, jekyllPostDir), callback); err != nil {
  151 				return err
  152 			}
  153 		}
  154 	}
  155 
  156 	jww.FEEDBACK.Println("Congratulations!", fileCount, "post(s) imported!")
  157 	jww.FEEDBACK.Println("Now, start Hugo by yourself:\n" +
  158 		"$ git clone https://github.com/spf13/herring-cove.git " + args[1] + "/themes/herring-cove")
  159 	jww.FEEDBACK.Println("$ cd " + args[1] + "\n$ hugo server --theme=herring-cove")
  160 
  161 	return nil
  162 }
  163 
  164 func (i *importCmd) getJekyllDirInfo(fs afero.Fs, jekyllRoot string) (map[string]bool, bool) {
  165 	postDirs := make(map[string]bool)
  166 	hasAnyPost := false
  167 	if entries, err := ioutil.ReadDir(jekyllRoot); err == nil {
  168 		for _, entry := range entries {
  169 			if entry.IsDir() {
  170 				subDir := filepath.Join(jekyllRoot, entry.Name())
  171 				if isPostDir, hasAnyPostInDir := i.retrieveJekyllPostDir(fs, subDir); isPostDir {
  172 					postDirs[entry.Name()] = hasAnyPostInDir
  173 					if hasAnyPostInDir {
  174 						hasAnyPost = true
  175 					}
  176 				}
  177 			}
  178 		}
  179 	}
  180 	return postDirs, hasAnyPost
  181 }
  182 
  183 func (i *importCmd) retrieveJekyllPostDir(fs afero.Fs, dir string) (bool, bool) {
  184 	if strings.HasSuffix(dir, "_posts") || strings.HasSuffix(dir, "_drafts") {
  185 		isEmpty, _ := helpers.IsEmpty(dir, fs)
  186 		return true, !isEmpty
  187 	}
  188 
  189 	if entries, err := ioutil.ReadDir(dir); err == nil {
  190 		for _, entry := range entries {
  191 			if entry.IsDir() {
  192 				subDir := filepath.Join(dir, entry.Name())
  193 				if isPostDir, hasAnyPost := i.retrieveJekyllPostDir(fs, subDir); isPostDir {
  194 					return isPostDir, hasAnyPost
  195 				}
  196 			}
  197 		}
  198 	}
  199 
  200 	return false, true
  201 }
  202 
  203 func (i *importCmd) createSiteFromJekyll(jekyllRoot, targetDir string, jekyllPostDirs map[string]bool, force bool) error {
  204 	fs := &afero.OsFs{}
  205 	if exists, _ := helpers.Exists(targetDir, fs); exists {
  206 		if isDir, _ := helpers.IsDir(targetDir, fs); !isDir {
  207 			return errors.New("target path \"" + targetDir + "\" exists but is not a directory")
  208 		}
  209 
  210 		isEmpty, _ := helpers.IsEmpty(targetDir, fs)
  211 
  212 		if !isEmpty && !force {
  213 			return errors.New("target path \"" + targetDir + "\" exists and is not empty")
  214 		}
  215 	}
  216 
  217 	jekyllConfig := i.loadJekyllConfig(fs, jekyllRoot)
  218 
  219 	mkdir(targetDir, "layouts")
  220 	mkdir(targetDir, "content")
  221 	mkdir(targetDir, "archetypes")
  222 	mkdir(targetDir, "static")
  223 	mkdir(targetDir, "data")
  224 	mkdir(targetDir, "themes")
  225 
  226 	i.createConfigFromJekyll(fs, targetDir, "yaml", jekyllConfig)
  227 
  228 	i.copyJekyllFilesAndFolders(jekyllRoot, filepath.Join(targetDir, "static"), jekyllPostDirs)
  229 
  230 	return nil
  231 }
  232 
  233 func (i *importCmd) loadJekyllConfig(fs afero.Fs, jekyllRoot string) map[string]any {
  234 	path := filepath.Join(jekyllRoot, "_config.yml")
  235 
  236 	exists, err := helpers.Exists(path, fs)
  237 
  238 	if err != nil || !exists {
  239 		jww.WARN.Println("_config.yaml not found: Is the specified Jekyll root correct?")
  240 		return nil
  241 	}
  242 
  243 	f, err := fs.Open(path)
  244 	if err != nil {
  245 		return nil
  246 	}
  247 
  248 	defer f.Close()
  249 
  250 	b, err := ioutil.ReadAll(f)
  251 	if err != nil {
  252 		return nil
  253 	}
  254 
  255 	c, err := metadecoders.Default.UnmarshalToMap(b, metadecoders.YAML)
  256 	if err != nil {
  257 		return nil
  258 	}
  259 
  260 	return c
  261 }
  262 
  263 func (i *importCmd) createConfigFromJekyll(fs afero.Fs, inpath string, kind metadecoders.Format, jekyllConfig map[string]any) (err error) {
  264 	title := "My New Hugo Site"
  265 	baseURL := "http://example.org/"
  266 
  267 	for key, value := range jekyllConfig {
  268 		lowerKey := strings.ToLower(key)
  269 
  270 		switch lowerKey {
  271 		case "title":
  272 			if str, ok := value.(string); ok {
  273 				title = str
  274 			}
  275 
  276 		case "url":
  277 			if str, ok := value.(string); ok {
  278 				baseURL = str
  279 			}
  280 		}
  281 	}
  282 
  283 	in := map[string]any{
  284 		"baseURL":            baseURL,
  285 		"title":              title,
  286 		"languageCode":       "en-us",
  287 		"disablePathToLower": true,
  288 	}
  289 
  290 	var buf bytes.Buffer
  291 	err = parser.InterfaceToConfig(in, kind, &buf)
  292 	if err != nil {
  293 		return err
  294 	}
  295 
  296 	return helpers.WriteToDisk(filepath.Join(inpath, "config."+string(kind)), &buf, fs)
  297 }
  298 
  299 func (i *importCmd) copyJekyllFilesAndFolders(jekyllRoot, dest string, jekyllPostDirs map[string]bool) (err error) {
  300 	fs := hugofs.Os
  301 
  302 	fi, err := fs.Stat(jekyllRoot)
  303 	if err != nil {
  304 		return err
  305 	}
  306 	if !fi.IsDir() {
  307 		return errors.New(jekyllRoot + " is not a directory")
  308 	}
  309 	err = os.MkdirAll(dest, fi.Mode())
  310 	if err != nil {
  311 		return err
  312 	}
  313 	entries, err := ioutil.ReadDir(jekyllRoot)
  314 	if err != nil {
  315 		return err
  316 	}
  317 
  318 	for _, entry := range entries {
  319 		sfp := filepath.Join(jekyllRoot, entry.Name())
  320 		dfp := filepath.Join(dest, entry.Name())
  321 		if entry.IsDir() {
  322 			if entry.Name()[0] != '_' && entry.Name()[0] != '.' {
  323 				if _, ok := jekyllPostDirs[entry.Name()]; !ok {
  324 					err = hugio.CopyDir(fs, sfp, dfp, nil)
  325 					if err != nil {
  326 						jww.ERROR.Println(err)
  327 					}
  328 				}
  329 			}
  330 		} else {
  331 			lowerEntryName := strings.ToLower(entry.Name())
  332 			exceptSuffix := []string{
  333 				".md", ".markdown", ".html", ".htm",
  334 				".xml", ".textile", "rakefile", "gemfile", ".lock",
  335 			}
  336 			isExcept := false
  337 			for _, suffix := range exceptSuffix {
  338 				if strings.HasSuffix(lowerEntryName, suffix) {
  339 					isExcept = true
  340 					break
  341 				}
  342 			}
  343 
  344 			if !isExcept && entry.Name()[0] != '.' && entry.Name()[0] != '_' {
  345 				err = hugio.CopyFile(fs, sfp, dfp)
  346 				if err != nil {
  347 					jww.ERROR.Println(err)
  348 				}
  349 			}
  350 		}
  351 
  352 	}
  353 	return nil
  354 }
  355 
  356 func parseJekyllFilename(filename string) (time.Time, string, error) {
  357 	re := regexp.MustCompile(`(\d+-\d+-\d+)-(.+)\..*`)
  358 	r := re.FindAllStringSubmatch(filename, -1)
  359 	if len(r) == 0 {
  360 		return htime.Now(), "", errors.New("filename not match")
  361 	}
  362 
  363 	postDate, err := time.Parse("2006-1-2", r[0][1])
  364 	if err != nil {
  365 		return htime.Now(), "", err
  366 	}
  367 
  368 	postName := r[0][2]
  369 
  370 	return postDate, postName, nil
  371 }
  372 
  373 func convertJekyllPost(path, relPath, targetDir string, draft bool) error {
  374 	jww.TRACE.Println("Converting", path)
  375 
  376 	filename := filepath.Base(path)
  377 	postDate, postName, err := parseJekyllFilename(filename)
  378 	if err != nil {
  379 		jww.WARN.Printf("Failed to parse filename '%s': %s. Skipping.", filename, err)
  380 		return nil
  381 	}
  382 
  383 	jww.TRACE.Println(filename, postDate, postName)
  384 
  385 	targetFile := filepath.Join(targetDir, relPath)
  386 	targetParentDir := filepath.Dir(targetFile)
  387 	os.MkdirAll(targetParentDir, 0777)
  388 
  389 	contentBytes, err := ioutil.ReadFile(path)
  390 	if err != nil {
  391 		jww.ERROR.Println("Read file error:", path)
  392 		return err
  393 	}
  394 
  395 	pf, err := pageparser.ParseFrontMatterAndContent(bytes.NewReader(contentBytes))
  396 	if err != nil {
  397 		jww.ERROR.Println("Parse file error:", path)
  398 		return err
  399 	}
  400 
  401 	newmetadata, err := convertJekyllMetaData(pf.FrontMatter, postName, postDate, draft)
  402 	if err != nil {
  403 		jww.ERROR.Println("Convert metadata error:", path)
  404 		return err
  405 	}
  406 
  407 	content, err := convertJekyllContent(newmetadata, string(pf.Content))
  408 	if err != nil {
  409 		jww.ERROR.Println("Converting Jekyll error:", path)
  410 		return err
  411 	}
  412 
  413 	fs := hugofs.Os
  414 	if err := helpers.WriteToDisk(targetFile, strings.NewReader(content), fs); err != nil {
  415 		return fmt.Errorf("failed to save file %q: %s", filename, err)
  416 	}
  417 
  418 	return nil
  419 }
  420 
  421 func convertJekyllMetaData(m any, postName string, postDate time.Time, draft bool) (any, error) {
  422 	metadata, err := maps.ToStringMapE(m)
  423 	if err != nil {
  424 		return nil, err
  425 	}
  426 
  427 	if draft {
  428 		metadata["draft"] = true
  429 	}
  430 
  431 	for key, value := range metadata {
  432 		lowerKey := strings.ToLower(key)
  433 
  434 		switch lowerKey {
  435 		case "layout":
  436 			delete(metadata, key)
  437 		case "permalink":
  438 			if str, ok := value.(string); ok {
  439 				metadata["url"] = str
  440 			}
  441 			delete(metadata, key)
  442 		case "category":
  443 			if str, ok := value.(string); ok {
  444 				metadata["categories"] = []string{str}
  445 			}
  446 			delete(metadata, key)
  447 		case "excerpt_separator":
  448 			if key != lowerKey {
  449 				delete(metadata, key)
  450 				metadata[lowerKey] = value
  451 			}
  452 		case "date":
  453 			if str, ok := value.(string); ok {
  454 				re := regexp.MustCompile(`(\d+):(\d+):(\d+)`)
  455 				r := re.FindAllStringSubmatch(str, -1)
  456 				if len(r) > 0 {
  457 					hour, _ := strconv.Atoi(r[0][1])
  458 					minute, _ := strconv.Atoi(r[0][2])
  459 					second, _ := strconv.Atoi(r[0][3])
  460 					postDate = time.Date(postDate.Year(), postDate.Month(), postDate.Day(), hour, minute, second, 0, time.UTC)
  461 				}
  462 			}
  463 			delete(metadata, key)
  464 		}
  465 
  466 	}
  467 
  468 	metadata["date"] = postDate.Format(time.RFC3339)
  469 
  470 	return metadata, nil
  471 }
  472 
  473 func convertJekyllContent(m any, content string) (string, error) {
  474 	metadata, _ := maps.ToStringMapE(m)
  475 
  476 	lines := strings.Split(content, "\n")
  477 	var resultLines []string
  478 	for _, line := range lines {
  479 		resultLines = append(resultLines, strings.Trim(line, "\r\n"))
  480 	}
  481 
  482 	content = strings.Join(resultLines, "\n")
  483 
  484 	excerptSep := "<!--more-->"
  485 	if value, ok := metadata["excerpt_separator"]; ok {
  486 		if str, strOk := value.(string); strOk {
  487 			content = strings.Replace(content, strings.TrimSpace(str), excerptSep, -1)
  488 		}
  489 	}
  490 
  491 	replaceList := []struct {
  492 		re      *regexp.Regexp
  493 		replace string
  494 	}{
  495 		{regexp.MustCompile("(?i)<!-- more -->"), "<!--more-->"},
  496 		{regexp.MustCompile(`\{%\s*raw\s*%\}\s*(.*?)\s*\{%\s*endraw\s*%\}`), "$1"},
  497 		{regexp.MustCompile(`{%\s*endhighlight\s*%}`), "{{< / highlight >}}"},
  498 	}
  499 
  500 	for _, replace := range replaceList {
  501 		content = replace.re.ReplaceAllString(content, replace.replace)
  502 	}
  503 
  504 	replaceListFunc := []struct {
  505 		re      *regexp.Regexp
  506 		replace func(string) string
  507 	}{
  508 		// Octopress image tag: http://octopress.org/docs/plugins/image-tag/
  509 		{regexp.MustCompile(`{%\s+img\s*(.*?)\s*%}`), replaceImageTag},
  510 		{regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`), replaceHighlightTag},
  511 	}
  512 
  513 	for _, replace := range replaceListFunc {
  514 		content = replace.re.ReplaceAllStringFunc(content, replace.replace)
  515 	}
  516 
  517 	var buf bytes.Buffer
  518 	if len(metadata) != 0 {
  519 		err := parser.InterfaceToFrontMatter(m, metadecoders.YAML, &buf)
  520 		if err != nil {
  521 			return "", err
  522 		}
  523 	}
  524 	buf.WriteString(content)
  525 
  526 	return buf.String(), nil
  527 }
  528 
  529 func replaceHighlightTag(match string) string {
  530 	r := regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`)
  531 	parts := r.FindStringSubmatch(match)
  532 	lastQuote := rune(0)
  533 	f := func(c rune) bool {
  534 		switch {
  535 		case c == lastQuote:
  536 			lastQuote = rune(0)
  537 			return false
  538 		case lastQuote != rune(0):
  539 			return false
  540 		case unicode.In(c, unicode.Quotation_Mark):
  541 			lastQuote = c
  542 			return false
  543 		default:
  544 			return unicode.IsSpace(c)
  545 		}
  546 	}
  547 	// splitting string by space but considering quoted section
  548 	items := strings.FieldsFunc(parts[1], f)
  549 
  550 	result := bytes.NewBufferString("{{< highlight ")
  551 	result.WriteString(items[0]) // language
  552 	options := items[1:]
  553 	for i, opt := range options {
  554 		opt = strings.Replace(opt, "\"", "", -1)
  555 		if opt == "linenos" {
  556 			opt = "linenos=table"
  557 		}
  558 		if i == 0 {
  559 			opt = " \"" + opt
  560 		}
  561 		if i < len(options)-1 {
  562 			opt += ","
  563 		} else if i == len(options)-1 {
  564 			opt += "\""
  565 		}
  566 		result.WriteString(opt)
  567 	}
  568 
  569 	result.WriteString(" >}}")
  570 	return result.String()
  571 }
  572 
  573 func replaceImageTag(match string) string {
  574 	r := regexp.MustCompile(`{%\s+img\s*(\p{L}*)\s+([\S]*/[\S]+)\s+(\d*)\s*(\d*)\s*(.*?)\s*%}`)
  575 	result := bytes.NewBufferString("{{< figure ")
  576 	parts := r.FindStringSubmatch(match)
  577 	// Index 0 is the entire string, ignore
  578 	replaceOptionalPart(result, "class", parts[1])
  579 	replaceOptionalPart(result, "src", parts[2])
  580 	replaceOptionalPart(result, "width", parts[3])
  581 	replaceOptionalPart(result, "height", parts[4])
  582 	// title + alt
  583 	part := parts[5]
  584 	if len(part) > 0 {
  585 		splits := strings.Split(part, "'")
  586 		lenSplits := len(splits)
  587 		if lenSplits == 1 {
  588 			replaceOptionalPart(result, "title", splits[0])
  589 		} else if lenSplits == 3 {
  590 			replaceOptionalPart(result, "title", splits[1])
  591 		} else if lenSplits == 5 {
  592 			replaceOptionalPart(result, "title", splits[1])
  593 			replaceOptionalPart(result, "alt", splits[3])
  594 		}
  595 	}
  596 	result.WriteString(">}}")
  597 	return result.String()
  598 }
  599 
  600 func replaceOptionalPart(buffer *bytes.Buffer, partName string, part string) {
  601 	if len(part) > 0 {
  602 		buffer.WriteString(partName + "=\"" + part + "\" ")
  603 	}
  604 }