import_jekyll.go (15674B)
1 // Copyright 2019 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package commands
15
16 import (
17 "bytes"
18 "errors"
19 "fmt"
20 "io/ioutil"
21 "os"
22 "path/filepath"
23 "regexp"
24 "strconv"
25 "strings"
26 "time"
27 "unicode"
28
29 "github.com/gohugoio/hugo/parser/pageparser"
30
31 "github.com/gohugoio/hugo/common/htime"
32 "github.com/gohugoio/hugo/common/hugio"
33
34 "github.com/gohugoio/hugo/parser/metadecoders"
35
36 "github.com/gohugoio/hugo/common/maps"
37 "github.com/gohugoio/hugo/helpers"
38 "github.com/gohugoio/hugo/hugofs"
39 "github.com/gohugoio/hugo/parser"
40 "github.com/spf13/afero"
41 "github.com/spf13/cobra"
42 jww "github.com/spf13/jwalterweatherman"
43 )
44
// Compile-time assertion that *importCmd satisfies the cmder interface.
var _ cmder = (*importCmd)(nil)

// importCmd implements the "hugo import" command family. It embeds *baseCmd
// and adds no state of its own.
type importCmd struct {
	*baseCmd
}
50
51 func newImportCmd() *importCmd {
52 cc := &importCmd{}
53
54 cc.baseCmd = newBaseCmd(&cobra.Command{
55 Use: "import",
56 Short: "Import your site from others.",
57 Long: `Import your site from other web site generators like Jekyll.
58
59 Import requires a subcommand, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.",
60 RunE: nil,
61 })
62
63 importJekyllCmd := &cobra.Command{
64 Use: "jekyll",
65 Short: "hugo import from Jekyll",
66 Long: `hugo import from Jekyll.
67
68 Import from Jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.",
69 RunE: cc.importFromJekyll,
70 }
71
72 importJekyllCmd.Flags().Bool("force", false, "allow import into non-empty target directory")
73
74 cc.cmd.AddCommand(importJekyllCmd)
75
76 return cc
77 }
78
79 func (i *importCmd) importFromJekyll(cmd *cobra.Command, args []string) error {
80 if len(args) < 2 {
81 return newUserError(`import from jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.")
82 }
83
84 jekyllRoot, err := filepath.Abs(filepath.Clean(args[0]))
85 if err != nil {
86 return newUserError("path error:", args[0])
87 }
88
89 targetDir, err := filepath.Abs(filepath.Clean(args[1]))
90 if err != nil {
91 return newUserError("path error:", args[1])
92 }
93
94 jww.INFO.Println("Import Jekyll from:", jekyllRoot, "to:", targetDir)
95
96 if strings.HasPrefix(filepath.Dir(targetDir), jekyllRoot) {
97 return newUserError("abort: target path should not be inside the Jekyll root")
98 }
99
100 forceImport, _ := cmd.Flags().GetBool("force")
101
102 fs := afero.NewOsFs()
103 jekyllPostDirs, hasAnyPost := i.getJekyllDirInfo(fs, jekyllRoot)
104 if !hasAnyPost {
105 return errors.New("abort: jekyll root contains neither posts nor drafts")
106 }
107
108 err = i.createSiteFromJekyll(jekyllRoot, targetDir, jekyllPostDirs, forceImport)
109
110 if err != nil {
111 return newUserError(err)
112 }
113
114 jww.FEEDBACK.Println("Importing...")
115
116 fileCount := 0
117 callback := func(path string, fi hugofs.FileMetaInfo, err error) error {
118 if err != nil {
119 return err
120 }
121
122 if fi.IsDir() {
123 return nil
124 }
125
126 relPath, err := filepath.Rel(jekyllRoot, path)
127 if err != nil {
128 return newUserError("get rel path error:", path)
129 }
130
131 relPath = filepath.ToSlash(relPath)
132 draft := false
133
134 switch {
135 case strings.Contains(relPath, "_posts/"):
136 relPath = filepath.Join("content/post", strings.Replace(relPath, "_posts/", "", -1))
137 case strings.Contains(relPath, "_drafts/"):
138 relPath = filepath.Join("content/draft", strings.Replace(relPath, "_drafts/", "", -1))
139 draft = true
140 default:
141 return nil
142 }
143
144 fileCount++
145 return convertJekyllPost(path, relPath, targetDir, draft)
146 }
147
148 for jekyllPostDir, hasAnyPostInDir := range jekyllPostDirs {
149 if hasAnyPostInDir {
150 if err = helpers.SymbolicWalk(hugofs.Os, filepath.Join(jekyllRoot, jekyllPostDir), callback); err != nil {
151 return err
152 }
153 }
154 }
155
156 jww.FEEDBACK.Println("Congratulations!", fileCount, "post(s) imported!")
157 jww.FEEDBACK.Println("Now, start Hugo by yourself:\n" +
158 "$ git clone https://github.com/spf13/herring-cove.git " + args[1] + "/themes/herring-cove")
159 jww.FEEDBACK.Println("$ cd " + args[1] + "\n$ hugo server --theme=herring-cove")
160
161 return nil
162 }
163
164 func (i *importCmd) getJekyllDirInfo(fs afero.Fs, jekyllRoot string) (map[string]bool, bool) {
165 postDirs := make(map[string]bool)
166 hasAnyPost := false
167 if entries, err := ioutil.ReadDir(jekyllRoot); err == nil {
168 for _, entry := range entries {
169 if entry.IsDir() {
170 subDir := filepath.Join(jekyllRoot, entry.Name())
171 if isPostDir, hasAnyPostInDir := i.retrieveJekyllPostDir(fs, subDir); isPostDir {
172 postDirs[entry.Name()] = hasAnyPostInDir
173 if hasAnyPostInDir {
174 hasAnyPost = true
175 }
176 }
177 }
178 }
179 }
180 return postDirs, hasAnyPost
181 }
182
183 func (i *importCmd) retrieveJekyllPostDir(fs afero.Fs, dir string) (bool, bool) {
184 if strings.HasSuffix(dir, "_posts") || strings.HasSuffix(dir, "_drafts") {
185 isEmpty, _ := helpers.IsEmpty(dir, fs)
186 return true, !isEmpty
187 }
188
189 if entries, err := ioutil.ReadDir(dir); err == nil {
190 for _, entry := range entries {
191 if entry.IsDir() {
192 subDir := filepath.Join(dir, entry.Name())
193 if isPostDir, hasAnyPost := i.retrieveJekyllPostDir(fs, subDir); isPostDir {
194 return isPostDir, hasAnyPost
195 }
196 }
197 }
198 }
199
200 return false, true
201 }
202
203 func (i *importCmd) createSiteFromJekyll(jekyllRoot, targetDir string, jekyllPostDirs map[string]bool, force bool) error {
204 fs := &afero.OsFs{}
205 if exists, _ := helpers.Exists(targetDir, fs); exists {
206 if isDir, _ := helpers.IsDir(targetDir, fs); !isDir {
207 return errors.New("target path \"" + targetDir + "\" exists but is not a directory")
208 }
209
210 isEmpty, _ := helpers.IsEmpty(targetDir, fs)
211
212 if !isEmpty && !force {
213 return errors.New("target path \"" + targetDir + "\" exists and is not empty")
214 }
215 }
216
217 jekyllConfig := i.loadJekyllConfig(fs, jekyllRoot)
218
219 mkdir(targetDir, "layouts")
220 mkdir(targetDir, "content")
221 mkdir(targetDir, "archetypes")
222 mkdir(targetDir, "static")
223 mkdir(targetDir, "data")
224 mkdir(targetDir, "themes")
225
226 i.createConfigFromJekyll(fs, targetDir, "yaml", jekyllConfig)
227
228 i.copyJekyllFilesAndFolders(jekyllRoot, filepath.Join(targetDir, "static"), jekyllPostDirs)
229
230 return nil
231 }
232
233 func (i *importCmd) loadJekyllConfig(fs afero.Fs, jekyllRoot string) map[string]any {
234 path := filepath.Join(jekyllRoot, "_config.yml")
235
236 exists, err := helpers.Exists(path, fs)
237
238 if err != nil || !exists {
239 jww.WARN.Println("_config.yaml not found: Is the specified Jekyll root correct?")
240 return nil
241 }
242
243 f, err := fs.Open(path)
244 if err != nil {
245 return nil
246 }
247
248 defer f.Close()
249
250 b, err := ioutil.ReadAll(f)
251 if err != nil {
252 return nil
253 }
254
255 c, err := metadecoders.Default.UnmarshalToMap(b, metadecoders.YAML)
256 if err != nil {
257 return nil
258 }
259
260 return c
261 }
262
263 func (i *importCmd) createConfigFromJekyll(fs afero.Fs, inpath string, kind metadecoders.Format, jekyllConfig map[string]any) (err error) {
264 title := "My New Hugo Site"
265 baseURL := "http://example.org/"
266
267 for key, value := range jekyllConfig {
268 lowerKey := strings.ToLower(key)
269
270 switch lowerKey {
271 case "title":
272 if str, ok := value.(string); ok {
273 title = str
274 }
275
276 case "url":
277 if str, ok := value.(string); ok {
278 baseURL = str
279 }
280 }
281 }
282
283 in := map[string]any{
284 "baseURL": baseURL,
285 "title": title,
286 "languageCode": "en-us",
287 "disablePathToLower": true,
288 }
289
290 var buf bytes.Buffer
291 err = parser.InterfaceToConfig(in, kind, &buf)
292 if err != nil {
293 return err
294 }
295
296 return helpers.WriteToDisk(filepath.Join(inpath, "config."+string(kind)), &buf, fs)
297 }
298
299 func (i *importCmd) copyJekyllFilesAndFolders(jekyllRoot, dest string, jekyllPostDirs map[string]bool) (err error) {
300 fs := hugofs.Os
301
302 fi, err := fs.Stat(jekyllRoot)
303 if err != nil {
304 return err
305 }
306 if !fi.IsDir() {
307 return errors.New(jekyllRoot + " is not a directory")
308 }
309 err = os.MkdirAll(dest, fi.Mode())
310 if err != nil {
311 return err
312 }
313 entries, err := ioutil.ReadDir(jekyllRoot)
314 if err != nil {
315 return err
316 }
317
318 for _, entry := range entries {
319 sfp := filepath.Join(jekyllRoot, entry.Name())
320 dfp := filepath.Join(dest, entry.Name())
321 if entry.IsDir() {
322 if entry.Name()[0] != '_' && entry.Name()[0] != '.' {
323 if _, ok := jekyllPostDirs[entry.Name()]; !ok {
324 err = hugio.CopyDir(fs, sfp, dfp, nil)
325 if err != nil {
326 jww.ERROR.Println(err)
327 }
328 }
329 }
330 } else {
331 lowerEntryName := strings.ToLower(entry.Name())
332 exceptSuffix := []string{
333 ".md", ".markdown", ".html", ".htm",
334 ".xml", ".textile", "rakefile", "gemfile", ".lock",
335 }
336 isExcept := false
337 for _, suffix := range exceptSuffix {
338 if strings.HasSuffix(lowerEntryName, suffix) {
339 isExcept = true
340 break
341 }
342 }
343
344 if !isExcept && entry.Name()[0] != '.' && entry.Name()[0] != '_' {
345 err = hugio.CopyFile(fs, sfp, dfp)
346 if err != nil {
347 jww.ERROR.Println(err)
348 }
349 }
350 }
351
352 }
353 return nil
354 }
355
356 func parseJekyllFilename(filename string) (time.Time, string, error) {
357 re := regexp.MustCompile(`(\d+-\d+-\d+)-(.+)\..*`)
358 r := re.FindAllStringSubmatch(filename, -1)
359 if len(r) == 0 {
360 return htime.Now(), "", errors.New("filename not match")
361 }
362
363 postDate, err := time.Parse("2006-1-2", r[0][1])
364 if err != nil {
365 return htime.Now(), "", err
366 }
367
368 postName := r[0][2]
369
370 return postDate, postName, nil
371 }
372
373 func convertJekyllPost(path, relPath, targetDir string, draft bool) error {
374 jww.TRACE.Println("Converting", path)
375
376 filename := filepath.Base(path)
377 postDate, postName, err := parseJekyllFilename(filename)
378 if err != nil {
379 jww.WARN.Printf("Failed to parse filename '%s': %s. Skipping.", filename, err)
380 return nil
381 }
382
383 jww.TRACE.Println(filename, postDate, postName)
384
385 targetFile := filepath.Join(targetDir, relPath)
386 targetParentDir := filepath.Dir(targetFile)
387 os.MkdirAll(targetParentDir, 0777)
388
389 contentBytes, err := ioutil.ReadFile(path)
390 if err != nil {
391 jww.ERROR.Println("Read file error:", path)
392 return err
393 }
394
395 pf, err := pageparser.ParseFrontMatterAndContent(bytes.NewReader(contentBytes))
396 if err != nil {
397 jww.ERROR.Println("Parse file error:", path)
398 return err
399 }
400
401 newmetadata, err := convertJekyllMetaData(pf.FrontMatter, postName, postDate, draft)
402 if err != nil {
403 jww.ERROR.Println("Convert metadata error:", path)
404 return err
405 }
406
407 content, err := convertJekyllContent(newmetadata, string(pf.Content))
408 if err != nil {
409 jww.ERROR.Println("Converting Jekyll error:", path)
410 return err
411 }
412
413 fs := hugofs.Os
414 if err := helpers.WriteToDisk(targetFile, strings.NewReader(content), fs); err != nil {
415 return fmt.Errorf("failed to save file %q: %s", filename, err)
416 }
417
418 return nil
419 }
420
421 func convertJekyllMetaData(m any, postName string, postDate time.Time, draft bool) (any, error) {
422 metadata, err := maps.ToStringMapE(m)
423 if err != nil {
424 return nil, err
425 }
426
427 if draft {
428 metadata["draft"] = true
429 }
430
431 for key, value := range metadata {
432 lowerKey := strings.ToLower(key)
433
434 switch lowerKey {
435 case "layout":
436 delete(metadata, key)
437 case "permalink":
438 if str, ok := value.(string); ok {
439 metadata["url"] = str
440 }
441 delete(metadata, key)
442 case "category":
443 if str, ok := value.(string); ok {
444 metadata["categories"] = []string{str}
445 }
446 delete(metadata, key)
447 case "excerpt_separator":
448 if key != lowerKey {
449 delete(metadata, key)
450 metadata[lowerKey] = value
451 }
452 case "date":
453 if str, ok := value.(string); ok {
454 re := regexp.MustCompile(`(\d+):(\d+):(\d+)`)
455 r := re.FindAllStringSubmatch(str, -1)
456 if len(r) > 0 {
457 hour, _ := strconv.Atoi(r[0][1])
458 minute, _ := strconv.Atoi(r[0][2])
459 second, _ := strconv.Atoi(r[0][3])
460 postDate = time.Date(postDate.Year(), postDate.Month(), postDate.Day(), hour, minute, second, 0, time.UTC)
461 }
462 }
463 delete(metadata, key)
464 }
465
466 }
467
468 metadata["date"] = postDate.Format(time.RFC3339)
469
470 return metadata, nil
471 }
472
473 func convertJekyllContent(m any, content string) (string, error) {
474 metadata, _ := maps.ToStringMapE(m)
475
476 lines := strings.Split(content, "\n")
477 var resultLines []string
478 for _, line := range lines {
479 resultLines = append(resultLines, strings.Trim(line, "\r\n"))
480 }
481
482 content = strings.Join(resultLines, "\n")
483
484 excerptSep := "<!--more-->"
485 if value, ok := metadata["excerpt_separator"]; ok {
486 if str, strOk := value.(string); strOk {
487 content = strings.Replace(content, strings.TrimSpace(str), excerptSep, -1)
488 }
489 }
490
491 replaceList := []struct {
492 re *regexp.Regexp
493 replace string
494 }{
495 {regexp.MustCompile("(?i)<!-- more -->"), "<!--more-->"},
496 {regexp.MustCompile(`\{%\s*raw\s*%\}\s*(.*?)\s*\{%\s*endraw\s*%\}`), "$1"},
497 {regexp.MustCompile(`{%\s*endhighlight\s*%}`), "{{< / highlight >}}"},
498 }
499
500 for _, replace := range replaceList {
501 content = replace.re.ReplaceAllString(content, replace.replace)
502 }
503
504 replaceListFunc := []struct {
505 re *regexp.Regexp
506 replace func(string) string
507 }{
508 // Octopress image tag: http://octopress.org/docs/plugins/image-tag/
509 {regexp.MustCompile(`{%\s+img\s*(.*?)\s*%}`), replaceImageTag},
510 {regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`), replaceHighlightTag},
511 }
512
513 for _, replace := range replaceListFunc {
514 content = replace.re.ReplaceAllStringFunc(content, replace.replace)
515 }
516
517 var buf bytes.Buffer
518 if len(metadata) != 0 {
519 err := parser.InterfaceToFrontMatter(m, metadecoders.YAML, &buf)
520 if err != nil {
521 return "", err
522 }
523 }
524 buf.WriteString(content)
525
526 return buf.String(), nil
527 }
528
// jekyllHighlightTagRe captures the arguments of a "{% highlight ... %}" tag.
var jekyllHighlightTagRe = regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`)

// replaceHighlightTag converts a Jekyll "{% highlight lang [options] %}" tag
// into a Hugo "{{< highlight lang "opt1,opt2" >}}" shortcode. Double quotes
// are removed from options and a bare "linenos" option becomes
// "linenos=table".
//
// If match is not a highlight tag, or the tag names no language, match is
// returned unchanged instead of panicking.
func replaceHighlightTag(match string) string {
	parts := jekyllHighlightTagRe.FindStringSubmatch(match)
	if parts == nil {
		return match
	}

	items := splitHighlightArgs(parts[1])
	if len(items) == 0 {
		// No language given: there is nothing sensible to emit.
		return match
	}

	result := bytes.NewBufferString("{{< highlight ")
	result.WriteString(items[0]) // language

	if options := items[1:]; len(options) > 0 {
		cleaned := make([]string, 0, len(options))
		for _, opt := range options {
			opt = strings.Replace(opt, "\"", "", -1)
			if opt == "linenos" {
				opt = "linenos=table"
			}
			cleaned = append(cleaned, opt)
		}
		// All options are emitted as one quoted, comma-separated argument.
		result.WriteString(" \"" + strings.Join(cleaned, ",") + "\"")
	}

	result.WriteString(" >}}")
	return result.String()
}

// splitHighlightArgs splits s on white space while keeping quoted sections
// together. A quoted section starts at any quotation-mark rune and ends at
// the next occurrence of that same rune. The scan is done by hand because
// strings.FieldsFunc requires a stateless predicate.
func splitHighlightArgs(s string) []string {
	var (
		fields    []string
		lastQuote rune
		start     = -1 // byte offset of the current field, -1 when between fields
	)

	for i, c := range s {
		sep := false
		switch {
		case c == lastQuote:
			lastQuote = rune(0)
		case lastQuote != rune(0):
			// Inside a quoted section: never a separator.
		case unicode.In(c, unicode.Quotation_Mark):
			lastQuote = c
		default:
			sep = unicode.IsSpace(c)
		}

		switch {
		case sep && start >= 0:
			fields = append(fields, s[start:i])
			start = -1
		case !sep && start < 0:
			start = i
		}
	}

	if start >= 0 {
		fields = append(fields, s[start:])
	}

	return fields
}
572
// jekyllImageTagRe splits an Octopress image tag into class, src, width,
// height and a trailing title/alt part.
var jekyllImageTagRe = regexp.MustCompile(`{%\s+img\s*(\p{L}*)\s+([\S]*/[\S]+)\s+(\d*)\s*(\d*)\s*(.*?)\s*%}`)

// replaceImageTag converts an Octopress
// "{% img [class] src [width] [height] [title ['alt']] %}" tag into a Hugo
// "{{< figure ... >}}" shortcode. Only the parts present in the tag are
// emitted.
//
// If match cannot be parsed as an image tag (for example the source contains
// no "/"), match is returned unchanged instead of panicking.
func replaceImageTag(match string) string {
	parts := jekyllImageTagRe.FindStringSubmatch(match)
	if parts == nil {
		return match
	}

	result := bytes.NewBufferString("{{< figure ")
	// Index 0 is the entire string, ignore
	replaceOptionalPart(result, "class", parts[1])
	replaceOptionalPart(result, "src", parts[2])
	replaceOptionalPart(result, "width", parts[3])
	replaceOptionalPart(result, "height", parts[4])

	// title + alt: an unquoted tail is the title; with single quotes, the
	// first quoted section is the title and the second the alt text.
	if part := parts[5]; len(part) > 0 {
		splits := strings.Split(part, "'")
		switch len(splits) {
		case 1:
			replaceOptionalPart(result, "title", splits[0])
		case 3:
			replaceOptionalPart(result, "title", splits[1])
		case 5:
			replaceOptionalPart(result, "title", splits[1])
			replaceOptionalPart(result, "alt", splits[3])
		}
	}

	result.WriteString(">}}")
	return result.String()
}

// replaceOptionalPart appends `partName="part" ` to buffer when part is
// non-empty and does nothing otherwise.
func replaceOptionalPart(buffer *bytes.Buffer, partName string, part string) {
	if len(part) > 0 {
		buffer.WriteString(partName + "=\"" + part + "\" ")
	}
}