import_jekyll.go (15674B)
1 // Copyright 2019 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package commands 15 16 import ( 17 "bytes" 18 "errors" 19 "fmt" 20 "io/ioutil" 21 "os" 22 "path/filepath" 23 "regexp" 24 "strconv" 25 "strings" 26 "time" 27 "unicode" 28 29 "github.com/gohugoio/hugo/parser/pageparser" 30 31 "github.com/gohugoio/hugo/common/htime" 32 "github.com/gohugoio/hugo/common/hugio" 33 34 "github.com/gohugoio/hugo/parser/metadecoders" 35 36 "github.com/gohugoio/hugo/common/maps" 37 "github.com/gohugoio/hugo/helpers" 38 "github.com/gohugoio/hugo/hugofs" 39 "github.com/gohugoio/hugo/parser" 40 "github.com/spf13/afero" 41 "github.com/spf13/cobra" 42 jww "github.com/spf13/jwalterweatherman" 43 ) 44 45 var _ cmder = (*importCmd)(nil) 46 47 type importCmd struct { 48 *baseCmd 49 } 50 51 func newImportCmd() *importCmd { 52 cc := &importCmd{} 53 54 cc.baseCmd = newBaseCmd(&cobra.Command{ 55 Use: "import", 56 Short: "Import your site from others.", 57 Long: `Import your site from other web site generators like Jekyll. 58 59 Import requires a subcommand, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.", 60 RunE: nil, 61 }) 62 63 importJekyllCmd := &cobra.Command{ 64 Use: "jekyll", 65 Short: "hugo import from Jekyll", 66 Long: `hugo import from Jekyll. 67 68 Import from Jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.", 69 RunE: cc.importFromJekyll, 70 } 71 72 importJekyllCmd.Flags().Bool("force", false, "allow import into non-empty target directory") 73 74 cc.cmd.AddCommand(importJekyllCmd) 75 76 return cc 77 } 78 79 func (i *importCmd) importFromJekyll(cmd *cobra.Command, args []string) error { 80 if len(args) < 2 { 81 return newUserError(`import from jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.") 82 } 83 84 jekyllRoot, err := filepath.Abs(filepath.Clean(args[0])) 85 if err != nil { 86 return newUserError("path error:", args[0]) 87 } 88 89 targetDir, err := filepath.Abs(filepath.Clean(args[1])) 90 if err != nil { 91 return newUserError("path error:", args[1]) 92 } 93 94 jww.INFO.Println("Import Jekyll from:", jekyllRoot, "to:", targetDir) 95 96 if strings.HasPrefix(filepath.Dir(targetDir), jekyllRoot) { 97 return newUserError("abort: target path should not be inside the Jekyll root") 98 } 99 100 forceImport, _ := cmd.Flags().GetBool("force") 101 102 fs := afero.NewOsFs() 103 jekyllPostDirs, hasAnyPost := i.getJekyllDirInfo(fs, jekyllRoot) 104 if !hasAnyPost { 105 return errors.New("abort: jekyll root contains neither posts nor drafts") 106 } 107 108 err = i.createSiteFromJekyll(jekyllRoot, targetDir, jekyllPostDirs, forceImport) 109 110 if err != nil { 111 return newUserError(err) 112 } 113 114 jww.FEEDBACK.Println("Importing...") 115 116 fileCount := 0 117 callback := func(path string, fi hugofs.FileMetaInfo, err error) error { 118 if err != nil { 119 return err 120 } 121 122 if fi.IsDir() { 123 return nil 124 } 125 126 relPath, err := filepath.Rel(jekyllRoot, path) 127 if err != nil { 128 return newUserError("get rel path error:", path) 129 } 130 131 relPath = filepath.ToSlash(relPath) 132 draft := false 133 134 switch { 135 case strings.Contains(relPath, "_posts/"): 136 relPath = filepath.Join("content/post", strings.Replace(relPath, "_posts/", "", -1)) 137 case strings.Contains(relPath, "_drafts/"): 138 relPath = filepath.Join("content/draft", strings.Replace(relPath, "_drafts/", "", -1)) 139 draft = true 140 default: 141 return nil 142 } 143 144 fileCount++ 145 return convertJekyllPost(path, relPath, targetDir, draft) 146 } 147 148 for jekyllPostDir, hasAnyPostInDir := range jekyllPostDirs { 149 if hasAnyPostInDir { 150 if err = helpers.SymbolicWalk(hugofs.Os, filepath.Join(jekyllRoot, jekyllPostDir), callback); err != nil { 151 return err 152 } 153 } 154 } 155 156 jww.FEEDBACK.Println("Congratulations!", fileCount, "post(s) imported!") 157 jww.FEEDBACK.Println("Now, start Hugo by yourself:\n" + 158 "$ git clone https://github.com/spf13/herring-cove.git " + args[1] + "/themes/herring-cove") 159 jww.FEEDBACK.Println("$ cd " + args[1] + "\n$ hugo server --theme=herring-cove") 160 161 return nil 162 } 163 164 func (i *importCmd) getJekyllDirInfo(fs afero.Fs, jekyllRoot string) (map[string]bool, bool) { 165 postDirs := make(map[string]bool) 166 hasAnyPost := false 167 if entries, err := ioutil.ReadDir(jekyllRoot); err == nil { 168 for _, entry := range entries { 169 if entry.IsDir() { 170 subDir := filepath.Join(jekyllRoot, entry.Name()) 171 if isPostDir, hasAnyPostInDir := i.retrieveJekyllPostDir(fs, subDir); isPostDir { 172 postDirs[entry.Name()] = hasAnyPostInDir 173 if hasAnyPostInDir { 174 hasAnyPost = true 175 } 176 } 177 } 178 } 179 } 180 return postDirs, hasAnyPost 181 } 182 183 func (i *importCmd) retrieveJekyllPostDir(fs afero.Fs, dir string) (bool, bool) { 184 if strings.HasSuffix(dir, "_posts") || strings.HasSuffix(dir, "_drafts") { 185 isEmpty, _ := helpers.IsEmpty(dir, fs) 186 return true, !isEmpty 187 } 188 189 if entries, err := ioutil.ReadDir(dir); err == nil { 190 for _, entry := range entries { 191 if entry.IsDir() { 192 subDir := filepath.Join(dir, entry.Name()) 193 if isPostDir, hasAnyPost := i.retrieveJekyllPostDir(fs, subDir); isPostDir { 194 return isPostDir, hasAnyPost 195 } 196 } 197 } 198 } 199 200 return false, true 201 } 202 203 func (i *importCmd) createSiteFromJekyll(jekyllRoot, targetDir string, jekyllPostDirs map[string]bool, force bool) error { 204 fs := &afero.OsFs{} 205 if exists, _ := helpers.Exists(targetDir, fs); exists { 206 if isDir, _ := helpers.IsDir(targetDir, fs); !isDir { 207 return errors.New("target path \"" + targetDir + "\" exists but is not a directory") 208 } 209 210 isEmpty, _ := helpers.IsEmpty(targetDir, fs) 211 212 if !isEmpty && !force { 213 return errors.New("target path \"" + targetDir + "\" exists and is not empty") 214 } 215 } 216 217 jekyllConfig := i.loadJekyllConfig(fs, jekyllRoot) 218 219 mkdir(targetDir, "layouts") 220 mkdir(targetDir, "content") 221 mkdir(targetDir, "archetypes") 222 mkdir(targetDir, "static") 223 mkdir(targetDir, "data") 224 mkdir(targetDir, "themes") 225 226 i.createConfigFromJekyll(fs, targetDir, "yaml", jekyllConfig) 227 228 i.copyJekyllFilesAndFolders(jekyllRoot, filepath.Join(targetDir, "static"), jekyllPostDirs) 229 230 return nil 231 } 232 233 func (i *importCmd) loadJekyllConfig(fs afero.Fs, jekyllRoot string) map[string]any { 234 path := filepath.Join(jekyllRoot, "_config.yml") 235 236 exists, err := helpers.Exists(path, fs) 237 238 if err != nil || !exists { 239 jww.WARN.Println("_config.yaml not found: Is the specified Jekyll root correct?") 240 return nil 241 } 242 243 f, err := fs.Open(path) 244 if err != nil { 245 return nil 246 } 247 248 defer f.Close() 249 250 b, err := ioutil.ReadAll(f) 251 if err != nil { 252 return nil 253 } 254 255 c, err := metadecoders.Default.UnmarshalToMap(b, metadecoders.YAML) 256 if err != nil { 257 return nil 258 } 259 260 return c 261 } 262 263 func (i *importCmd) createConfigFromJekyll(fs afero.Fs, inpath string, kind metadecoders.Format, jekyllConfig map[string]any) (err error) { 264 title := "My New Hugo Site" 265 baseURL := "http://example.org/" 266 267 for key, value := range jekyllConfig { 268 lowerKey := strings.ToLower(key) 269 270 switch lowerKey { 271 case "title": 272 if str, ok := value.(string); ok { 273 title = str 274 } 275 276 case "url": 277 if str, ok := value.(string); ok { 278 baseURL = str 279 } 280 } 281 } 282 283 in := map[string]any{ 284 "baseURL": baseURL, 285 "title": title, 286 "languageCode": "en-us", 287 "disablePathToLower": true, 288 } 289 290 var buf bytes.Buffer 291 err = parser.InterfaceToConfig(in, kind, &buf) 292 if err != nil { 293 return err 294 } 295 296 return helpers.WriteToDisk(filepath.Join(inpath, "config."+string(kind)), &buf, fs) 297 } 298 299 func (i *importCmd) copyJekyllFilesAndFolders(jekyllRoot, dest string, jekyllPostDirs map[string]bool) (err error) { 300 fs := hugofs.Os 301 302 fi, err := fs.Stat(jekyllRoot) 303 if err != nil { 304 return err 305 } 306 if !fi.IsDir() { 307 return errors.New(jekyllRoot + " is not a directory") 308 } 309 err = os.MkdirAll(dest, fi.Mode()) 310 if err != nil { 311 return err 312 } 313 entries, err := ioutil.ReadDir(jekyllRoot) 314 if err != nil { 315 return err 316 } 317 318 for _, entry := range entries { 319 sfp := filepath.Join(jekyllRoot, entry.Name()) 320 dfp := filepath.Join(dest, entry.Name()) 321 if entry.IsDir() { 322 if entry.Name()[0] != '_' && entry.Name()[0] != '.' { 323 if _, ok := jekyllPostDirs[entry.Name()]; !ok { 324 err = hugio.CopyDir(fs, sfp, dfp, nil) 325 if err != nil { 326 jww.ERROR.Println(err) 327 } 328 } 329 } 330 } else { 331 lowerEntryName := strings.ToLower(entry.Name()) 332 exceptSuffix := []string{ 333 ".md", ".markdown", ".html", ".htm", 334 ".xml", ".textile", "rakefile", "gemfile", ".lock", 335 } 336 isExcept := false 337 for _, suffix := range exceptSuffix { 338 if strings.HasSuffix(lowerEntryName, suffix) { 339 isExcept = true 340 break 341 } 342 } 343 344 if !isExcept && entry.Name()[0] != '.' && entry.Name()[0] != '_' { 345 err = hugio.CopyFile(fs, sfp, dfp) 346 if err != nil { 347 jww.ERROR.Println(err) 348 } 349 } 350 } 351 352 } 353 return nil 354 } 355 356 func parseJekyllFilename(filename string) (time.Time, string, error) { 357 re := regexp.MustCompile(`(\d+-\d+-\d+)-(.+)\..*`) 358 r := re.FindAllStringSubmatch(filename, -1) 359 if len(r) == 0 { 360 return htime.Now(), "", errors.New("filename not match") 361 } 362 363 postDate, err := time.Parse("2006-1-2", r[0][1]) 364 if err != nil { 365 return htime.Now(), "", err 366 } 367 368 postName := r[0][2] 369 370 return postDate, postName, nil 371 } 372 373 func convertJekyllPost(path, relPath, targetDir string, draft bool) error { 374 jww.TRACE.Println("Converting", path) 375 376 filename := filepath.Base(path) 377 postDate, postName, err := parseJekyllFilename(filename) 378 if err != nil { 379 jww.WARN.Printf("Failed to parse filename '%s': %s. Skipping.", filename, err) 380 return nil 381 } 382 383 jww.TRACE.Println(filename, postDate, postName) 384 385 targetFile := filepath.Join(targetDir, relPath) 386 targetParentDir := filepath.Dir(targetFile) 387 os.MkdirAll(targetParentDir, 0777) 388 389 contentBytes, err := ioutil.ReadFile(path) 390 if err != nil { 391 jww.ERROR.Println("Read file error:", path) 392 return err 393 } 394 395 pf, err := pageparser.ParseFrontMatterAndContent(bytes.NewReader(contentBytes)) 396 if err != nil { 397 jww.ERROR.Println("Parse file error:", path) 398 return err 399 } 400 401 newmetadata, err := convertJekyllMetaData(pf.FrontMatter, postName, postDate, draft) 402 if err != nil { 403 jww.ERROR.Println("Convert metadata error:", path) 404 return err 405 } 406 407 content, err := convertJekyllContent(newmetadata, string(pf.Content)) 408 if err != nil { 409 jww.ERROR.Println("Converting Jekyll error:", path) 410 return err 411 } 412 413 fs := hugofs.Os 414 if err := helpers.WriteToDisk(targetFile, strings.NewReader(content), fs); err != nil { 415 return fmt.Errorf("failed to save file %q: %s", filename, err) 416 } 417 418 return nil 419 } 420 421 func convertJekyllMetaData(m any, postName string, postDate time.Time, draft bool) (any, error) { 422 metadata, err := maps.ToStringMapE(m) 423 if err != nil { 424 return nil, err 425 } 426 427 if draft { 428 metadata["draft"] = true 429 } 430 431 for key, value := range metadata { 432 lowerKey := strings.ToLower(key) 433 434 switch lowerKey { 435 case "layout": 436 delete(metadata, key) 437 case "permalink": 438 if str, ok := value.(string); ok { 439 metadata["url"] = str 440 } 441 delete(metadata, key) 442 case "category": 443 if str, ok := value.(string); ok { 444 metadata["categories"] = []string{str} 445 } 446 delete(metadata, key) 447 case "excerpt_separator": 448 if key != lowerKey { 449 delete(metadata, key) 450 metadata[lowerKey] = value 451 } 452 case "date": 453 if str, ok := value.(string); ok { 454 re := regexp.MustCompile(`(\d+):(\d+):(\d+)`) 455 r := re.FindAllStringSubmatch(str, -1) 456 if len(r) > 0 { 457 hour, _ := strconv.Atoi(r[0][1]) 458 minute, _ := strconv.Atoi(r[0][2]) 459 second, _ := strconv.Atoi(r[0][3]) 460 postDate = time.Date(postDate.Year(), postDate.Month(), postDate.Day(), hour, minute, second, 0, time.UTC) 461 } 462 } 463 delete(metadata, key) 464 } 465 466 } 467 468 metadata["date"] = postDate.Format(time.RFC3339) 469 470 return metadata, nil 471 } 472 473 func convertJekyllContent(m any, content string) (string, error) { 474 metadata, _ := maps.ToStringMapE(m) 475 476 lines := strings.Split(content, "\n") 477 var resultLines []string 478 for _, line := range lines { 479 resultLines = append(resultLines, strings.Trim(line, "\r\n")) 480 } 481 482 content = strings.Join(resultLines, "\n") 483 484 excerptSep := "<!--more-->" 485 if value, ok := metadata["excerpt_separator"]; ok { 486 if str, strOk := value.(string); strOk { 487 content = strings.Replace(content, strings.TrimSpace(str), excerptSep, -1) 488 } 489 } 490 491 replaceList := []struct { 492 re *regexp.Regexp 493 replace string 494 }{ 495 {regexp.MustCompile("(?i)<!-- more -->"), "<!--more-->"}, 496 {regexp.MustCompile(`\{%\s*raw\s*%\}\s*(.*?)\s*\{%\s*endraw\s*%\}`), "$1"}, 497 {regexp.MustCompile(`{%\s*endhighlight\s*%}`), "{{< / highlight >}}"}, 498 } 499 500 for _, replace := range replaceList { 501 content = replace.re.ReplaceAllString(content, replace.replace) 502 } 503 504 replaceListFunc := []struct { 505 re *regexp.Regexp 506 replace func(string) string 507 }{ 508 // Octopress image tag: http://octopress.org/docs/plugins/image-tag/ 509 {regexp.MustCompile(`{%\s+img\s*(.*?)\s*%}`), replaceImageTag}, 510 {regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`), replaceHighlightTag}, 511 } 512 513 for _, replace := range replaceListFunc { 514 content = replace.re.ReplaceAllStringFunc(content, replace.replace) 515 } 516 517 var buf bytes.Buffer 518 if len(metadata) != 0 { 519 err := parser.InterfaceToFrontMatter(m, metadecoders.YAML, &buf) 520 if err != nil { 521 return "", err 522 } 523 } 524 buf.WriteString(content) 525 526 return buf.String(), nil 527 } 528 529 func replaceHighlightTag(match string) string { 530 r := regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`) 531 parts := r.FindStringSubmatch(match) 532 lastQuote := rune(0) 533 f := func(c rune) bool { 534 switch { 535 case c == lastQuote: 536 lastQuote = rune(0) 537 return false 538 case lastQuote != rune(0): 539 return false 540 case unicode.In(c, unicode.Quotation_Mark): 541 lastQuote = c 542 return false 543 default: 544 return unicode.IsSpace(c) 545 } 546 } 547 // splitting string by space but considering quoted section 548 items := strings.FieldsFunc(parts[1], f) 549 550 result := bytes.NewBufferString("{{< highlight ") 551 result.WriteString(items[0]) // language 552 options := items[1:] 553 for i, opt := range options { 554 opt = strings.Replace(opt, "\"", "", -1) 555 if opt == "linenos" { 556 opt = "linenos=table" 557 } 558 if i == 0 { 559 opt = " \"" + opt 560 } 561 if i < len(options)-1 { 562 opt += "," 563 } else if i == len(options)-1 { 564 opt += "\"" 565 } 566 result.WriteString(opt) 567 } 568 569 result.WriteString(" >}}") 570 return result.String() 571 } 572 573 func replaceImageTag(match string) string { 574 r := regexp.MustCompile(`{%\s+img\s*(\p{L}*)\s+([\S]*/[\S]+)\s+(\d*)\s*(\d*)\s*(.*?)\s*%}`) 575 result := bytes.NewBufferString("{{< figure ") 576 parts := r.FindStringSubmatch(match) 577 // Index 0 is the entire string, ignore 578 replaceOptionalPart(result, "class", parts[1]) 579 replaceOptionalPart(result, "src", parts[2]) 580 replaceOptionalPart(result, "width", parts[3]) 581 replaceOptionalPart(result, "height", parts[4]) 582 // title + alt 583 part := parts[5] 584 if len(part) > 0 { 585 splits := strings.Split(part, "'") 586 lenSplits := len(splits) 587 if lenSplits == 1 { 588 replaceOptionalPart(result, "title", splits[0]) 589 } else if lenSplits == 3 { 590 replaceOptionalPart(result, "title", splits[1]) 591 } else if lenSplits == 5 { 592 replaceOptionalPart(result, "title", splits[1]) 593 replaceOptionalPart(result, "alt", splits[3]) 594 } 595 } 596 result.WriteString(">}}") 597 return result.String() 598 } 599 600 func replaceOptionalPart(buffer *bytes.Buffer, partName string, part string) { 601 if len(part) > 0 { 602 buffer.WriteString(partName + "=\"" + part + "\" ") 603 } 604 }