decoder.go (7849B)
1 // Copyright 2018 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package metadecoders 15 16 import ( 17 "bytes" 18 "encoding/csv" 19 "encoding/json" 20 "fmt" 21 "regexp" 22 "strings" 23 24 "github.com/gohugoio/hugo/common/herrors" 25 "github.com/niklasfasching/go-org/org" 26 27 xml "github.com/clbanning/mxj/v2" 28 toml "github.com/pelletier/go-toml/v2" 29 "github.com/spf13/afero" 30 "github.com/spf13/cast" 31 jww "github.com/spf13/jwalterweatherman" 32 yaml "gopkg.in/yaml.v2" 33 ) 34 35 // Decoder provides some configuration options for the decoders. 36 type Decoder struct { 37 // Delimiter is the field delimiter used in the CSV decoder. It defaults to ','. 38 Delimiter rune 39 40 // Comment, if not 0, is the comment character ued in the CSV decoder. Lines beginning with the 41 // Comment character without preceding whitespace are ignored. 42 Comment rune 43 } 44 45 // OptionsKey is used in cache keys. 46 func (d Decoder) OptionsKey() string { 47 var sb strings.Builder 48 sb.WriteRune(d.Delimiter) 49 sb.WriteRune(d.Comment) 50 return sb.String() 51 } 52 53 // Default is a Decoder in its default configuration. 54 var Default = Decoder{ 55 Delimiter: ',', 56 } 57 58 // UnmarshalToMap will unmarshall data in format f into a new map. This is 59 // what's needed for Hugo's front matter decoding. 60 func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]any, error) { 61 m := make(map[string]any) 62 if data == nil { 63 return m, nil 64 } 65 66 err := d.UnmarshalTo(data, f, &m) 67 68 return m, err 69 } 70 71 // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from 72 // the given filename. 73 func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]any, error) { 74 format := FormatFromString(filename) 75 if format == "" { 76 return nil, fmt.Errorf("%q is not a valid configuration format", filename) 77 } 78 79 data, err := afero.ReadFile(fs, filename) 80 if err != nil { 81 return nil, err 82 } 83 return d.UnmarshalToMap(data, format) 84 } 85 86 // UnmarshalStringTo tries to unmarshal data to a new instance of type typ. 87 func (d Decoder) UnmarshalStringTo(data string, typ any) (any, error) { 88 data = strings.TrimSpace(data) 89 // We only check for the possible types in YAML, JSON and TOML. 90 switch typ.(type) { 91 case string: 92 return data, nil 93 case map[string]any: 94 format := d.FormatFromContentString(data) 95 return d.UnmarshalToMap([]byte(data), format) 96 case []any: 97 // A standalone slice. Let YAML handle it. 98 return d.Unmarshal([]byte(data), YAML) 99 case bool: 100 return cast.ToBoolE(data) 101 case int: 102 return cast.ToIntE(data) 103 case int64: 104 return cast.ToInt64E(data) 105 case float64: 106 return cast.ToFloat64E(data) 107 default: 108 return nil, fmt.Errorf("unmarshal: %T not supported", typ) 109 } 110 } 111 112 // Unmarshal will unmarshall data in format f into an interface{}. 113 // This is what's needed for Hugo's /data handling. 114 func (d Decoder) Unmarshal(data []byte, f Format) (any, error) { 115 if data == nil { 116 switch f { 117 case CSV: 118 return make([][]string, 0), nil 119 default: 120 return make(map[string]any), nil 121 } 122 } 123 var v any 124 err := d.UnmarshalTo(data, f, &v) 125 126 return v, err 127 } 128 129 // UnmarshalTo unmarshals data in format f into v. 130 func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error { 131 var err error 132 133 switch f { 134 case ORG: 135 err = d.unmarshalORG(data, v) 136 case JSON: 137 err = json.Unmarshal(data, v) 138 case XML: 139 var xmlRoot xml.Map 140 xmlRoot, err = xml.NewMapXml(data) 141 142 var xmlValue map[string]any 143 if err == nil { 144 xmlRootName, err := xmlRoot.Root() 145 if err != nil { 146 return toFileError(f, data, fmt.Errorf("failed to unmarshal XML: %w", err)) 147 } 148 xmlValue = xmlRoot[xmlRootName].(map[string]any) 149 } 150 151 switch v := v.(type) { 152 case *map[string]any: 153 *v = xmlValue 154 case *any: 155 *v = xmlValue 156 } 157 case TOML: 158 err = toml.Unmarshal(data, v) 159 case YAML: 160 err = yaml.Unmarshal(data, v) 161 if err != nil { 162 return toFileError(f, data, fmt.Errorf("failed to unmarshal YAML: %w", err)) 163 } 164 165 // To support boolean keys, the YAML package unmarshals maps to 166 // map[interface{}]interface{}. Here we recurse through the result 167 // and change all maps to map[string]interface{} like we would've 168 // gotten from `json`. 169 var ptr any 170 switch v.(type) { 171 case *map[string]any: 172 ptr = *v.(*map[string]any) 173 case *any: 174 ptr = *v.(*any) 175 default: 176 // Not a map. 177 } 178 179 if ptr != nil { 180 if mm, changed := stringifyMapKeys(ptr); changed { 181 switch v.(type) { 182 case *map[string]any: 183 *v.(*map[string]any) = mm.(map[string]any) 184 case *any: 185 *v.(*any) = mm 186 } 187 } 188 } 189 case CSV: 190 return d.unmarshalCSV(data, v) 191 192 default: 193 return fmt.Errorf("unmarshal of format %q is not supported", f) 194 } 195 196 if err == nil { 197 return nil 198 } 199 200 return toFileError(f, data, fmt.Errorf("unmarshal failed: %w", err)) 201 } 202 203 func (d Decoder) unmarshalCSV(data []byte, v any) error { 204 r := csv.NewReader(bytes.NewReader(data)) 205 r.Comma = d.Delimiter 206 r.Comment = d.Comment 207 208 records, err := r.ReadAll() 209 if err != nil { 210 return err 211 } 212 213 switch v.(type) { 214 case *any: 215 *v.(*any) = records 216 default: 217 return fmt.Errorf("CSV cannot be unmarshaled into %T", v) 218 219 } 220 221 return nil 222 } 223 224 func parseORGDate(s string) string { 225 r := regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2}) .*[>\]]`) 226 if m := r.FindStringSubmatch(s); m != nil { 227 return m[1] 228 } 229 return s 230 } 231 232 func (d Decoder) unmarshalORG(data []byte, v any) error { 233 config := org.New() 234 config.Log = jww.WARN 235 document := config.Parse(bytes.NewReader(data), "") 236 if document.Error != nil { 237 return document.Error 238 } 239 frontMatter := make(map[string]any, len(document.BufferSettings)) 240 for k, v := range document.BufferSettings { 241 k = strings.ToLower(k) 242 if strings.HasSuffix(k, "[]") { 243 frontMatter[k[:len(k)-2]] = strings.Fields(v) 244 } else if k == "tags" || k == "categories" || k == "aliases" { 245 jww.WARN.Printf("Please use '#+%s[]:' notation, automatic conversion is deprecated.", k) 246 frontMatter[k] = strings.Fields(v) 247 } else if k == "date" { 248 frontMatter[k] = parseORGDate(v) 249 } else { 250 frontMatter[k] = v 251 } 252 } 253 switch v.(type) { 254 case *map[string]any: 255 *v.(*map[string]any) = frontMatter 256 default: 257 *v.(*any) = frontMatter 258 } 259 return nil 260 } 261 262 func toFileError(f Format, data []byte, err error) error { 263 return herrors.NewFileErrorFromName(err, fmt.Sprintf("_stream.%s", f)).UpdateContent(bytes.NewReader(data), nil) 264 } 265 266 // stringifyMapKeys recurses into in and changes all instances of 267 // map[interface{}]interface{} to map[string]interface{}. This is useful to 268 // work around the impedance mismatch between JSON and YAML unmarshaling that's 269 // described here: https://github.com/go-yaml/yaml/issues/139 270 // 271 // Inspired by https://github.com/stripe/stripe-mock, MIT licensed 272 func stringifyMapKeys(in any) (any, bool) { 273 switch in := in.(type) { 274 case []any: 275 for i, v := range in { 276 if vv, replaced := stringifyMapKeys(v); replaced { 277 in[i] = vv 278 } 279 } 280 case map[string]any: 281 for k, v := range in { 282 if vv, changed := stringifyMapKeys(v); changed { 283 in[k] = vv 284 } 285 } 286 case map[any]any: 287 res := make(map[string]any) 288 var ( 289 ok bool 290 err error 291 ) 292 for k, v := range in { 293 var ks string 294 295 if ks, ok = k.(string); !ok { 296 ks, err = cast.ToStringE(k) 297 if err != nil { 298 ks = fmt.Sprintf("%v", k) 299 } 300 } 301 if vv, replaced := stringifyMapKeys(v); replaced { 302 res[ks] = vv 303 } else { 304 res[ks] = v 305 } 306 } 307 return res, true 308 } 309 310 return nil, false 311 }