decoder.go (7849B)
1 // Copyright 2018 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package metadecoders
15
16 import (
17 "bytes"
18 "encoding/csv"
19 "encoding/json"
20 "fmt"
21 "regexp"
22 "strings"
23
24 "github.com/gohugoio/hugo/common/herrors"
25 "github.com/niklasfasching/go-org/org"
26
27 xml "github.com/clbanning/mxj/v2"
28 toml "github.com/pelletier/go-toml/v2"
29 "github.com/spf13/afero"
30 "github.com/spf13/cast"
31 jww "github.com/spf13/jwalterweatherman"
32 yaml "gopkg.in/yaml.v2"
33 )
34
// Decoder provides some configuration options for the decoders.
type Decoder struct {
	// Delimiter is the field delimiter used in the CSV decoder. It defaults to ','.
	Delimiter rune

	// Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
	// Comment character without preceding whitespace are ignored.
	Comment rune
}
44
45 // OptionsKey is used in cache keys.
46 func (d Decoder) OptionsKey() string {
47 var sb strings.Builder
48 sb.WriteRune(d.Delimiter)
49 sb.WriteRune(d.Comment)
50 return sb.String()
51 }
52
// Default is a Decoder in its default configuration: ',' is the CSV field
// delimiter and Comment is left at its zero value.
var Default = Decoder{
	Delimiter: ',',
}
57
58 // UnmarshalToMap will unmarshall data in format f into a new map. This is
59 // what's needed for Hugo's front matter decoding.
60 func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]any, error) {
61 m := make(map[string]any)
62 if data == nil {
63 return m, nil
64 }
65
66 err := d.UnmarshalTo(data, f, &m)
67
68 return m, err
69 }
70
71 // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
72 // the given filename.
73 func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]any, error) {
74 format := FormatFromString(filename)
75 if format == "" {
76 return nil, fmt.Errorf("%q is not a valid configuration format", filename)
77 }
78
79 data, err := afero.ReadFile(fs, filename)
80 if err != nil {
81 return nil, err
82 }
83 return d.UnmarshalToMap(data, format)
84 }
85
86 // UnmarshalStringTo tries to unmarshal data to a new instance of type typ.
87 func (d Decoder) UnmarshalStringTo(data string, typ any) (any, error) {
88 data = strings.TrimSpace(data)
89 // We only check for the possible types in YAML, JSON and TOML.
90 switch typ.(type) {
91 case string:
92 return data, nil
93 case map[string]any:
94 format := d.FormatFromContentString(data)
95 return d.UnmarshalToMap([]byte(data), format)
96 case []any:
97 // A standalone slice. Let YAML handle it.
98 return d.Unmarshal([]byte(data), YAML)
99 case bool:
100 return cast.ToBoolE(data)
101 case int:
102 return cast.ToIntE(data)
103 case int64:
104 return cast.ToInt64E(data)
105 case float64:
106 return cast.ToFloat64E(data)
107 default:
108 return nil, fmt.Errorf("unmarshal: %T not supported", typ)
109 }
110 }
111
112 // Unmarshal will unmarshall data in format f into an interface{}.
113 // This is what's needed for Hugo's /data handling.
114 func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
115 if data == nil {
116 switch f {
117 case CSV:
118 return make([][]string, 0), nil
119 default:
120 return make(map[string]any), nil
121 }
122 }
123 var v any
124 err := d.UnmarshalTo(data, f, &v)
125
126 return v, err
127 }
128
129 // UnmarshalTo unmarshals data in format f into v.
130 func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
131 var err error
132
133 switch f {
134 case ORG:
135 err = d.unmarshalORG(data, v)
136 case JSON:
137 err = json.Unmarshal(data, v)
138 case XML:
139 var xmlRoot xml.Map
140 xmlRoot, err = xml.NewMapXml(data)
141
142 var xmlValue map[string]any
143 if err == nil {
144 xmlRootName, err := xmlRoot.Root()
145 if err != nil {
146 return toFileError(f, data, fmt.Errorf("failed to unmarshal XML: %w", err))
147 }
148 xmlValue = xmlRoot[xmlRootName].(map[string]any)
149 }
150
151 switch v := v.(type) {
152 case *map[string]any:
153 *v = xmlValue
154 case *any:
155 *v = xmlValue
156 }
157 case TOML:
158 err = toml.Unmarshal(data, v)
159 case YAML:
160 err = yaml.Unmarshal(data, v)
161 if err != nil {
162 return toFileError(f, data, fmt.Errorf("failed to unmarshal YAML: %w", err))
163 }
164
165 // To support boolean keys, the YAML package unmarshals maps to
166 // map[interface{}]interface{}. Here we recurse through the result
167 // and change all maps to map[string]interface{} like we would've
168 // gotten from `json`.
169 var ptr any
170 switch v.(type) {
171 case *map[string]any:
172 ptr = *v.(*map[string]any)
173 case *any:
174 ptr = *v.(*any)
175 default:
176 // Not a map.
177 }
178
179 if ptr != nil {
180 if mm, changed := stringifyMapKeys(ptr); changed {
181 switch v.(type) {
182 case *map[string]any:
183 *v.(*map[string]any) = mm.(map[string]any)
184 case *any:
185 *v.(*any) = mm
186 }
187 }
188 }
189 case CSV:
190 return d.unmarshalCSV(data, v)
191
192 default:
193 return fmt.Errorf("unmarshal of format %q is not supported", f)
194 }
195
196 if err == nil {
197 return nil
198 }
199
200 return toFileError(f, data, fmt.Errorf("unmarshal failed: %w", err))
201 }
202
203 func (d Decoder) unmarshalCSV(data []byte, v any) error {
204 r := csv.NewReader(bytes.NewReader(data))
205 r.Comma = d.Delimiter
206 r.Comment = d.Comment
207
208 records, err := r.ReadAll()
209 if err != nil {
210 return err
211 }
212
213 switch v.(type) {
214 case *any:
215 *v.(*any) = records
216 default:
217 return fmt.Errorf("CSV cannot be unmarshaled into %T", v)
218
219 }
220
221 return nil
222 }
223
// orgDateRe matches a YYYY-MM-DD date that directly follows '<' or '[', is
// followed by a space and arbitrary text, and is closed by '>' or ']'.
// It is compiled once at package scope instead of on every call.
var orgDateRe = regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2}) .*[>\]]`)

// parseORGDate extracts the YYYY-MM-DD part from a bracketed date such as
// "<2019-07-03 Wed>" or "[2019-07-03 Wed 10:00]". If s does not contain such
// a date, s is returned unchanged.
func parseORGDate(s string) string {
	if m := orgDateRe.FindStringSubmatch(s); m != nil {
		return m[1]
	}
	return s
}
231
232 func (d Decoder) unmarshalORG(data []byte, v any) error {
233 config := org.New()
234 config.Log = jww.WARN
235 document := config.Parse(bytes.NewReader(data), "")
236 if document.Error != nil {
237 return document.Error
238 }
239 frontMatter := make(map[string]any, len(document.BufferSettings))
240 for k, v := range document.BufferSettings {
241 k = strings.ToLower(k)
242 if strings.HasSuffix(k, "[]") {
243 frontMatter[k[:len(k)-2]] = strings.Fields(v)
244 } else if k == "tags" || k == "categories" || k == "aliases" {
245 jww.WARN.Printf("Please use '#+%s[]:' notation, automatic conversion is deprecated.", k)
246 frontMatter[k] = strings.Fields(v)
247 } else if k == "date" {
248 frontMatter[k] = parseORGDate(v)
249 } else {
250 frontMatter[k] = v
251 }
252 }
253 switch v.(type) {
254 case *map[string]any:
255 *v.(*map[string]any) = frontMatter
256 default:
257 *v.(*any) = frontMatter
258 }
259 return nil
260 }
261
262 func toFileError(f Format, data []byte, err error) error {
263 return herrors.NewFileErrorFromName(err, fmt.Sprintf("_stream.%s", f)).UpdateContent(bytes.NewReader(data), nil)
264 }
265
266 // stringifyMapKeys recurses into in and changes all instances of
267 // map[interface{}]interface{} to map[string]interface{}. This is useful to
268 // work around the impedance mismatch between JSON and YAML unmarshaling that's
269 // described here: https://github.com/go-yaml/yaml/issues/139
270 //
271 // Inspired by https://github.com/stripe/stripe-mock, MIT licensed
272 func stringifyMapKeys(in any) (any, bool) {
273 switch in := in.(type) {
274 case []any:
275 for i, v := range in {
276 if vv, replaced := stringifyMapKeys(v); replaced {
277 in[i] = vv
278 }
279 }
280 case map[string]any:
281 for k, v := range in {
282 if vv, changed := stringifyMapKeys(v); changed {
283 in[k] = vv
284 }
285 }
286 case map[any]any:
287 res := make(map[string]any)
288 var (
289 ok bool
290 err error
291 )
292 for k, v := range in {
293 var ks string
294
295 if ks, ok = k.(string); !ok {
296 ks, err = cast.ToStringE(k)
297 if err != nil {
298 ks = fmt.Sprintf("%v", k)
299 }
300 }
301 if vv, replaced := stringifyMapKeys(v); replaced {
302 res[ks] = vv
303 } else {
304 res[ks] = v
305 }
306 }
307 return res, true
308 }
309
310 return nil, false
311 }