mediaType.go (14772B)
1 // Copyright 2019 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package media
15
16 import (
17 "encoding/json"
18 "errors"
19 "fmt"
20 "net/http"
21 "sort"
22 "strings"
23
24 "github.com/spf13/cast"
25
26 "github.com/gohugoio/hugo/common/maps"
27
28 "github.com/mitchellh/mapstructure"
29 )
30
// zero is the zero-value Type. FromContent returns it when no media type
// could be resolved.
var zero Type

const (
	// defaultDelimiter is the delimiter assigned to a Type when none is set
	// explicitly. It is prepended to a suffix to build the full suffix.
	defaultDelimiter = "."
)
36
// Type (also known as MIME type and content type) is a two-part identifier for
// file formats and format contents transmitted on the Internet.
// For Hugo's use case, we use the top-level type name / subtype name + suffix.
// One example would be image/svg+xml.
// If suffix is not provided, the sub type will be used.
// See https://en.wikipedia.org/wiki/Media_type
//
// Type holds only string data so that values can be compared with == and
// used as map keys.
type Type struct {
	MainType  string `json:"mainType"`  // e.g. "text"
	SubType   string `json:"subType"`   // e.g. "html"
	Delimiter string `json:"delimiter"` // e.g. "."

	// FirstSuffix holds the first suffix defined for this Type.
	// It is derived from suffixesCSV and Delimiter by init.
	FirstSuffix SuffixInfo `json:"firstSuffix"`

	// This is the optional suffix after the "+" in the MIME type,
	// e.g. "xml" in "application/rss+xml".
	mimeSuffix string

	// E.g. "jpg,jpeg"
	// Stored as a string to make Type comparable.
	suffixesCSV string
}
59
// SuffixInfo holds information about a Type's suffix.
type SuffixInfo struct {
	// Suffix is the bare suffix, e.g. "html".
	Suffix string `json:"suffix"`

	// FullSuffix is the Type's delimiter followed by Suffix, e.g. ".html".
	FullSuffix string `json:"fullSuffix"`
}
65
66 // FromContent resolve the Type primarily using http.DetectContentType.
67 // If http.DetectContentType resolves to application/octet-stream, a zero Type is returned.
68 // If http.DetectContentType resolves to text/plain or application/xml, we try to get more specific using types and ext.
69 func FromContent(types Types, extensionHints []string, content []byte) Type {
70 t := strings.Split(http.DetectContentType(content), ";")[0]
71 if t == "application/octet-stream" {
72 return zero
73 }
74
75 var found bool
76 m, found := types.GetByType(t)
77 if !found {
78 if t == "text/xml" {
79 // This is how it's configured in Hugo by default.
80 m, found = types.GetByType("application/xml")
81 }
82 }
83
84 if !found {
85 return zero
86 }
87
88 var mm Type
89
90 for _, extension := range extensionHints {
91 extension = strings.TrimPrefix(extension, ".")
92 mm, _, found = types.GetFirstBySuffix(extension)
93 if found {
94 break
95 }
96 }
97
98 if found {
99 if m == mm {
100 return m
101 }
102
103 if m.IsText() && mm.IsText() {
104 // http.DetectContentType isn't brilliant when it comes to common text formats, so we need to do better.
105 // For now we say that if it's detected to be a text format and the extension/content type in header reports
106 // it to be a text format, then we use that.
107 return mm
108 }
109
110 // E.g. an image with a *.js extension.
111 return zero
112 }
113
114 return m
115 }
116
117 // FromStringAndExt creates a Type from a MIME string and a given extension.
118 func FromStringAndExt(t, ext string) (Type, error) {
119 tp, err := fromString(t)
120 if err != nil {
121 return tp, err
122 }
123 tp.suffixesCSV = strings.TrimPrefix(ext, ".")
124 tp.Delimiter = defaultDelimiter
125 tp.init()
126 return tp, nil
127 }
128
129 // FromString creates a new Type given a type string on the form MainType/SubType and
130 // an optional suffix, e.g. "text/html" or "text/html+html".
131 func fromString(t string) (Type, error) {
132 t = strings.ToLower(t)
133 parts := strings.Split(t, "/")
134 if len(parts) != 2 {
135 return Type{}, fmt.Errorf("cannot parse %q as a media type", t)
136 }
137 mainType := parts[0]
138 subParts := strings.Split(parts[1], "+")
139
140 subType := strings.Split(subParts[0], ";")[0]
141
142 var suffix string
143
144 if len(subParts) > 1 {
145 suffix = subParts[1]
146 }
147
148 return Type{MainType: mainType, SubType: subType, mimeSuffix: suffix}, nil
149 }
150
151 // Type returns a string representing the main- and sub-type of a media type, e.g. "text/css".
152 // A suffix identifier will be appended after a "+" if set, e.g. "image/svg+xml".
153 // Hugo will register a set of default media types.
154 // These can be overridden by the user in the configuration,
155 // by defining a media type with the same Type.
156 func (m Type) Type() string {
157 // Examples are
158 // image/svg+xml
159 // text/css
160 if m.mimeSuffix != "" {
161 return m.MainType + "/" + m.SubType + "+" + m.mimeSuffix
162 }
163 return m.MainType + "/" + m.SubType
164 }
165
// String returns the same value as Type, which makes Type satisfy
// fmt.Stringer.
// For internal use.
func (m Type) String() string {
	return m.Type()
}
170
171 // Suffixes returns all valid file suffixes for this type.
172 func (m Type) Suffixes() []string {
173 if m.suffixesCSV == "" {
174 return nil
175 }
176
177 return strings.Split(m.suffixesCSV, ",")
178 }
179
180 // IsText returns whether this Type is a text format.
181 // Note that this may currently return false negatives.
182 // TODO(bep) improve
183 func (m Type) IsText() bool {
184 if m.MainType == "text" {
185 return true
186 }
187 switch m.SubType {
188 case "javascript", "json", "rss", "xml", "svg", TOMLType.SubType, YAMLType.SubType:
189 return true
190 }
191 return false
192 }
193
194 func (m *Type) init() {
195 m.FirstSuffix.FullSuffix = ""
196 m.FirstSuffix.Suffix = ""
197 if suffixes := m.Suffixes(); suffixes != nil {
198 m.FirstSuffix.Suffix = suffixes[0]
199 m.FirstSuffix.FullSuffix = m.Delimiter + m.FirstSuffix.Suffix
200 }
201 }
202
// WithDelimiterAndSuffixes returns a copy of t with the given delimiter and
// comma-separated suffix list, and with FirstSuffix recomputed to match.
// It is used in tests.
func WithDelimiterAndSuffixes(t Type, delimiter, suffixesCSV string) Type {
	t.Delimiter = delimiter
	t.suffixesCSV = suffixesCSV
	t.init()
	return t
}
210
211 func newMediaType(main, sub string, suffixes []string) Type {
212 t := Type{MainType: main, SubType: sub, suffixesCSV: strings.Join(suffixes, ","), Delimiter: defaultDelimiter}
213 t.init()
214 return t
215 }
216
// newMediaTypeWithMimeSuffix is like newMediaType, but also sets the MIME
// suffix, i.e. the part after the "+" in e.g. "image/svg+xml".
func newMediaTypeWithMimeSuffix(main, sub, mimeSuffix string, suffixes []string) Type {
	mt := newMediaType(main, sub, suffixes)
	mt.mimeSuffix = mimeSuffix
	// newMediaType has already called init; it is called again after the
	// mutation above.
	mt.init()
	return mt
}
223
// The built-in media types.
//
// Definitions from https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types etc.
// Note that from Hugo 0.44 we only set Suffix if it is part of the MIME type.
//
// The last argument to each constructor is the list of file suffixes for the
// type; the first entry becomes the type's FirstSuffix.
var (
	CalendarType   = newMediaType("text", "calendar", []string{"ics"})
	CSSType        = newMediaType("text", "css", []string{"css"})
	SCSSType       = newMediaType("text", "x-scss", []string{"scss"})
	SASSType       = newMediaType("text", "x-sass", []string{"sass"})
	CSVType        = newMediaType("text", "csv", []string{"csv"})
	HTMLType       = newMediaType("text", "html", []string{"html"})
	JavascriptType = newMediaType("application", "javascript", []string{"js", "jsm", "mjs"})
	TypeScriptType = newMediaType("application", "typescript", []string{"ts"})
	TSXType        = newMediaType("text", "tsx", []string{"tsx"})
	JSXType        = newMediaType("text", "jsx", []string{"jsx"})

	JSONType           = newMediaType("application", "json", []string{"json"})
	WebAppManifestType = newMediaTypeWithMimeSuffix("application", "manifest", "json", []string{"webmanifest"})
	RSSType            = newMediaTypeWithMimeSuffix("application", "rss", "xml", []string{"xml", "rss"})
	XMLType            = newMediaType("application", "xml", []string{"xml"})
	SVGType            = newMediaTypeWithMimeSuffix("image", "svg", "xml", []string{"svg"})
	TextType           = newMediaType("text", "plain", []string{"txt"})
	TOMLType           = newMediaType("application", "toml", []string{"toml"})
	YAMLType           = newMediaType("application", "yaml", []string{"yaml", "yml"})

	// Common image types
	PNGType  = newMediaType("image", "png", []string{"png"})
	JPEGType = newMediaType("image", "jpeg", []string{"jpg", "jpeg", "jpe", "jif", "jfif"})
	GIFType  = newMediaType("image", "gif", []string{"gif"})
	TIFFType = newMediaType("image", "tiff", []string{"tif", "tiff"})
	BMPType  = newMediaType("image", "bmp", []string{"bmp"})
	WEBPType = newMediaType("image", "webp", []string{"webp"})

	// Common font types
	TrueTypeFontType = newMediaType("font", "ttf", []string{"ttf"})
	OpenTypeFontType = newMediaType("font", "otf", []string{"otf"})

	// Common document types
	PDFType      = newMediaType("application", "pdf", []string{"pdf"})
	MarkdownType = newMediaType("text", "markdown", []string{"md", "markdown"})

	// Common video types
	AVIType  = newMediaType("video", "x-msvideo", []string{"avi"})
	MPEGType = newMediaType("video", "mpeg", []string{"mpg", "mpeg"})
	MP4Type  = newMediaType("video", "mp4", []string{"mp4"})
	OGGType  = newMediaType("video", "ogg", []string{"ogv"})
	WEBMType = newMediaType("video", "webm", []string{"webm"})
	GPPType  = newMediaType("video", "3gpp", []string{"3gpp", "3gp"})

	// OctetType is defined without any file suffixes.
	OctetType = newMediaType("application", "octet-stream", nil)
)
273
// DefaultTypes is the default media types supported by Hugo.
// The package init function sorts this slice by each entry's Type() string
// and panics if an entry is listed twice.
var DefaultTypes = Types{
	CalendarType,
	CSSType,
	CSVType,
	SCSSType,
	SASSType,
	HTMLType,
	MarkdownType,
	JavascriptType,
	TypeScriptType,
	TSXType,
	JSXType,
	JSONType,
	WebAppManifestType,
	RSSType,
	XMLType,
	SVGType,
	TextType,
	OctetType,
	YAMLType,
	TOMLType,
	PNGType,
	GIFType,
	BMPType,
	JPEGType,
	WEBPType,
	AVIType,
	MPEGType,
	MP4Type,
	OGGType,
	WEBMType,
	GPPType,
	OpenTypeFontType,
	TrueTypeFontType,
	PDFType,
}
311
312 func init() {
313 sort.Sort(DefaultTypes)
314
315 // Sanity check.
316 seen := make(map[Type]bool)
317 for _, t := range DefaultTypes {
318 if seen[t] {
319 panic(fmt.Sprintf("MediaType %s duplicated in list", t))
320 }
321 seen[t] = true
322 }
323 }
324
// Types is a slice of media types.
// It implements sort.Interface, ordering entries by their Type() string.
type Types []Type

// Len returns the number of media types in t.
func (t Types) Len() int { return len(t) }

// Swap exchanges the entries at index i and j.
func (t Types) Swap(i, j int) { t[i], t[j] = t[j], t[i] }

// Less reports whether entry i's Type() string sorts before entry j's.
func (t Types) Less(i, j int) bool { return t[i].Type() < t[j].Type() }
331
332 // GetByType returns a media type for tp.
333 func (t Types) GetByType(tp string) (Type, bool) {
334 for _, tt := range t {
335 if strings.EqualFold(tt.Type(), tp) {
336 return tt, true
337 }
338 }
339
340 if !strings.Contains(tp, "+") {
341 // Try with the main and sub type
342 parts := strings.Split(tp, "/")
343 if len(parts) == 2 {
344 return t.GetByMainSubType(parts[0], parts[1])
345 }
346 }
347
348 return Type{}, false
349 }
350
351 // BySuffix will return all media types matching a suffix.
352 func (t Types) BySuffix(suffix string) []Type {
353 suffix = strings.ToLower(suffix)
354 var types []Type
355 for _, tt := range t {
356 if tt.hasSuffix(suffix) {
357 types = append(types, tt)
358 }
359 }
360 return types
361 }
362
363 // GetFirstBySuffix will return the first type matching the given suffix.
364 func (t Types) GetFirstBySuffix(suffix string) (Type, SuffixInfo, bool) {
365 suffix = strings.ToLower(suffix)
366 for _, tt := range t {
367 if tt.hasSuffix(suffix) {
368 return tt, SuffixInfo{
369 FullSuffix: tt.Delimiter + suffix,
370 Suffix: suffix,
371 }, true
372 }
373 }
374 return Type{}, SuffixInfo{}, false
375 }
376
377 // GetBySuffix gets a media type given as suffix, e.g. "html".
378 // It will return false if no format could be found, or if the suffix given
379 // is ambiguous.
380 // The lookup is case insensitive.
381 func (t Types) GetBySuffix(suffix string) (tp Type, si SuffixInfo, found bool) {
382 suffix = strings.ToLower(suffix)
383 for _, tt := range t {
384 if tt.hasSuffix(suffix) {
385 if found {
386 // ambiguous
387 found = false
388 return
389 }
390 tp = tt
391 si = SuffixInfo{
392 FullSuffix: tt.Delimiter + suffix,
393 Suffix: suffix,
394 }
395 found = true
396 }
397 }
398 return
399 }
400
401 func (m Type) hasSuffix(suffix string) bool {
402 return strings.Contains(","+m.suffixesCSV+",", ","+suffix+",")
403 }
404
405 // GetByMainSubType gets a media type given a main and a sub type e.g. "text" and "plain".
406 // It will return false if no format could be found, or if the combination given
407 // is ambiguous.
408 // The lookup is case insensitive.
409 func (t Types) GetByMainSubType(mainType, subType string) (tp Type, found bool) {
410 for _, tt := range t {
411 if strings.EqualFold(mainType, tt.MainType) && strings.EqualFold(subType, tt.SubType) {
412 if found {
413 // ambiguous
414 found = false
415 return
416 }
417
418 tp = tt
419 found = true
420 }
421 }
422 return
423 }
424
// suffixIsRemoved returns the error reported when a media type configuration
// still uses the removed "suffix" setting. The message explains how to
// migrate to "suffixes".
func suffixIsRemoved() error {
	return errors.New(`MediaType.Suffix is removed. Before Hugo 0.44 this was used both to set a custom file suffix and as way
to augment the mediatype definition (what you see after the "+", e.g. "image/svg+xml").

This had its limitations. For one, it was only possible with one file extension per MIME type.

Now you can specify multiple file suffixes using "suffixes", but you need to specify the full MIME type
identifier:

[mediaTypes]
[mediaTypes."image/svg+xml"]
suffixes = ["svg", "abc" ]

In most cases, it will be enough to just change:

[mediaTypes]
[mediaTypes."my/custom-mediatype"]
suffix = "txt"

To:

[mediaTypes]
[mediaTypes."my/custom-mediatype"]
suffixes = ["txt"]

Note that you can still get the Media Type's suffix from a template: {{ $mediaType.Suffix }}. But this will now map to the MIME type filename.
`)
}
453
// DecodeTypes takes a list of media type configurations and merges those,
// in the order given, with the Hugo defaults as the last resort.
//
// Each map is keyed by a media type string (e.g. "image/svg+xml"); the value
// holds the settings for that type. A key that matches an already known type
// updates it, any other key is parsed as a new type. The returned Types are
// sorted by their Type() string.
//
// An error is returned if a key cannot be parsed as a media type, if a value
// cannot be decoded, or if the removed "suffix" setting is used.
func DecodeTypes(mms ...map[string]any) (Types, error) {
	var m Types

	// Maps type string to Type. Type string is the full application/svg+xml.
	mmm := make(map[string]Type)
	for _, dt := range DefaultTypes {
		mmm[dt.Type()] = dt
	}

	for _, mm := range mms {
		for k, v := range mm {
			var mediaType Type

			// Start from the known definition, if any, so that settings not
			// mentioned in the configuration are kept.
			mediaType, found := mmm[k]
			if !found {
				var err error
				mediaType, err = fromString(k)
				if err != nil {
					return m, err
				}
			}

			// NOTE(review): presumably this only populates exported fields
			// such as Delimiter; suffixes are handled explicitly below.
			if err := mapstructure.WeakDecode(v, &mediaType); err != nil {
				return m, err
			}

			// NOTE(review): PrepareParams presumably normalizes the keys so
			// that the lookups below work regardless of case — confirm.
			vm := maps.ToStringMap(v)
			maps.PrepareParams(vm)
			_, delimiterSet := vm["delimiter"]
			_, suffixSet := vm["suffix"]

			if suffixSet {
				return Types{}, suffixIsRemoved()
			}

			if suffixes, found := vm["suffixes"]; found {
				mediaType.suffixesCSV = strings.TrimSpace(strings.ToLower(strings.Join(cast.ToStringSlice(suffixes), ",")))
			}

			// The user may set the delimiter as an empty string.
			if !delimiterSet && mediaType.suffixesCSV != "" {
				mediaType.Delimiter = defaultDelimiter
			}

			// Recompute FirstSuffix from the possibly changed suffixes and
			// delimiter.
			mediaType.init()

			mmm[k] = mediaType

		}
	}

	// Map iteration order is random, so sort to get a stable result.
	for _, v := range mmm {
		m = append(m, v)
	}
	sort.Sort(m)

	return m, nil
}
514
// IsZero reports whether this Type represents a zero value.
// Only SubType is inspected: a Type with an empty SubType is considered zero.
// For internal use.
func (m Type) IsZero() bool {
	return m.SubType == ""
}
520
521 // MarshalJSON returns the JSON encoding of m.
522 // For internal use.
523 func (m Type) MarshalJSON() ([]byte, error) {
524 type Alias Type
525 return json.Marshal(&struct {
526 Alias
527 Type string `json:"type"`
528 String string `json:"string"`
529 Suffixes []string `json:"suffixes"`
530 }{
531 Alias: (Alias)(m),
532 Type: m.Type(),
533 String: m.String(),
534 Suffixes: strings.Split(m.suffixesCSV, ","),
535 })
536 }