123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package http
- import (
- "bytes"
- "encoding/binary"
- )
// sniffLen is the maximum number of leading bytes the sniffing
// algorithm looks at when making its decision.
const sniffLen = 512
- // DetectContentType implements the algorithm described
- // at http://mimesniff.spec.whatwg.org/ to determine the
- // Content-Type of the given data. It considers at most the
- // first 512 bytes of data. DetectContentType always returns
- // a valid MIME type: if it cannot determine a more specific one, it
- // returns "application/octet-stream".
- func DetectContentType(data []byte) string {
- if len(data) > sniffLen {
- data = data[:sniffLen]
- }
- // Index of the first non-whitespace byte in data.
- firstNonWS := 0
- for ; firstNonWS < len(data) && isWS(data[firstNonWS]); firstNonWS++ {
- }
- for _, sig := range sniffSignatures {
- if ct := sig.match(data, firstNonWS); ct != "" {
- return ct
- }
- }
- return "application/octet-stream" // fallback
- }
// isWS reports whether b is one of the whitespace bytes recognized by the
// sniffer: tab, line feed, form feed, carriage return, or space.
func isWS(b byte) bool {
	return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}
// sniffSig is implemented by every content-type signature that can be
// tested against a prefix of the data being sniffed.
type sniffSig interface {
	// match reports the MIME type that data is recognized as, or the
	// empty string when the signature does not apply. firstNonWS is the
	// index of the first non-whitespace byte in data.
	match(data []byte, firstNonWS int) string
}
- // Data matching the table in section 6.
- var sniffSignatures = []sniffSig{
- htmlSig("<!DOCTYPE HTML"),
- htmlSig("<HTML"),
- htmlSig("<HEAD"),
- htmlSig("<SCRIPT"),
- htmlSig("<IFRAME"),
- htmlSig("<H1"),
- htmlSig("<DIV"),
- htmlSig("<FONT"),
- htmlSig("<TABLE"),
- htmlSig("<A"),
- htmlSig("<STYLE"),
- htmlSig("<TITLE"),
- htmlSig("<B"),
- htmlSig("<BODY"),
- htmlSig("<BR"),
- htmlSig("<P"),
- htmlSig("<!--"),
- &maskedSig{mask: []byte("\xFF\xFF\xFF\xFF\xFF"), pat: []byte("<?xml"), skipWS: true, ct: "text/xml; charset=utf-8"},
- &exactSig{[]byte("%PDF-"), "application/pdf"},
- &exactSig{[]byte("%!PS-Adobe-"), "application/postscript"},
- // UTF BOMs.
- &maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFE\xFF\x00\x00"), ct: "text/plain; charset=utf-16be"},
- &maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFF\xFE\x00\x00"), ct: "text/plain; charset=utf-16le"},
- &maskedSig{mask: []byte("\xFF\xFF\xFF\x00"), pat: []byte("\xEF\xBB\xBF\x00"), ct: "text/plain; charset=utf-8"},
- &exactSig{[]byte("GIF87a"), "image/gif"},
- &exactSig{[]byte("GIF89a"), "image/gif"},
- &exactSig{[]byte("\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"), "image/png"},
- &exactSig{[]byte("\xFF\xD8\xFF"), "image/jpeg"},
- &exactSig{[]byte("BM"), "image/bmp"},
- &maskedSig{
- mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF"),
- pat: []byte("RIFF\x00\x00\x00\x00WEBPVP"),
- ct: "image/webp",
- },
- &exactSig{[]byte("\x00\x00\x01\x00"), "image/vnd.microsoft.icon"},
- &maskedSig{
- mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
- pat: []byte("RIFF\x00\x00\x00\x00WAVE"),
- ct: "audio/wave",
- },
- &maskedSig{
- mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
- pat: []byte("FORM\x00\x00\x00\x00AIFF"),
- ct: "audio/aiff",
- },
- &maskedSig{
- mask: []byte("\xFF\xFF\xFF\xFF"),
- pat: []byte(".snd"),
- ct: "audio/basic",
- },
- &maskedSig{
- mask: []byte("\xFF\xFF\xFF\xFF\xFF"),
- pat: []byte("OggS\x00"),
- ct: "application/ogg",
- },
- &maskedSig{
- mask: []byte("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"),
- pat: []byte("MThd\x00\x00\x00\x06"),
- ct: "audio/midi",
- },
- &maskedSig{
- mask: []byte("\xFF\xFF\xFF"),
- pat: []byte("ID3"),
- ct: "audio/mpeg",
- },
- &maskedSig{
- mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
- pat: []byte("RIFF\x00\x00\x00\x00AVI "),
- ct: "video/avi",
- },
- // Fonts
- &maskedSig{
- // 34 NULL bytes followed by the string "LP"
- pat: []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x4C\x50"),
- // 34 NULL bytes followed by \xF\xF
- mask: []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xFF\xFF"),
- ct: "application/vnd.ms-fontobject",
- },
- &exactSig{[]byte("\x00\x01\x00\x00"), "application/font-ttf"},
- &exactSig{[]byte("OTTO"), "application/font-off"},
- &exactSig{[]byte("ttcf"), "application/font-cff"},
- &exactSig{[]byte("wOFF"), "application/font-woff"},
- &exactSig{[]byte("\x1A\x45\xDF\xA3"), "video/webm"},
- &exactSig{[]byte("\x52\x61\x72\x20\x1A\x07\x00"), "application/x-rar-compressed"},
- &exactSig{[]byte("\x50\x4B\x03\x04"), "application/zip"},
- &exactSig{[]byte("\x1F\x8B\x08"), "application/x-gzip"},
- mp4Sig{},
- textSig{}, // should be last
- }
// exactSig is a signature that matches when the data begins with a fixed
// byte sequence.
type exactSig struct {
	sig []byte // bytes the data must start with
	ct  string // MIME type reported on a match
}

// match returns e.ct when data starts with e.sig and "" otherwise.
// firstNonWS is not used: the comparison always starts at offset 0.
func (e *exactSig) match(data []byte, firstNonWS int) string {
	n := len(e.sig)
	if len(data) < n || !bytes.Equal(data[:n], e.sig) {
		return ""
	}
	return e.ct
}
// maskedSig is a signature that compares the data against a pattern after
// applying a per-byte mask, optionally ignoring leading whitespace.
type maskedSig struct {
	mask, pat []byte // mask[i] is ANDed with the data byte before comparing it to pat[i]
	skipWS    bool   // when set, matching starts at the first non-whitespace byte
	ct        string // MIME type reported on a match
}

// match implements the pattern matching algorithm of the MIME Sniffing
// specification (https://mimesniff.spec.whatwg.org/#pattern-matching-algorithm).
// It returns m.ct when every masked data byte equals the corresponding
// pattern byte, and "" when the data is too short, the mask and pattern
// differ in length, or any byte differs.
func (m *maskedSig) match(data []byte, firstNonWS int) string {
	if m.skipWS {
		data = data[firstNonWS:]
	}

	n := len(m.mask)
	if len(m.pat) != n || len(data) < n {
		return ""
	}

	for i := 0; i < n; i++ {
		if data[i]&m.mask[i] != m.pat[i] {
			return ""
		}
	}
	return m.ct
}
// htmlSig is an HTML tag prefix such as "<HTML". Upper-case letters in
// the signature match either case in the data.
type htmlSig []byte

// match returns "text/html; charset=utf-8" when the data, starting at
// firstNonWS, begins with the tag h followed by a space or '>'.
// Otherwise it returns "".
func (h htmlSig) match(data []byte, firstNonWS int) string {
	rest := data[firstNonWS:]
	// There must be room for the whole tag plus one terminating byte.
	if len(rest) <= len(h) {
		return ""
	}

	for i := 0; i < len(h); i++ {
		want, got := h[i], rest[i]
		if want >= 'A' && want <= 'Z' {
			// Clearing bit 0x20 folds an ASCII lower-case letter
			// onto its upper-case form.
			got &^= 0x20
		}
		if got != want {
			return ""
		}
	}

	// The byte after the tag must be a space or a right angle bracket.
	switch rest[len(h)] {
	case ' ', '>':
		return "text/html; charset=utf-8"
	}
	return ""
}
var (
	mp4ftype = []byte("ftyp") // box type expected at bytes 4..8
	mp4      = []byte("mp4")  // three-byte prefix of a brand that identifies MP4
)

// mp4Sig recognizes MP4 data by inspecting its leading "ftyp" box.
type mp4Sig struct{}

// match implements the MP4 signature check from
// https://mimesniff.spec.whatwg.org/#signature-for-mp4 (c.f. section 6.2.1).
// It returns "video/mp4" when the data starts with an "ftyp" box that
// lies entirely within data, has a size that is a multiple of four, and
// lists a brand beginning with "mp4". Otherwise it returns "".
// firstNonWS is not used.
func (mp4Sig) match(data []byte, firstNonWS int) string {
	const minLen = 12
	if len(data) < minLen {
		return ""
	}

	// The first four bytes hold the big-endian size of the box.
	size := int(binary.BigEndian.Uint32(data[:4]))
	switch {
	case size%4 != 0, len(data) < size:
		return ""
	case !bytes.Equal(data[4:8], mp4ftype):
		return ""
	}

	// Examine each four-byte brand in the box. Offset 12 holds the minor
	// version number rather than a brand, so it is skipped.
	for off := 8; off < size; off += 4 {
		if off != 12 && bytes.Equal(data[off:off+3], mp4) {
			return "video/mp4"
		}
	}
	return ""
}
// textSig is the catch-all signature for plain text.
type textSig struct{}

// match returns "text/plain; charset=utf-8" unless the data, from
// firstNonWS onward, contains a byte treated as binary (c.f. section 5,
// step 4), in which case it returns "". The binary bytes are 0x00-0x08,
// 0x0B, 0x0E-0x1A and 0x1C-0x1F.
func (textSig) match(data []byte, firstNonWS int) string {
	rest := data[firstNonWS:]
	for i := 0; i < len(rest); i++ {
		c := rest[i]
		if c <= 0x08 || c == 0x0B || (c >= 0x0E && c <= 0x1A) || (c >= 0x1C && c <= 0x1F) {
			return ""
		}
	}
	return "text/plain; charset=utf-8"
}
|