bom.go 734 B

123456789101112131415161718192021222324252627282930313233343536373839
  1. // Package bom is used to clean up UTF-8 Byte Order Marks.
  2. package bom
  3. import (
  4. "bufio"
  5. "io"
  6. )
  7. const (
  8. bom0 = 0xef
  9. bom1 = 0xbb
  10. bom2 = 0xbf
  11. )
  12. // Clean returns b with the 3 byte BOM stripped off the front if it is present.
  13. // If the BOM is not present, then b is returned.
  14. func Clean(b []byte) []byte {
  15. if len(b) >= 3 &&
  16. b[0] == bom0 &&
  17. b[1] == bom1 &&
  18. b[2] == bom2 {
  19. return b[3:]
  20. }
  21. return b
  22. }
  23. // NewReader returns an io.Reader that will skip over initial UTF-8 byte order marks.
  24. func NewReader(r io.Reader) io.Reader {
  25. buf := bufio.NewReader(r)
  26. b, err := buf.Peek(3)
  27. if err != nil {
  28. // not enough bytes
  29. return buf
  30. }
  31. if b[0] == bom0 && b[1] == bom1 && b[2] == bom2 {
  32. discardBytes(buf, 3)
  33. }
  34. return buf
  35. }