parser.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. package dump
  2. import (
  3. "bufio"
  4. "fmt"
  5. "io"
  6. "regexp"
  7. "strconv"
  8. "strings"
  9. "github.com/juju/errors"
  10. "github.com/siddontang/go-mysql/mysql"
  11. )
  // ErrSkip is a sentinel error that a ParseHandler may return from BinLog or
  // Data. Parse does not abort when it sees this value; it carries on with the
  // next line of the dump.
  var ErrSkip = errors.New("Handler error, but skipped")
  // ParseHandler receives the events that Parse extracts from a dump.
  type ParseHandler interface {
  	// BinLog is called with the file name and position taken from a
  	// "CHANGE MASTER TO MASTER_LOG_FILE=name, MASTER_LOG_POS=pos;" line.
  	BinLog(name string, pos uint64) error

  	// Data is called for each parsed INSERT line with the schema selected by
  	// the most recent USE line, the table name and the row's values.
  	Data(schema, table string, values []string) error
  }
  // Patterns for the dump lines that Parse recognises. They are compiled once
  // when the package is initialised.
  var (
  	// binlogExp captures the binlog file name and position of a
  	// CHANGE MASTER TO statement.
  	binlogExp = regexp.MustCompile("^CHANGE MASTER TO MASTER_LOG_FILE='(.+)', MASTER_LOG_POS=(\\d+);")

  	// useExp captures the schema name of a USE statement.
  	useExp = regexp.MustCompile("^USE `(.+)`;")

  	// valuesExp captures the table name and the raw value list of a
  	// single-row INSERT statement.
  	valuesExp = regexp.MustCompile("^INSERT INTO `(.+?)` VALUES \\((.+)\\);$")
  )
  28. // Parse the dump data with Dumper generate.
  29. // It can not parse all the data formats with mysqldump outputs
  30. func Parse(r io.Reader, h ParseHandler, parseBinlogPos bool) error {
  31. rb := bufio.NewReaderSize(r, 1024*16)
  32. var db string
  33. var binlogParsed bool
  34. for {
  35. line, err := rb.ReadString('\n')
  36. if err != nil && err != io.EOF {
  37. return errors.Trace(err)
  38. } else if mysql.ErrorEqual(err, io.EOF) {
  39. break
  40. }
  41. // Ignore '\n' on Linux or '\r\n' on Windows
  42. line = strings.SplitAfter(line, ";")[0]
  43. if parseBinlogPos && !binlogParsed {
  44. if m := binlogExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
  45. name := m[0][1]
  46. pos, err := strconv.ParseUint(m[0][2], 10, 64)
  47. if err != nil {
  48. return errors.Errorf("parse binlog %v err, invalid number", line)
  49. }
  50. if err = h.BinLog(name, pos); err != nil && err != ErrSkip {
  51. return errors.Trace(err)
  52. }
  53. binlogParsed = true
  54. }
  55. }
  56. if m := useExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
  57. db = m[0][1]
  58. }
  59. if m := valuesExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
  60. table := m[0][1]
  61. values, err := parseValues(m[0][2])
  62. if err != nil {
  63. return errors.Errorf("parse values %v err", line)
  64. }
  65. if err = h.Data(db, table, values); err != nil && err != ErrSkip {
  66. return errors.Trace(err)
  67. }
  68. }
  69. }
  70. return nil
  71. }
  72. func parseValues(str string) ([]string, error) {
  73. // values are seperated by comma, but we can not split using comma directly
  74. // string is enclosed by single quote
  75. // a simple implementation, may be more robust later.
  76. values := make([]string, 0, 8)
  77. i := 0
  78. for i < len(str) {
  79. if str[i] != '\'' {
  80. // no string, read until comma
  81. j := i + 1
  82. for ; j < len(str) && str[j] != ','; j++ {
  83. }
  84. values = append(values, str[i:j])
  85. // skip ,
  86. i = j + 1
  87. } else {
  88. // read string until another single quote
  89. j := i + 1
  90. escaped := false
  91. for j < len(str) {
  92. if str[j] == '\\' {
  93. // skip escaped character
  94. j += 2
  95. escaped = true
  96. continue
  97. } else if str[j] == '\'' {
  98. break
  99. } else {
  100. j++
  101. }
  102. }
  103. if j >= len(str) {
  104. return nil, fmt.Errorf("parse quote values error")
  105. }
  106. value := str[i : j+1]
  107. if escaped {
  108. value = unescapeString(value)
  109. }
  110. values = append(values, value)
  111. // skip ' and ,
  112. i = j + 2
  113. }
  114. // need skip blank???
  115. }
  116. return values, nil
  117. }
  118. // unescapeString un-escapes the string.
  119. // mysqldump will escape the string when dumps,
  120. // Refer http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
  121. func unescapeString(s string) string {
  122. i := 0
  123. value := make([]byte, 0, len(s))
  124. for i < len(s) {
  125. if s[i] == '\\' {
  126. j := i + 1
  127. if j == len(s) {
  128. // The last char is \, remove
  129. break
  130. }
  131. value = append(value, unescapeChar(s[j]))
  132. i += 2
  133. } else {
  134. value = append(value, s[i])
  135. i++
  136. }
  137. }
  138. return string(value)
  139. }
  // unescapeChar maps the character that follows a backslash to the byte the
  // escape sequence stands for: \n, \0, \b, \Z, \r and \t become the
  // corresponding control bytes, while every other character (such as the
  // quote or backslash in \" \' \\) is returned unchanged.
  func unescapeChar(ch byte) byte {
  	switch ch {
  	case 'n':
  		return '\n'
  	case '0':
  		return 0
  	case 'b':
  		return 8
  	case 'Z':
  		return 26
  	case 'r':
  		return '\r'
  	case 't':
  		return '\t'
  	default:
  		return ch
  	}
  }