grok.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. package grok
  2. import (
  3. "bufio"
  4. "bytes"
  5. "fmt"
  6. "io"
  7. "os"
  8. "path/filepath"
  9. "regexp"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. )
  // canonical matches a %{SYNTAX}, %{SYNTAX:SEMANTIC} or
  // %{SYNTAX:SEMANTIC:TYPE} reference whose parts are made of word characters
  // only. The text between the braces is captured as group 1.
  var canonical = regexp.MustCompile(`%{(\w+(?::\w+(?::\w+)?)?)}`)

  // normal matches the same reference shapes as canonical, but each part may
  // also contain '-' and '.'. The text between the braces is captured as
  // group 1.
  var normal = regexp.MustCompile(`%{([\w-.]+(?::[\w-.]+(?::[\w-.]+)?)?)}`)

  // symbolic matches a single non-word character.
  var symbolic = regexp.MustCompile(`\W`)
  // Config carries the options that NewWithConfig uses to build a Grok parser
  // and that the parser consults while producing captures.
  type Config struct {
  	// NamedCapturesOnly, when true, makes only references that carry a
  	// semantic name (%{SYNTAX:SEMANTIC}) produce named capture groups; bare
  	// %{SYNTAX} references become unnamed groups.
  	NamedCapturesOnly bool

  	// SkipDefaultPatterns, when true, stops NewWithConfig from loading the
  	// package-level default pattern set.
  	SkipDefaultPatterns bool

  	// RemoveEmptyValues, when true, leaves captures whose matched text is
  	// empty out of the parse results.
  	RemoveEmptyValues bool

  	// PatternsDir lists paths that NewWithConfig passes, in order, to
  	// AddPatternsFromPath.
  	PatternsDir []string

  	// Patterns holds additional name -> expression definitions that
  	// NewWithConfig loads last, via AddPatternsFromMap.
  	Patterns map[string]string
  }
  27. // Grok object us used to load patterns and deconstruct strings using those
  28. // patterns.
  29. type Grok struct {
  30. rawPattern map[string]string
  31. config *Config
  32. aliases map[string]string
  33. compiledPatterns map[string]*gRegexp
  34. patterns map[string]*gPattern
  35. patternsGuard *sync.RWMutex
  36. compiledGuard *sync.RWMutex
  37. }
  type (
  	// semanticTypes maps a semantic (capture) name to the type name given
  	// as the third part of a %{SYNTAX:SEMANTIC:TYPE} reference.
  	semanticTypes map[string]string

  	// gPattern is a loaded pattern: its expression with all references
  	// expanded, plus the type information gathered while expanding it.
  	gPattern struct {
  		expression string
  		typeInfo   semanticTypes
  	}

  	// gRegexp is a compiled pattern together with the type information of
  	// its captures.
  	gRegexp struct {
  		regexp   *regexp.Regexp
  		typeInfo semanticTypes
  	}
  )
  47. // New returns a Grok object.
  48. func New() (*Grok, error) {
  49. return NewWithConfig(&Config{})
  50. }
  51. // NewWithConfig returns a Grok object that is configured to behave according
  52. // to the supplied Config structure.
  53. func NewWithConfig(config *Config) (*Grok, error) {
  54. g := &Grok{
  55. config: config,
  56. aliases: map[string]string{},
  57. compiledPatterns: map[string]*gRegexp{},
  58. patterns: map[string]*gPattern{},
  59. rawPattern: map[string]string{},
  60. patternsGuard: new(sync.RWMutex),
  61. compiledGuard: new(sync.RWMutex),
  62. }
  63. if !config.SkipDefaultPatterns {
  64. g.AddPatternsFromMap(patterns)
  65. }
  66. if len(config.PatternsDir) > 0 {
  67. for _, path := range config.PatternsDir {
  68. err := g.AddPatternsFromPath(path)
  69. if err != nil {
  70. return nil, err
  71. }
  72. }
  73. }
  74. if err := g.AddPatternsFromMap(config.Patterns); err != nil {
  75. return nil, err
  76. }
  77. return g, nil
  78. }
  79. // AddPattern adds a new pattern to the list of loaded patterns.
  80. func (g *Grok) addPattern(name, pattern string) error {
  81. dnPattern, ti, err := g.denormalizePattern(pattern, g.patterns)
  82. if err != nil {
  83. return err
  84. }
  85. g.patterns[name] = &gPattern{expression: dnPattern, typeInfo: ti}
  86. return nil
  87. }
  88. // AddPattern adds a named pattern to grok
  89. func (g *Grok) AddPattern(name, pattern string) error {
  90. g.patternsGuard.Lock()
  91. defer g.patternsGuard.Unlock()
  92. g.rawPattern[name] = pattern
  93. g.buildPatterns()
  94. return nil
  95. }
  96. // AddPatternsFromMap loads a map of named patterns
  97. func (g *Grok) AddPatternsFromMap(m map[string]string) error {
  98. g.patternsGuard.Lock()
  99. defer g.patternsGuard.Unlock()
  100. for name, pattern := range m {
  101. g.rawPattern[name] = pattern
  102. }
  103. return g.buildPatterns()
  104. }
  105. // AddPatternsFromMap adds new patterns from the specified map to the list of
  106. // loaded patterns.
  107. func (g *Grok) addPatternsFromMap(m map[string]string) error {
  108. patternDeps := graph{}
  109. for k, v := range m {
  110. keys := []string{}
  111. for _, key := range canonical.FindAllStringSubmatch(v, -1) {
  112. names := strings.Split(key[1], ":")
  113. syntax := names[0]
  114. if g.patterns[syntax] == nil {
  115. if _, ok := m[syntax]; !ok {
  116. return fmt.Errorf("no pattern found for %%{%s}", syntax)
  117. }
  118. }
  119. keys = append(keys, syntax)
  120. }
  121. patternDeps[k] = keys
  122. }
  123. order, _ := sortGraph(patternDeps)
  124. for _, key := range reverseList(order) {
  125. g.addPattern(key, m[key])
  126. }
  127. return nil
  128. }
  129. // AddPatternsFromPath adds new patterns from the files in the specified
  130. // directory to the list of loaded patterns.
  131. func (g *Grok) AddPatternsFromPath(path string) error {
  132. if fi, err := os.Stat(path); err == nil {
  133. if fi.IsDir() {
  134. path = path + "/*"
  135. }
  136. } else {
  137. return fmt.Errorf("invalid path : %s", path)
  138. }
  139. // only one error can be raised, when pattern is malformed
  140. // pattern is hard-coded "/*" so we ignore err
  141. files, _ := filepath.Glob(path)
  142. var filePatterns = map[string]string{}
  143. for _, fileName := range files {
  144. file, err := os.Open(fileName)
  145. if err != nil {
  146. return err
  147. }
  148. scanner := bufio.NewScanner(bufio.NewReader(file))
  149. for scanner.Scan() {
  150. l := scanner.Text()
  151. if len(l) > 0 && l[0] != '#' {
  152. names := strings.SplitN(l, " ", 2)
  153. filePatterns[names[0]] = names[1]
  154. }
  155. }
  156. file.Close()
  157. }
  158. return g.AddPatternsFromMap(filePatterns)
  159. }
  160. // Match returns true if the specified text matches the pattern.
  161. func (g *Grok) Match(pattern, text string) (bool, error) {
  162. gr, err := g.compile(pattern)
  163. if err != nil {
  164. return false, err
  165. }
  166. if ok := gr.regexp.MatchString(text); !ok {
  167. return false, nil
  168. }
  169. return true, nil
  170. }
  171. // compiledParse parses the specified text and returns a map with the results.
  172. func (g *Grok) compiledParse(gr *gRegexp, text string) (map[string]string, error) {
  173. captures := make(map[string]string)
  174. if match := gr.regexp.FindStringSubmatch(text); len(match) > 0 {
  175. for i, name := range gr.regexp.SubexpNames() {
  176. if name != "" {
  177. if g.config.RemoveEmptyValues && match[i] == "" {
  178. continue
  179. }
  180. name = g.nameToAlias(name)
  181. captures[name] = match[i]
  182. }
  183. }
  184. }
  185. return captures, nil
  186. }
  187. // Parse the specified text and return a map with the results.
  188. func (g *Grok) Parse(pattern, text string) (map[string]string, error) {
  189. gr, err := g.compile(pattern)
  190. if err != nil {
  191. return nil, err
  192. }
  193. return g.compiledParse(gr, text)
  194. }
  195. // ParseTyped returns a inteface{} map with typed captured fields based on provided pattern over the text
  196. func (g *Grok) ParseTyped(pattern string, text string) (map[string]interface{}, error) {
  197. gr, err := g.compile(pattern)
  198. if err != nil {
  199. return nil, err
  200. }
  201. match := gr.regexp.FindStringSubmatch(text)
  202. captures := make(map[string]interface{})
  203. if len(match) > 0 {
  204. for i, segmentName := range gr.regexp.SubexpNames() {
  205. if len(segmentName) != 0 {
  206. if g.config.RemoveEmptyValues == true && match[i] == "" {
  207. continue
  208. }
  209. name := g.nameToAlias(segmentName)
  210. if segmentType, ok := gr.typeInfo[segmentName]; ok {
  211. switch segmentType {
  212. case "int":
  213. captures[name], _ = strconv.Atoi(match[i])
  214. case "float":
  215. captures[name], _ = strconv.ParseFloat(match[i], 64)
  216. default:
  217. return nil, fmt.Errorf("ERROR the value %s cannot be converted to %s", match[i], segmentType)
  218. }
  219. } else {
  220. captures[name] = match[i]
  221. }
  222. }
  223. }
  224. }
  225. return captures, nil
  226. }
  227. // ParseToMultiMap parses the specified text and returns a map with the
  228. // results. Values are stored in an string slice, so values from captures with
  229. // the same name don't get overridden.
  230. func (g *Grok) ParseToMultiMap(pattern, text string) (map[string][]string, error) {
  231. gr, err := g.compile(pattern)
  232. if err != nil {
  233. return nil, err
  234. }
  235. captures := make(map[string][]string)
  236. if match := gr.regexp.FindStringSubmatch(text); len(match) > 0 {
  237. for i, name := range gr.regexp.SubexpNames() {
  238. if name != "" {
  239. if g.config.RemoveEmptyValues == true && match[i] == "" {
  240. continue
  241. }
  242. name = g.nameToAlias(name)
  243. captures[name] = append(captures[name], match[i])
  244. }
  245. }
  246. }
  247. return captures, nil
  248. }
  249. func (g *Grok) buildPatterns() error {
  250. g.patterns = map[string]*gPattern{}
  251. return g.addPatternsFromMap(g.rawPattern)
  252. }
  253. func (g *Grok) compile(pattern string) (*gRegexp, error) {
  254. g.compiledGuard.RLock()
  255. gr, ok := g.compiledPatterns[pattern]
  256. g.compiledGuard.RUnlock()
  257. if ok {
  258. return gr, nil
  259. }
  260. g.patternsGuard.RLock()
  261. newPattern, ti, err := g.denormalizePattern(pattern, g.patterns)
  262. g.patternsGuard.RUnlock()
  263. if err != nil {
  264. return nil, err
  265. }
  266. compiledRegex, err := regexp.Compile(newPattern)
  267. if err != nil {
  268. return nil, err
  269. }
  270. gr = &gRegexp{regexp: compiledRegex, typeInfo: ti}
  271. g.compiledGuard.Lock()
  272. g.compiledPatterns[pattern] = gr
  273. g.compiledGuard.Unlock()
  274. return gr, nil
  275. }
  276. func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPattern) (string, semanticTypes, error) {
  277. ti := semanticTypes{}
  278. for _, values := range normal.FindAllStringSubmatch(pattern, -1) {
  279. names := strings.Split(values[1], ":")
  280. syntax, semantic, alias := names[0], names[0], names[0]
  281. if len(names) > 1 {
  282. semantic = names[1]
  283. alias = g.aliasizePatternName(semantic)
  284. }
  285. // Add type cast information only if type set, and not string
  286. if len(names) == 3 {
  287. if names[2] != "string" {
  288. ti[semantic] = names[2]
  289. }
  290. }
  291. storedPattern, ok := storedPatterns[syntax]
  292. if !ok {
  293. return "", ti, fmt.Errorf("no pattern found for %%{%s}", syntax)
  294. }
  295. var buffer bytes.Buffer
  296. if !g.config.NamedCapturesOnly || (g.config.NamedCapturesOnly && len(names) > 1) {
  297. buffer.WriteString("(?P<")
  298. buffer.WriteString(alias)
  299. buffer.WriteString(">")
  300. buffer.WriteString(storedPattern.expression)
  301. buffer.WriteString(")")
  302. } else {
  303. buffer.WriteString("(")
  304. buffer.WriteString(storedPattern.expression)
  305. buffer.WriteString(")")
  306. }
  307. //Merge type Informations
  308. for k, v := range storedPattern.typeInfo {
  309. //Lastest type information is the one to keep in memory
  310. if _, ok := ti[k]; !ok {
  311. ti[k] = v
  312. }
  313. }
  314. pattern = strings.Replace(pattern, values[0], buffer.String(), -1)
  315. }
  316. return pattern, ti, nil
  317. }
  318. func (g *Grok) aliasizePatternName(name string) string {
  319. alias := symbolic.ReplaceAllString(name, "_")
  320. g.aliases[alias] = name
  321. return alias
  322. }
  323. func (g *Grok) nameToAlias(name string) string {
  324. alias, ok := g.aliases[name]
  325. if ok {
  326. return alias
  327. }
  328. return name
  329. }
  330. // ParseStream will match the given pattern on a line by line basis from the reader
  331. // and apply the results to the process function
  332. func (g *Grok) ParseStream(reader *bufio.Reader, pattern string, process func(map[string]string) error) error {
  333. gr, err := g.compile(pattern)
  334. if err != nil {
  335. return err
  336. }
  337. for {
  338. line, err := reader.ReadString('\n')
  339. if err == io.EOF {
  340. return nil
  341. }
  342. if err != nil {
  343. return err
  344. }
  345. values, err := g.compiledParse(gr, line)
  346. if err != nil {
  347. return err
  348. }
  349. if err = process(values); err != nil {
  350. return err
  351. }
  352. }
  353. }