123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414 |
- package grok
- import (
- "bufio"
- "bytes"
- "fmt"
- "io"
- "os"
- "path/filepath"
- "regexp"
- "strconv"
- "strings"
- "sync"
- )
// Regular expressions used to find and rewrite %{...} pattern references.
var (
	// canonical matches %{SYNTAX}, %{SYNTAX:SEMANTIC} and
	// %{SYNTAX:SEMANTIC:TYPE} where every part consists of word characters
	// only. Submatch 1 is the text between the braces.
	canonical = regexp.MustCompile(`%{(\w+(?::\w+(?::\w+)?)?)}`)

	// normal matches the same three shapes as canonical but also accepts
	// '-' and '.' inside each part.
	normal = regexp.MustCompile(`%{([\w-.]+(?::[\w-.]+(?::[\w-.]+)?)?)}`)

	// symbolic matches a single non-word character.
	symbolic = regexp.MustCompile(`\W`)
)
// Config holds the options that control how a Grok parser is built and how
// it reports captures.
type Config struct {
	// NamedCapturesOnly, when true, makes only references that carry a
	// semantic name (%{SYNTAX:SEMANTIC}) produce a named capture group.
	NamedCapturesOnly bool
	// SkipDefaultPatterns, when true, stops NewWithConfig from loading the
	// package's built-in pattern set.
	SkipDefaultPatterns bool
	// RemoveEmptyValues, when true, drops captures whose matched text is
	// empty from parse results.
	RemoveEmptyValues bool
	// PatternsDir lists paths that NewWithConfig loads through
	// AddPatternsFromPath.
	PatternsDir []string
	// Patterns maps pattern names to expressions; NewWithConfig loads them
	// through AddPatternsFromMap.
	Patterns map[string]string
}
- // Grok object us used to load patterns and deconstruct strings using those
- // patterns.
- type Grok struct {
- rawPattern map[string]string
- config *Config
- aliases map[string]string
- compiledPatterns map[string]*gRegexp
- patterns map[string]*gPattern
- patternsGuard *sync.RWMutex
- compiledGuard *sync.RWMutex
- }
// semanticTypes maps a semantic (capture) name to the type it should be cast
// to, e.g. "int" or "float".
type semanticTypes map[string]string

// gPattern is a stored pattern whose %{...} references have already been
// expanded into a plain regular expression.
type gPattern struct {
	expression string        // expanded regular expression source
	typeInfo   semanticTypes // cast information collected during expansion
}

// gRegexp pairs a compiled expression with the cast information that was
// collected while it was expanded.
type gRegexp struct {
	regexp   *regexp.Regexp
	typeInfo semanticTypes
}
- // New returns a Grok object.
- func New() (*Grok, error) {
- return NewWithConfig(&Config{})
- }
- // NewWithConfig returns a Grok object that is configured to behave according
- // to the supplied Config structure.
- func NewWithConfig(config *Config) (*Grok, error) {
- g := &Grok{
- config: config,
- aliases: map[string]string{},
- compiledPatterns: map[string]*gRegexp{},
- patterns: map[string]*gPattern{},
- rawPattern: map[string]string{},
- patternsGuard: new(sync.RWMutex),
- compiledGuard: new(sync.RWMutex),
- }
- if !config.SkipDefaultPatterns {
- g.AddPatternsFromMap(patterns)
- }
- if len(config.PatternsDir) > 0 {
- for _, path := range config.PatternsDir {
- err := g.AddPatternsFromPath(path)
- if err != nil {
- return nil, err
- }
- }
- }
- if err := g.AddPatternsFromMap(config.Patterns); err != nil {
- return nil, err
- }
- return g, nil
- }
- // AddPattern adds a new pattern to the list of loaded patterns.
- func (g *Grok) addPattern(name, pattern string) error {
- dnPattern, ti, err := g.denormalizePattern(pattern, g.patterns)
- if err != nil {
- return err
- }
- g.patterns[name] = &gPattern{expression: dnPattern, typeInfo: ti}
- return nil
- }
- // AddPattern adds a named pattern to grok
- func (g *Grok) AddPattern(name, pattern string) error {
- g.patternsGuard.Lock()
- defer g.patternsGuard.Unlock()
- g.rawPattern[name] = pattern
- g.buildPatterns()
- return nil
- }
- // AddPatternsFromMap loads a map of named patterns
- func (g *Grok) AddPatternsFromMap(m map[string]string) error {
- g.patternsGuard.Lock()
- defer g.patternsGuard.Unlock()
- for name, pattern := range m {
- g.rawPattern[name] = pattern
- }
- return g.buildPatterns()
- }
- // AddPatternsFromMap adds new patterns from the specified map to the list of
- // loaded patterns.
- func (g *Grok) addPatternsFromMap(m map[string]string) error {
- patternDeps := graph{}
- for k, v := range m {
- keys := []string{}
- for _, key := range canonical.FindAllStringSubmatch(v, -1) {
- names := strings.Split(key[1], ":")
- syntax := names[0]
- if g.patterns[syntax] == nil {
- if _, ok := m[syntax]; !ok {
- return fmt.Errorf("no pattern found for %%{%s}", syntax)
- }
- }
- keys = append(keys, syntax)
- }
- patternDeps[k] = keys
- }
- order, _ := sortGraph(patternDeps)
- for _, key := range reverseList(order) {
- g.addPattern(key, m[key])
- }
- return nil
- }
- // AddPatternsFromPath adds new patterns from the files in the specified
- // directory to the list of loaded patterns.
- func (g *Grok) AddPatternsFromPath(path string) error {
- if fi, err := os.Stat(path); err == nil {
- if fi.IsDir() {
- path = path + "/*"
- }
- } else {
- return fmt.Errorf("invalid path : %s", path)
- }
- // only one error can be raised, when pattern is malformed
- // pattern is hard-coded "/*" so we ignore err
- files, _ := filepath.Glob(path)
- var filePatterns = map[string]string{}
- for _, fileName := range files {
- file, err := os.Open(fileName)
- if err != nil {
- return err
- }
- scanner := bufio.NewScanner(bufio.NewReader(file))
- for scanner.Scan() {
- l := scanner.Text()
- if len(l) > 0 && l[0] != '#' {
- names := strings.SplitN(l, " ", 2)
- filePatterns[names[0]] = names[1]
- }
- }
- file.Close()
- }
- return g.AddPatternsFromMap(filePatterns)
- }
- // Match returns true if the specified text matches the pattern.
- func (g *Grok) Match(pattern, text string) (bool, error) {
- gr, err := g.compile(pattern)
- if err != nil {
- return false, err
- }
- if ok := gr.regexp.MatchString(text); !ok {
- return false, nil
- }
- return true, nil
- }
- // compiledParse parses the specified text and returns a map with the results.
- func (g *Grok) compiledParse(gr *gRegexp, text string) (map[string]string, error) {
- captures := make(map[string]string)
- if match := gr.regexp.FindStringSubmatch(text); len(match) > 0 {
- for i, name := range gr.regexp.SubexpNames() {
- if name != "" {
- if g.config.RemoveEmptyValues && match[i] == "" {
- continue
- }
- name = g.nameToAlias(name)
- captures[name] = match[i]
- }
- }
- }
- return captures, nil
- }
- // Parse the specified text and return a map with the results.
- func (g *Grok) Parse(pattern, text string) (map[string]string, error) {
- gr, err := g.compile(pattern)
- if err != nil {
- return nil, err
- }
- return g.compiledParse(gr, text)
- }
- // ParseTyped returns a inteface{} map with typed captured fields based on provided pattern over the text
- func (g *Grok) ParseTyped(pattern string, text string) (map[string]interface{}, error) {
- gr, err := g.compile(pattern)
- if err != nil {
- return nil, err
- }
- match := gr.regexp.FindStringSubmatch(text)
- captures := make(map[string]interface{})
- if len(match) > 0 {
- for i, segmentName := range gr.regexp.SubexpNames() {
- if len(segmentName) != 0 {
- if g.config.RemoveEmptyValues == true && match[i] == "" {
- continue
- }
- name := g.nameToAlias(segmentName)
- if segmentType, ok := gr.typeInfo[segmentName]; ok {
- switch segmentType {
- case "int":
- captures[name], _ = strconv.Atoi(match[i])
- case "float":
- captures[name], _ = strconv.ParseFloat(match[i], 64)
- default:
- return nil, fmt.Errorf("ERROR the value %s cannot be converted to %s", match[i], segmentType)
- }
- } else {
- captures[name] = match[i]
- }
- }
- }
- }
- return captures, nil
- }
- // ParseToMultiMap parses the specified text and returns a map with the
- // results. Values are stored in an string slice, so values from captures with
- // the same name don't get overridden.
- func (g *Grok) ParseToMultiMap(pattern, text string) (map[string][]string, error) {
- gr, err := g.compile(pattern)
- if err != nil {
- return nil, err
- }
- captures := make(map[string][]string)
- if match := gr.regexp.FindStringSubmatch(text); len(match) > 0 {
- for i, name := range gr.regexp.SubexpNames() {
- if name != "" {
- if g.config.RemoveEmptyValues == true && match[i] == "" {
- continue
- }
- name = g.nameToAlias(name)
- captures[name] = append(captures[name], match[i])
- }
- }
- }
- return captures, nil
- }
- func (g *Grok) buildPatterns() error {
- g.patterns = map[string]*gPattern{}
- return g.addPatternsFromMap(g.rawPattern)
- }
- func (g *Grok) compile(pattern string) (*gRegexp, error) {
- g.compiledGuard.RLock()
- gr, ok := g.compiledPatterns[pattern]
- g.compiledGuard.RUnlock()
- if ok {
- return gr, nil
- }
- g.patternsGuard.RLock()
- newPattern, ti, err := g.denormalizePattern(pattern, g.patterns)
- g.patternsGuard.RUnlock()
- if err != nil {
- return nil, err
- }
- compiledRegex, err := regexp.Compile(newPattern)
- if err != nil {
- return nil, err
- }
- gr = &gRegexp{regexp: compiledRegex, typeInfo: ti}
- g.compiledGuard.Lock()
- g.compiledPatterns[pattern] = gr
- g.compiledGuard.Unlock()
- return gr, nil
- }
- func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPattern) (string, semanticTypes, error) {
- ti := semanticTypes{}
- for _, values := range normal.FindAllStringSubmatch(pattern, -1) {
- names := strings.Split(values[1], ":")
- syntax, semantic, alias := names[0], names[0], names[0]
- if len(names) > 1 {
- semantic = names[1]
- alias = g.aliasizePatternName(semantic)
- }
- // Add type cast information only if type set, and not string
- if len(names) == 3 {
- if names[2] != "string" {
- ti[semantic] = names[2]
- }
- }
- storedPattern, ok := storedPatterns[syntax]
- if !ok {
- return "", ti, fmt.Errorf("no pattern found for %%{%s}", syntax)
- }
- var buffer bytes.Buffer
- if !g.config.NamedCapturesOnly || (g.config.NamedCapturesOnly && len(names) > 1) {
- buffer.WriteString("(?P<")
- buffer.WriteString(alias)
- buffer.WriteString(">")
- buffer.WriteString(storedPattern.expression)
- buffer.WriteString(")")
- } else {
- buffer.WriteString("(")
- buffer.WriteString(storedPattern.expression)
- buffer.WriteString(")")
- }
- //Merge type Informations
- for k, v := range storedPattern.typeInfo {
- //Lastest type information is the one to keep in memory
- if _, ok := ti[k]; !ok {
- ti[k] = v
- }
- }
- pattern = strings.Replace(pattern, values[0], buffer.String(), -1)
- }
- return pattern, ti, nil
- }
- func (g *Grok) aliasizePatternName(name string) string {
- alias := symbolic.ReplaceAllString(name, "_")
- g.aliases[alias] = name
- return alias
- }
- func (g *Grok) nameToAlias(name string) string {
- alias, ok := g.aliases[name]
- if ok {
- return alias
- }
- return name
- }
- // ParseStream will match the given pattern on a line by line basis from the reader
- // and apply the results to the process function
- func (g *Grok) ParseStream(reader *bufio.Reader, pattern string, process func(map[string]string) error) error {
- gr, err := g.compile(pattern)
- if err != nil {
- return err
- }
- for {
- line, err := reader.ReadString('\n')
- if err == io.EOF {
- return nil
- }
- if err != nil {
- return err
- }
- values, err := g.compiledParse(gr, line)
- if err != nil {
- return err
- }
- if err = process(values); err != nil {
- return err
- }
- }
- }
|