12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783 |
- // Copyright (c) 2011 Florian Weimer. All rights reserved.
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are
- // met:
- //
- // * Redistributions of source code must retain the above copyright
- // notice, this list of conditions and the following disclaimer.
- //
- // * Redistributions in binary form must reproduce the above copyright
- // notice, this list of conditions and the following disclaimer in the
- // documentation and/or other materials provided with the distribution.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- // This package provides access to the Perl Compatible Regular
- // Expression library, PCRE.
- //
- // It implements two main types, Regexp and Matcher. Regexp objects
- // store a compiled regular expression. They consist of two immutable
- // parts: pcre and pcre_extra. You can add pcre_extra to a compiled Regexp by
- // studying it with the Study() function.
- // Compilation of regular expressions using Compile or MustCompile is
- // slightly expensive, so these objects should be kept and reused,
- // instead of compiling them from scratch for each matching attempt.
- // CompileJIT and MustCompileJIT are far more expensive than the ordinary
- // functions, because they run Study() after the Regexp is compiled, but they
- // give much better performance:
- // http://sljit.sourceforge.net/regex_perf.html
- //
- // Matcher objects keep the results of a match against a []byte or
- // string subject. The Group and GroupString functions provide access
- // to capture groups; both versions work no matter if the subject was a
- // []byte or string.
- //
- // Matcher objects contain some temporary space and refer to the original
- // subject. They are mutable and can be reused (using Match,
- // MatchString, Reset or ResetString).
- //
- // Most of the Matcher.*String methods are just wrappers around the []byte
- // methods, so keep this in mind.
- //
- // For details on the regular expression language implemented by this
- // package and the flags defined below, see the PCRE documentation.
- // http://www.pcre.org/pcre.txt
- package pcre
- /*
- #cgo pkg-config: libpcre
- #include <pcre.h>
- #include <string.h>
- */
- import "C"
- import (
- "fmt"
- "strconv"
- "strings"
- "unsafe"
- )
- // Flags for Compile and Match functions.
- const (
- ANCHORED = C.PCRE_ANCHORED
- BSR_ANYCRLF = C.PCRE_BSR_ANYCRLF
- BSR_UNICODE = C.PCRE_BSR_UNICODE
- NEWLINE_ANY = C.PCRE_NEWLINE_ANY
- NEWLINE_ANYCRLF = C.PCRE_NEWLINE_ANYCRLF
- NEWLINE_CR = C.PCRE_NEWLINE_CR
- NEWLINE_CRLF = C.PCRE_NEWLINE_CRLF
- NEWLINE_LF = C.PCRE_NEWLINE_LF
- NO_UTF8_CHECK = C.PCRE_NO_UTF8_CHECK
- )
- // Flags for Compile functions
- const (
- CASELESS = C.PCRE_CASELESS
- DOLLAR_ENDONLY = C.PCRE_DOLLAR_ENDONLY
- DOTALL = C.PCRE_DOTALL
- DUPNAMES = C.PCRE_DUPNAMES
- EXTENDED = C.PCRE_EXTENDED
- EXTRA = C.PCRE_EXTRA
- FIRSTLINE = C.PCRE_FIRSTLINE
- JAVASCRIPT_COMPAT = C.PCRE_JAVASCRIPT_COMPAT
- MULTILINE = C.PCRE_MULTILINE
- NO_AUTO_CAPTURE = C.PCRE_NO_AUTO_CAPTURE
- UNGREEDY = C.PCRE_UNGREEDY
- UTF8 = C.PCRE_UTF8
- UCP = C.PCRE_UCP
- )
- // Flags for Match functions
- const (
- NOTBOL = C.PCRE_NOTBOL
- NOTEOL = C.PCRE_NOTEOL
- NOTEMPTY = C.PCRE_NOTEMPTY
- NOTEMPTY_ATSTART = C.PCRE_NOTEMPTY_ATSTART
- NO_START_OPTIMIZE = C.PCRE_NO_START_OPTIMIZE
- PARTIAL_HARD = C.PCRE_PARTIAL_HARD
- PARTIAL_SOFT = C.PCRE_PARTIAL_SOFT
- )
- // Flags for Study function
- const (
- STUDY_JIT_COMPILE = C.PCRE_STUDY_JIT_COMPILE
- STUDY_JIT_PARTIAL_SOFT_COMPILE = C.PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
- STUDY_JIT_PARTIAL_HARD_COMPILE = C.PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
- )
- // Flags for Config() fuction
- const (
- CONFIG_JIT = C.PCRE_CONFIG_JIT
- CONFIG_JITTARGET = C.PCRE_CONFIG_JITTARGET
- CONFIG_LINK_SIZE = C.PCRE_CONFIG_LINK_SIZE
- CONFIG_MATCH_LIMIT = C.PCRE_CONFIG_MATCH_LIMIT
- CONFIG_MATCH_LIMIT_RECURSION = C.PCRE_CONFIG_MATCH_LIMIT_RECURSION
- CONFIG_NEWLINE = C.PCRE_CONFIG_NEWLINE
- CONFIG_BSR = C.PCRE_CONFIG_BSR
- CONFIG_POSIX_MALLOC_THRESHOLD = C.PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
- CONFIG_STACKRECURSE = C.PCRE_CONFIG_STACKRECURSE
- CONFIG_UTF16 = C.PCRE_CONFIG_UTF16
- CONFIG_UTF32 = C.PCRE_CONFIG_UTF32
- CONFIG_UTF8 = C.PCRE_CONFIG_UTF8
- CONFIG_UNICODE_PROPERTIES = C.PCRE_CONFIG_UNICODE_PROPERTIES
- )
- // Exec-time and get/set-time error codes
- const (
- ERROR_NOMATCH = C.PCRE_ERROR_NOMATCH
- ERROR_NULL = C.PCRE_ERROR_NULL
- ERROR_BADOPTION = C.PCRE_ERROR_BADOPTION
- ERROR_BADMAGIC = C.PCRE_ERROR_BADMAGIC
- ERROR_UNKNOWN_OPCODE = C.PCRE_ERROR_UNKNOWN_OPCODE
- ERROR_UNKNOWN_NODE = C.PCRE_ERROR_UNKNOWN_NODE
- ERROR_NOMEMORY = C.PCRE_ERROR_NOMEMORY
- ERROR_NOSUBSTRING = C.PCRE_ERROR_NOSUBSTRING
- ERROR_MATCHLIMIT = C.PCRE_ERROR_MATCHLIMIT
- ERROR_CALLOUT = C.PCRE_ERROR_CALLOUT
- ERROR_BADUTF8 = C.PCRE_ERROR_BADUTF8
- ERROR_BADUTF8_OFFSET = C.PCRE_ERROR_BADUTF8_OFFSET
- ERROR_PARTIAL = C.PCRE_ERROR_PARTIAL
- ERROR_BADPARTIAL = C.PCRE_ERROR_BADPARTIAL
- ERROR_RECURSIONLIMIT = C.PCRE_ERROR_RECURSIONLIMIT
- ERROR_INTERNAL = C.PCRE_ERROR_INTERNAL
- ERROR_BADCOUNT = C.PCRE_ERROR_BADCOUNT
- ERROR_JIT_STACKLIMIT = C.PCRE_ERROR_JIT_STACKLIMIT
- )
- // This function returns information about libpcre configuration.
- // Function passed flag f to C.pcre_config() func, and convert returned
- // vaule to string type.
- // http://www.pcre.org/original/doc/html/pcre_config.html
- func Config(f int) (r string) {
- if f == C.PCRE_CONFIG_JITTARGET {
- var jittarget *C.char
- C.pcre_config(C.PCRE_CONFIG_JITTARGET, unsafe.Pointer(&jittarget))
- r = C.GoString(jittarget)
- } else {
- var i C.int
- C.pcre_config(C.int(f), unsafe.Pointer(&i))
- r = fmt.Sprint(int32(i))
- }
- return
- }
- // This function returns string, which contains all information
- // you can access by pcre_config() function
- func ConfigAll() (ret string) {
- var i C.int
- C.pcre_config(C.PCRE_CONFIG_JIT, unsafe.Pointer(&i))
- ret += fmt.Sprintf("jit: %d\n", int32(i))
- var jittarget *C.char
- C.pcre_config(C.PCRE_CONFIG_JITTARGET, unsafe.Pointer(&jittarget))
- ret += fmt.Sprintf("jittarget: %s\n", C.GoString(jittarget))
- C.pcre_config(C.PCRE_CONFIG_LINK_SIZE, unsafe.Pointer(&i))
- ret += fmt.Sprintf("link_size: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_MATCH_LIMIT, unsafe.Pointer(&i))
- ret += fmt.Sprintf("match_limit: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_MATCH_LIMIT_RECURSION, unsafe.Pointer(&i))
- ret += fmt.Sprintf("match_limit_recursion: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_NEWLINE, unsafe.Pointer(&i))
- ret += fmt.Sprintf("newline: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_BSR, unsafe.Pointer(&i))
- ret += fmt.Sprintf("bsr: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, unsafe.Pointer(&i))
- ret += fmt.Sprintf("posix_malloc_threshold: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_STACKRECURSE, unsafe.Pointer(&i))
- ret += fmt.Sprintf("stackrecurse: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_UTF16, unsafe.Pointer(&i))
- ret += fmt.Sprintf("utf16: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_UTF32, unsafe.Pointer(&i))
- ret += fmt.Sprintf("utf32: %d\n", int32(i))
- C.pcre_config(C.PCRE_CONFIG_UTF8, unsafe.Pointer(&i))
- ret += fmt.Sprintf("utf8: %d", int32(i))
- C.pcre_config(C.PCRE_CONFIG_UNICODE_PROPERTIES, unsafe.Pointer(&i))
- ret += fmt.Sprintf("unicode_properties: %d\n", int32(i))
- return
- }
// Regexp is a compiled regular expression.
// Obtain one from Compile, MustCompile or one of their variants.
type Regexp struct {
	// ptr is a Go-side byte copy of the compiled pattern produced by
	// pcre_compile.
	ptr []byte
	// extra is a Go-side byte copy of the data returned by pcre_study;
	// it stays nil until the regexp has been studied.
	extra []byte
}
- // Number of bytes in the compiled pattern
- func pcresize(ptr *C.pcre) (size C.size_t) {
- C.pcre_fullinfo(ptr, nil, C.PCRE_INFO_SIZE, unsafe.Pointer(&size))
- return
- }
- func pcreJITsize(ptr *C.pcre, ext *C.pcre_extra) (size C.size_t) {
- C.pcre_fullinfo(ptr, ext, C.PCRE_INFO_JITSIZE, unsafe.Pointer(&size))
- return
- }
- // Number of capture groups
- func pcregroups(ptr *C.pcre) (count C.int) {
- C.pcre_fullinfo(ptr, nil,
- C.PCRE_INFO_CAPTURECOUNT, unsafe.Pointer(&count))
- return
- }
- // Returns string with regex pattern and int with fpcre flags.
- // Flags are specified before the regex in form like this "(?flags)regex"
- // Supported symbols i=CASELESS; m=MULTILINE; s=DOTALL; U=UNGREEDY; J=DUPNAMES;
- // x=EXTENDED; X=EXTRA; D=DOLLAR_ENDONLY; u=UTF8|UCP;
- func ParseFlags(ptr string) (string, int) {
- fReg := MustCompile("^\\(\\?[a-zA-Z]+?\\)", 0)
- flags := 0
- for fStr := fReg.FindString(ptr, 0); fStr != ""; ptr = ptr[len(fStr):] {
- fStr = fReg.FindString(ptr, 0)
- if strings.Contains(fStr, "i") {
- flags = flags | CASELESS
- }
- if strings.Contains(fStr, "D") {
- flags = flags | DOLLAR_ENDONLY
- }
- if strings.Contains(fStr, "s") {
- flags = flags | DOTALL
- }
- if strings.Contains(fStr, "J") {
- flags = flags | DUPNAMES
- }
- if strings.Contains(fStr, "x") {
- flags = flags | EXTENDED
- }
- if strings.Contains(fStr, "X") {
- flags = flags | EXTRA
- }
- if strings.Contains(fStr, "m") {
- flags = flags | MULTILINE
- }
- if strings.Contains(fStr, "U") {
- flags = flags | UNGREEDY
- }
- if strings.Contains(fStr, "u") {
- flags = flags | UTF8 | UCP
- }
- }
- return ptr, flags
- }
- // Try to compile the pattern. If an error occurs, the second return
- // value is non-nil.
- func Compile(pattern string, flags int) (Regexp, error) {
- patternC := C.CString(pattern)
- defer C.free(unsafe.Pointer(patternC))
- if clen := int(C.strlen(patternC)); clen != len(pattern) {
- return Regexp{}, fmt.Errorf("%s (%d): %s",
- pattern,
- clen,
- "NUL byte in pattern",
- )
- }
- var errptr *C.char
- var erroffset C.int
- ptr := C.pcre_compile(patternC, C.int(flags), &errptr, &erroffset, nil)
- if ptr == nil {
- return Regexp{}, fmt.Errorf("%s (%d): %s",
- pattern,
- int(erroffset),
- C.GoString(errptr),
- )
- }
- defer C.free(unsafe.Pointer(ptr))
- psize := pcresize(ptr)
- var re Regexp
- re.ptr = make([]byte, psize)
- C.memcpy(unsafe.Pointer(&re.ptr[0]), unsafe.Pointer(ptr), psize)
- return re, nil
- }
- // Try to parse flags of regex and compile it. If an error occurs,
- // the second return value is non-nil.
- func CompileParse(ptr string) (Regexp, error) {
- ptr, f := ParseFlags(ptr)
- retRegex, err := Compile(ptr, f)
- if err != nil {
- return Regexp{}, fmt.Errorf("can't compile/study pcre regexp: %s\nFlags:%b", ptr, f)
- }
- return retRegex, nil
- }
- // Compile pattern with jit compilation. flagC is Compile flags,
- // flagS is study flag.
- func CompileJIT(pattern string, flagsC, flagsS int) (Regexp, error) {
- patternC := C.CString(pattern)
- defer C.free(unsafe.Pointer(patternC))
- if clen := int(C.strlen(patternC)); clen != len(pattern) {
- return Regexp{}, fmt.Errorf("%s (%d): %s",
- pattern,
- clen,
- "NUL byte in pattern",
- )
- }
- var errptr *C.char
- var erroffset C.int
- ptr := C.pcre_compile(patternC, C.int(flagsC), &errptr, &erroffset, nil)
- if ptr == nil {
- return Regexp{}, fmt.Errorf("%s (%d): %s",
- pattern,
- int(erroffset),
- C.GoString(errptr),
- )
- }
- psize := pcresize(ptr)
- var re Regexp
- re.ptr = make([]byte, psize)
- C.memcpy(unsafe.Pointer(&re.ptr[0]), unsafe.Pointer(ptr), psize)
- errS := re.study(flagsS)
- if errS != nil {
- return re, fmt.Errorf("study error: %s", errS)
- }
- return re, nil
- }
- // Try to parse flags of regex and compile it with JIT optimization.
- // If an error occurs, the second return value is non-nil.
- func CompileParseJIT(ptr string, flags int) (Regexp, error) {
- ptr, f := ParseFlags(ptr)
- retRegex, err := CompileJIT(ptr, f, flags)
- if err != nil {
- return Regexp{}, fmt.Errorf("can't compile/study pcre regexp: %s\nFlags:%b\nFlagsJIT%b", ptr, f, flags)
- }
- return retRegex, nil
- }
- // Compile the pattern. If compilation fails, panic.
- func MustCompile(pattern string, flag int) (re Regexp) {
- re, err := Compile(pattern, flag)
- if err != nil {
- panic(err)
- }
- return
- }
- // CompileParse the pattern. If compilation fails, panic.
- func MustCompileParse(pattern string) (re Regexp) {
- re, err := CompileParse(pattern)
- if err != nil {
- panic(err)
- }
- return
- }
- // CompileJIT the pattern. If compilation fails, panic.
- func MustCompileJIT(pattern string, flagsC, flagsS int) (re Regexp) {
- re, err := CompileJIT(pattern, flagsC, flagsS)
- if err != nil {
- panic(err)
- }
- return
- }
- // CompileParseJIT the pattern. If compilation fails, panic.
- func MustCompileParseJIT(pattern string, flags int) (re Regexp) {
- re, err := CompileParseJIT(pattern, flags)
- if err != nil {
- panic(err)
- }
- return
- }
- // Return the start and end of the first match.
- func (re *Regexp) FindAllIndex(bytes []byte, flags int) (r [][]int) {
- m := re.Matcher(bytes, flags)
- offset := 0
- for m.Match(bytes[offset:], flags) {
- r = append(r, []int{offset + int(m.ovector[0]), offset + int(m.ovector[1])})
- offset += int(m.ovector[1])
- }
- return
- }
- // Return the start and end of the first match, or nil if no match.
- // loc[0] is the start and loc[1] is the end.
- func (re *Regexp) FindIndex(bytes []byte, flags int) []int {
- m := re.Matcher(bytes, flags)
- if m.Matches {
- return []int{int(m.ovector[0]), int(m.ovector[1])}
- }
- return nil
- }
- // Return the start and end of the first match, or nil if no match.
- // loc[0] is the start and loc[1] is the end.
- func (re *Regexp) FindString(s string, flags int) string {
- m := re.Matcher([]byte(s), flags)
- if m.Matches {
- return s[int(m.ovector[0]):int(m.ovector[1])]
- }
- return ""
- }
- // Returns the number of capture groups in the compiled regexp pattern.
- func (re Regexp) Groups() int {
- if re.ptr == nil {
- panic("Regexp.Groups: uninitialized")
- }
- return int(pcregroups((*C.pcre)(unsafe.Pointer(&re.ptr[0]))))
- }
- // Tries to match the speficied byte array slice to the current pattern.
- // Returns true if the match succeeds.
- func (r *Regexp) Match(subject []byte, flags int) bool {
- m := r.Matcher(subject, flags)
- return m.Matches
- }
- // Same as Match, but accept string as argument
- func (r *Regexp) MatchString(subject string, flags int) bool {
- m := r.Matcher([]byte(subject), flags)
- return m.Matches
- }
- // Returns a new matcher object, with the byte array slice as a
- // subject.
- func (re Regexp) Matcher(subject []byte, flags int) (m *Matcher) {
- m = new(Matcher)
- m.Reset(re, subject, flags)
- return
- }
- // Returns a new matcher object, with the specified subject string.
- func (re Regexp) MatcherString(subject string, flags int) (m *Matcher) {
- m = new(Matcher)
- m.ResetString(re, subject, flags)
- return
- }
- // Return a copy of a byte slice with pattern matches replaced by repl.
- func (re Regexp) ReplaceAll(bytes, repl []byte, flags int) []byte {
- m := re.Matcher(bytes, 0)
- r := []byte{}
- for m.Match(bytes, flags) {
- r = append(append(r, bytes[:m.ovector[0]]...), repl...)
- bytes = bytes[m.ovector[1]:]
- }
- return append(r, bytes...)
- }
- // Same as ReplaceAll, but accept strings as arguments
- func (re Regexp) ReplaceAllString(src, repl string, flags int) string {
- return string(re.ReplaceAll([]byte(src), []byte(repl), flags))
- }
- // Study regexp and add pcre_extra information to it, which gives huge
- // speed boost when matching. If an error occurs, return value is
- // non-nil. If flags = 0, don't study at all and return error.
- // Studying can be quite a heavy optimization, but it's worth it.
- func (re *Regexp) study(flags int) error {
- if re.extra != nil {
- return fmt.Errorf("regexp already optimized")
- }
- if flags <= 0 {
- return fmt.Errorf("flag must be > 0")
- }
- var err *C.char
- extra := C.pcre_study((*C.pcre)(unsafe.Pointer(&re.ptr[0])), C.int(flags), &err)
- if err != nil {
- return fmt.Errorf(C.GoString(err))
- }
- defer C.free(unsafe.Pointer(extra))
- size := pcreJITsize((*C.pcre)(unsafe.Pointer(&re.ptr[0])), extra)
- if size > 0 {
- re.extra = make([]byte, size)
- C.memcpy(unsafe.Pointer(&re.extra[0]), unsafe.Pointer(extra), size)
- return nil
- } else {
- return fmt.Errorf(C.GoString(err))
- }
- }
- // Matcher objects provide a place for storing match results.
- // They can be created by the Matcher and MatcherString functions,
- // or they can be initialized with Reset or ResetString.
- type Matcher struct {
- re Regexp
- Groups int
- ovector []int32 // space for capture offsets, int32 is analogfor C.int type
- Matches bool // last match was successful
- Error error // pcre_exec error from last match
- Partial bool // was the last match a partial match?
- SubjectS string // contain finded subject as string
- SubjectB []byte // contain finded subject as []byte
- }
- // Tries to match the speficied byte array slice to the current
- // pattern. Returns exec result.
- // C docs http://www.pcre.org/original/doc/html/pcre_exec.html
- func (m *Matcher) Exec(subject []byte, flags int) int {
- if m.re.ptr == nil {
- panic("Matcher.Match: uninitialized")
- }
- length := len(subject)
- m.SubjectS = string(subject)
- m.SubjectB = subject
- if length == 0 {
- subject = nullbyte // make first character adressable
- }
- subjectptr := (*C.char)(unsafe.Pointer(&subject[0]))
- return m.exec(subjectptr, length, flags)
- }
- // Same as Exec, but accept string as argument
- func (m *Matcher) ExecString(subject string, flags int) int {
- return m.Exec([]byte(subject), flags)
- }
- func (m *Matcher) exec(subjectptr *C.char, length, flags int) int {
- var extra *C.pcre_extra
- if m.re.extra != nil {
- extra = (*C.pcre_extra)(unsafe.Pointer(&m.re.extra[0]))
- } else {
- extra = nil
- }
- rc := C.pcre_exec((*C.pcre)(unsafe.Pointer(&m.re.ptr[0])), extra,
- subjectptr, C.int(length), 0, C.int(flags),
- (*C.int)(unsafe.Pointer(&m.ovector[0])), C.int(len(m.ovector)))
- return int(rc)
- }
- // Returns the captured string with submatches of the last match
- // (performed by Matcher, MatcherString, Reset, ResetString, Match,
- // or MatchString). Group 0 is the part of the subject which matches
- // the whole pattern; the first actual capture group is numbered 1.
- // Capture groups which are not present return a nil slice.
- func (m *Matcher) Extract() [][]byte {
- if m.Matches {
- captured_texts := make([][]byte, m.Groups+1)
- captured_texts[0] = m.SubjectB
- for i := 1; i < m.Groups+1; i++ {
- start := m.ovector[2*i]
- end := m.ovector[2*i+1]
- captured_text := m.SubjectB[start:end]
- captured_texts[i] = captured_text
- }
- return captured_texts
- } else {
- return nil
- }
- }
- // Same as Extract, but returns []string
- func (m *Matcher) ExtractString() []string {
- if m.Matches {
- captured_texts := make([]string, m.Groups+1)
- captured_texts[0] = m.SubjectS
- for i := 1; i < m.Groups+1; i++ {
- start := m.ovector[2*i]
- end := m.ovector[2*i+1]
- captured_text := m.SubjectS[start:end]
- captured_texts[i] = captured_text
- }
- return captured_texts
- } else {
- return nil
- }
- }
- func (m *Matcher) init(re Regexp) {
- m.Matches = false
- if m.re.ptr != nil && &m.re.ptr[0] == &re.ptr[0] {
- // Skip group count extraction if the matcher has
- // already been initialized with the same regular
- // expression.
- return
- }
- m.re = re
- m.Groups = re.Groups()
- if ovectorlen := 3 * (1 + m.Groups); len(m.ovector) < ovectorlen {
- m.ovector = make([]int32, int32(ovectorlen))
- }
- }
// nullbyte is a one-byte slice holding a single zero; Exec points at it
// when the subject is empty so that there is an addressable first byte.
var nullbyte = make([]byte, 1)
- // Returns the numbered capture group of the last match (performed by
- // Matcher, MatcherString, Reset, ResetString, Match, or MatchString).
- // Group 0 is the part of the subject which matches the whole pattern;
- // the first actual capture group is numbered 1. Capture groups which
- // are not present return a nil slice.
- func (m *Matcher) Group(group int) []byte {
- start := m.ovector[2*group]
- end := m.ovector[2*group+1]
- if start >= 0 {
- return m.SubjectB[start:end]
- }
- return nil
- }
- // Returns the numbered capture group positions of the last match
- // (performed by Matcher, MatcherString, Reset, ResetString, Match,
- // or MatchString). Group 0 is the part of the subject which matches
- // the whole pattern; the first actual capture group is numbered 1.
- // Capture groups which are not present return a nil slice.
- func (m *Matcher) GroupIndices(group int) []int {
- start := m.ovector[2*group]
- end := m.ovector[2*group+1]
- if start >= 0 {
- return []int{int(start), int(end)}
- }
- return nil
- }
- // Same as Group, but returns string
- func (m *Matcher) GroupString(group int) string {
- start := m.ovector[2*group]
- end := m.ovector[2*group+1]
- if start >= 0 {
- return m.SubjectS[start:end]
- }
- return ""
- }
- // Index returns the start and end of the first match, if a previous
- // call to Matcher, MatcherString, Reset, ResetString, Match or
- // MatchString succeeded. loc[0] is the start and loc[1] is the end.
- func (m *Matcher) Index() []int {
- if !m.Matches {
- return nil
- }
- return []int{int(m.ovector[0]), int(m.ovector[1])}
- }
- // Tries to match the speficied byte array slice to the current
- // pattern. Returns true if the match succeeds.
- func (m *Matcher) Match(subject []byte, flags int) bool {
- rc := m.Exec(subject, flags)
- m.Matches, m.Error = checkMatch(rc)
- m.Partial = (rc == C.PCRE_ERROR_PARTIAL)
- return m.Matches
- }
- // Tries to match the speficied subject string to the current pattern.
- // Returns true if the match succeeds.
- func (m *Matcher) MatchString(subject string, flags int) bool {
- rc := m.ExecString(subject, flags)
- m.Matches, m.Error = checkMatch(rc)
- m.Partial = (rc == ERROR_PARTIAL)
- return m.Matches
- }
- func checkMatch(rc int) (bool, error) {
- switch {
- case rc >= 0 || rc == ERROR_PARTIAL:
- return true, nil
- case rc == ERROR_NOMATCH:
- return false, nil
- case rc == ERROR_NULL:
- return false, fmt.Errorf("%d, pcre_exec: one or more variables passed to pcre_exec == NULL", ERROR_NULL)
- case rc == ERROR_BADOPTION:
- return false, fmt.Errorf("%d, pcre_exec: An unrecognized bit was set in the options argument", ERROR_BADOPTION)
- case rc == ERROR_BADMAGIC:
- return false, fmt.Errorf("%d, pcre_exec: invalid option flag", ERROR_BADMAGIC)
- case rc == ERROR_UNKNOWN_OPCODE:
- return false, fmt.Errorf("%d, pcre_exec: an unknown item was encountered in the compiled pattern", ERROR_UNKNOWN_OPCODE)
- case rc == ERROR_NOMEMORY:
- return false, fmt.Errorf("%d, pcre_exec: match limit", ERROR_NOMEMORY)
- case rc == ERROR_MATCHLIMIT:
- return false, fmt.Errorf("%d, pcre_exec: backtracking (match) limit was reached", ERROR_MATCHLIMIT)
- case rc == ERROR_BADUTF8:
- return false, fmt.Errorf("%d, pcre_exec: string that contains an invalid UTF-8 byte sequence was passed as a subject", ERROR_BADUTF8)
- case rc == ERROR_RECURSIONLIMIT:
- return false, fmt.Errorf("%d, pcre_exec: recursion limit", ERROR_RECURSIONLIMIT)
- case rc == ERROR_JIT_STACKLIMIT:
- return false, fmt.Errorf("%d, pcre_exec: error JIT stack limit", ERROR_JIT_STACKLIMIT)
- case rc == ERROR_INTERNAL:
- panic("pcre_exec: INTERNAL ERROR")
- case rc == ERROR_BADCOUNT:
- panic("pcre_exec: INTERNAL ERROR")
- }
- panic("unexepected return code from pcre_exec: " +
- strconv.Itoa(int(rc)))
- }
- func (m *Matcher) name2index(name string) (group int, err error) {
- if m.re.ptr == nil {
- err = fmt.Errorf("Matcher.Named: uninitialized")
- return
- }
- name1 := C.CString(name)
- defer C.free(unsafe.Pointer(name1))
- group = int(C.pcre_get_stringnumber(
- (*C.pcre)(unsafe.Pointer(&m.re.ptr[0])), name1))
- if group < 0 {
- err = fmt.Errorf("Matcher.Named: unknown name: " + name)
- return
- }
- return
- }
- // Returns the value of the named capture group. This is a nil slice
- // if the capture group is not present. Panics if the name does not
- // refer to a group.
- func (m *Matcher) Named(group string) (g []byte, err error) {
- group_num, err := m.name2index(group)
- if err != nil {
- return
- }
- return m.Group(group_num), nil
- }
- // Returns true if the named capture group is present. Panics if the
- // name does not refer to a group.
- func (m *Matcher) NamedPresent(group string) (pres bool) {
- group_num, err := m.name2index(group)
- if err != nil {
- return false
- }
- return m.Present(group_num)
- }
- // Returns the value of the named capture group, or an empty string if
- // the capture group is not present. Panics if the name does not
- // refer to a group.
- func (m *Matcher) NamedString(group string) (g string, err error) {
- group_num, err := m.name2index(group)
- if err != nil {
- return
- }
- return m.GroupString(group_num), nil
- }
- // Returns true if the numbered capture group is present in the last
- // match (performed by Matcher, MatcherString, Reset, ResetString,
- // Match, or MatchString). Group numbers start at 1. A capture group
- // can be present and match the empty string.
- func (m *Matcher) Present(group int) bool {
- return m.ovector[2*group] >= 0
- }
- // Switches the matcher object to the specified pattern and subject.
- func (m *Matcher) Reset(re Regexp, subject []byte, flags int) {
- if re.ptr == nil {
- panic("Regexp.Matcher: uninitialized")
- }
- m.init(re)
- m.Match(subject, flags)
- }
- // Switches the matcher object to the specified pattern and subject
- // string.
- func (m *Matcher) ResetString(re Regexp, subject string, flags int) {
- if re.ptr == nil {
- panic("Regexp.Matcher: uninitialized")
- }
- m.init(re)
- m.MatchString(subject, flags)
- }
|