123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432 |
- package xregex
/*
Go version of the regex parser.
Reference implementation: https://github.com/aristotle9/as3cc/tree/master/java-template/src/org/lala/lex/utils/parser
*/
- import (
- "errors"
- "fmt"
- "strconv"
- )
const (
	// _initial is the name of the start condition selected by setSource.
	_initial = "INITIAL"

	// _deadState is the state number that stands for "no transition".
	_deadState = 0xFFFFFFFF

	// _maxValue is used by the scanner as a "no previous character" sentinel.
	_maxValue = 0x7FFFFFFFFFFFFFFF
)
// errEOF is returned by next when it is called again after the end-of-input
// token "<$>" has already been delivered.
var errEOF = errors.New("已经到达末尾")
- // Lexer golang lexter
- type lexer struct {
- transTable []*stateTransItem
- finalTable map[int64]int64
- initialTable map[string]int64
- inputTable []*rangeItem
- start int64
- oldStart int64
- tokenName string
- yyText interface{}
- yy interface{}
- ended bool
- initialInput int64
- initialState string
- line int64
- column int64
- advanced bool
- source string
- }
- func newLexer() (lx *lexer) {
- lx = &lexer{}
- lx.transTable = []*stateTransItem{
- {false, []int64{0xFFFFFFFF, 0x3, 0x2, 0x1},
- []*rangeItem{{0, 32, 0}, {33, 33, 1},
- {34, 34, 2}, {35, 35, 3}}},
- {false,
- []int64{0xFFFFFFFF, 0xF, 0xE, 0xD, 0xC, 0xB, 0xA, 0x9, 0x8, 0x7, 0x6, 0x5,
- 0x4},
- []*rangeItem{{0, 0, 0}, {1, 1, 1},
- {2, 2, 2}, {3, 3, 3}, {4, 4, 4},
- {5, 5, 5}, {6, 6, 6}, {7, 7, 7},
- {8, 28, 8}, {29, 29, 9}, {30, 30, 10},
- {31, 31, 11}, {32, 32, 12},
- {33, 35, 0}}},
- {false, []int64{0xFFFFFFFF, 0xF, 0xE, 0xD, 0x8, 0x12, 0x11, 0x10},
- []*rangeItem{{0, 0, 0}, {1, 1, 1},
- {2, 2, 2}, {3, 3, 3}, {4, 7, 4},
- {8, 8, 5}, {9, 9, 6}, {10, 27, 4},
- {28, 28, 7}, {29, 32, 4},
- {33, 35, 0}}},
- {false, []int64{0xFFFFFFFF, 0x16, 0x15, 0x14, 0x13},
- []*rangeItem{{0, 21, 0}, {22, 24, 1},
- {25, 25, 2}, {26, 26, 3}, {27, 27, 4},
- {28, 35, 0}}},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil},
- {false,
- []int64{0xFFFFFFFF, 0x1F, 0x17, 0xE, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x1E, 0x21,
- 0x20, 0x18},
- []*rangeItem{{0, 0, 0}, {1, 1, 1},
- {2, 9, 2}, {10, 11, 3}, {12, 12, 4},
- {13, 13, 5}, {14, 14, 6}, {15, 15, 7},
- {16, 16, 8}, {17, 18, 2}, {19, 19, 9},
- {20, 20, 10}, {21, 21, 11}, {22, 23, 2},
- {24, 24, 12}, {25, 32, 2},
- {33, 35, 0}}},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil}, {true, nil, nil},
- {false, []int64{0xFFFFFFFF, 0x14},
- []*rangeItem{{0, 25, 0}, {26, 26, 1},
- {27, 35, 0}}},
- {true, nil, nil},
- {false, []int64{0xFFFFFFFF, 0x16},
- []*rangeItem{{0, 21, 0}, {22, 24, 1},
- {25, 35, 0}}},
- {true, nil, nil},
- {false, []int64{0xFFFFFFFF, 0x22},
- []*rangeItem{{0, 22, 0}, {23, 24, 1},
- {25, 35, 0}}},
- {false, []int64{0xFFFFFFFF, 0x23},
- []*rangeItem{{0, 10, 0}, {11, 11, 1},
- {12, 12, 0}, {13, 14, 1}, {15, 17, 0},
- {18, 18, 1}, {19, 19, 0}, {20, 20, 1},
- {21, 21, 0}, {22, 24, 1},
- {25, 35, 0}}},
- {false, []int64{0xFFFFFFFF, 0x24},
- []*rangeItem{{0, 10, 0}, {11, 11, 1},
- {12, 12, 0}, {13, 14, 1}, {15, 17, 0},
- {18, 18, 1}, {19, 19, 0}, {20, 20, 1},
- {21, 21, 0}, {22, 24, 1},
- {25, 35, 0}}},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil}, {true, nil, nil},
- {true, nil, nil},
- {false, []int64{0xFFFFFFFF, 0x25},
- []*rangeItem{{0, 22, 0}, {23, 24, 1},
- {25, 35, 0}}},
- {false, []int64{0xFFFFFFFF, 0x26},
- []*rangeItem{{0, 10, 0}, {11, 11, 1},
- {12, 12, 0}, {13, 14, 1}, {15, 17, 0},
- {18, 18, 1}, {19, 19, 0}, {20, 20, 1},
- {21, 21, 0}, {22, 24, 1},
- {25, 35, 0}}},
- {false, []int64{0xFFFFFFFF, 0x27},
- []*rangeItem{{0, 10, 0}, {11, 11, 1},
- {12, 12, 0}, {13, 14, 1}, {15, 17, 0},
- {18, 18, 1}, {19, 19, 0}, {20, 20, 1},
- {21, 21, 0}, {22, 24, 1},
- {25, 35, 0}}},
- {true, nil, nil}, {true, nil, nil},
- {false, []int64{0xFFFFFFFF, 0x28},
- []*rangeItem{{0, 10, 0}, {11, 11, 1},
- {12, 12, 0}, {13, 14, 1}, {15, 17, 0},
- {18, 18, 1}, {19, 19, 0}, {20, 20, 1},
- {21, 21, 0}, {22, 24, 1},
- {25, 35, 0}}},
- {false, []int64{0xFFFFFFFF, 0x29},
- []*rangeItem{{0, 10, 0}, {11, 11, 1},
- {12, 12, 0}, {13, 14, 1}, {15, 17, 0},
- {18, 18, 1}, {19, 19, 0}, {20, 20, 1},
- {21, 21, 0}, {22, 24, 1},
- {25, 35, 0}}},
- {true, nil, nil}}
- lx.finalTable = make(map[int64]int64)
- lx.finalTable[0x4] = 0x0
- lx.finalTable[0x5] = 0x4
- lx.finalTable[0x6] = 0x1
- lx.finalTable[0x7] = 0x2
- lx.finalTable[0x8] = 0x1C
- lx.finalTable[0x9] = 0x3
- lx.finalTable[0xA] = 0x6
- lx.finalTable[0xB] = 0x5
- lx.finalTable[0xC] = 0xA
- lx.finalTable[0xD] = 0x1C
- lx.finalTable[0xE] = 0x12
- lx.finalTable[0xF] = 0x1B
- lx.finalTable[0x10] = 0x8
- lx.finalTable[0x11] = 0x7
- lx.finalTable[0x12] = 0x9
- lx.finalTable[0x13] = 0xE
- lx.finalTable[0x14] = 0xD
- lx.finalTable[0x15] = 0xB
- lx.finalTable[0x16] = 0xC
- lx.finalTable[0x17] = 0x1A
- lx.finalTable[0x18] = 0x1A
- lx.finalTable[0x19] = 0x1A
- lx.finalTable[0x1A] = 0x1A
- lx.finalTable[0x1B] = 0x16
- lx.finalTable[0x1C] = 0x17
- lx.finalTable[0x1D] = 0x13
- lx.finalTable[0x1E] = 0x15
- lx.finalTable[0x1F] = 0x18
- lx.finalTable[0x20] = 0x14
- lx.finalTable[0x21] = 0x19
- lx.finalTable[0x25] = 0xF
- lx.finalTable[0x26] = 0x10
- lx.finalTable[0x29] = 0x11
- lx.inputTable = []*rangeItem{{0, 8, 17}, {9, 9, 26},
- {10, 10, 0}, {11, 12, 17}, {13, 13, 0},
- {14, 31, 17}, {32, 32, 26}, {33, 39, 17},
- {40, 40, 31}, {41, 41, 5}, {42, 42, 32},
- {43, 43, 30}, {44, 44, 25}, {45, 45, 28},
- {46, 46, 2}, {47, 47, 1}, {48, 48, 24},
- {49, 55, 23}, {56, 57, 22}, {58, 62, 17},
- {63, 63, 29}, {64, 64, 17}, {65, 70, 18},
- {71, 90, 17}, {91, 91, 6}, {92, 92, 3},
- {93, 93, 8}, {94, 94, 9}, {95, 96, 17},
- {97, 97, 18}, {98, 98, 14}, {99, 99, 20},
- {100, 100, 11}, {101, 101, 18}, {102, 102, 13},
- {103, 109, 17}, {110, 110, 21}, {111, 113, 17},
- {114, 114, 12}, {115, 115, 10}, {116, 116, 19},
- {117, 117, 15}, {118, 118, 17}, {119, 119, 10},
- {120, 120, 16}, {121, 122, 17}, {123, 123, 4},
- {124, 124, 7}, {125, 125, 27}, {126, 65535, 17}}
- lx.initialTable = make(map[string]int64)
- lx.initialTable["REPEAT"] = 0x1
- lx.initialTable["BRACKET"] = 0x2
- lx.initialTable["INITIAL"] = 0x3
- return
- }
- func (lx *lexer) setSource(src string) {
- if src != "" {
- lx.source = src
- }
- lx.ended = false
- lx.start = 0
- lx.oldStart = 0
- lx.line = 1
- lx.column = 0
- lx.advanced = true
- lx.tokenName = ""
- lx.yy = nil
- lx.initialState = _initial
- lx.initialInput = lx.initialTable[lx.initialState]
- }
- func (lx *lexer) getToken() (string, error) {
- var err error
- if lx.advanced {
- lx.tokenName, err = lx.next()
- lx.advanced = false
- }
- return lx.tokenName, err
- }
- func (lx *lexer) getPositionInfo() string {
- return fmt.Sprintf("row(%d) column(%d)", lx.line, lx.column)
- }
- func (lx *lexer) next() (ret string, err error) {
- for {
- var (
- nextState int64
- ch int64
- och = _maxValue
- next = lx.start
- curState = lx.transTable[0].toStates[lx.initialInput]
- lastFinalState = int64(_deadState)
- lastFinalPosition = lx.start
- )
- for {
- if next < int64(len(lx.source)) {
- ch = int64(lx.source[next])
- // 计算行、列的位置
- if och != _maxValue {
- if ch == 0x0d { // \r符号
- lx.column = 0
- lx.line++
- } else if ch == 0x0a { // \n
- if och != 0x0d { // != \r
- lx.column = 0
- lx.line++
- }
- } else {
- lx.column++
- }
- }
- och = int(ch)
- if nextState, err = lx.trans(curState, ch); err != nil {
- return
- }
- } else {
- nextState = _deadState
- }
- //OK
- if nextState == _deadState {
- if lx.start == lastFinalPosition {
- if lx.start == int64(len(lx.source)) {
- if !lx.ended {
- lx.ended = true
- return "<$>", nil
- }
- return "", errEOF
- }
- return "", fmt.Errorf("意外的字符(line:%d,col:%d) of %s", lx.line, lx.column, lx.source)
- }
- lx.yyText = lx.source[lx.start:lastFinalPosition]
- lx.oldStart = lx.start
- lx.start = lastFinalPosition
- fIndex := lx.finalTable[lastFinalState]
- switch fIndex {
- case 0x0:
- return "*", nil
- case 0x1:
- return "+", nil
- case 0x2:
- return "?", nil
- case 0x3:
- return "|", nil
- case 0x4:
- return "(", nil
- case 0x5:
- return ")", nil
- case 0x6:
- if err = lx.begin("BRACKET"); err != nil {
- return
- }
- return "[", nil
- case 0x7:
- return "^", nil
- case 0x8:
- return "-", nil
- case 0x9:
- if err = lx.begin("INITIAL"); err != nil {
- return
- }
- return "]", nil
- case 0xA:
- if err = lx.begin("REPEAT"); err != nil {
- return
- }
- return "{", nil
- case 0xB:
- return ",", nil
- case 0xC:
- if lx.yyText, err = strconv.ParseInt(lx.yyText.(string), 10, 64); err != nil {
- return
- }
- return "d", nil
- case 0xE:
- if err = lx.begin("INITIAL"); err != nil {
- return
- }
- return "}", nil
- case 0xF:
- var tmp int64
- if tmp, err = strconv.ParseInt(lx.yyText.(string)[2:4], 8, 64); err != nil {
- return
- }
- lx.yyText = string(tmp)
- return "c", nil
- case 0x10:
- var tmp int64
- if tmp, err = strconv.ParseInt(lx.yyText.(string)[2:4], 16, 64); err != nil {
- return
- }
- lx.yyText = string(tmp)
- return "c", nil
- case 0x11:
- var tmp int64
- if tmp, err = strconv.ParseInt(lx.yyText.(string)[2:6], 16, 64); err != nil {
- return
- }
- lx.yyText = string(tmp)
- return "c", nil
- case 0x12:
- return "escc", nil
- case 0x13:
- lx.yyText = "\r"
- return "c", nil
- case 0x14:
- lx.yyText = "\n"
- return "c", nil
- case 0x15:
- lx.yyText = "\t"
- return "c", nil
- case 0x16:
- lx.yyText = "\b"
- return "c", nil
- case 0x17:
- lx.yyText = "\f"
- return "c", nil
- case 0x18:
- lx.yyText = "/"
- return "c", nil
- case 0x19:
- return "escc", nil
- case 0x1A:
- lx.yyText = lx.yyText.(string)[1:2]
- return "c", nil
- case 0x1B:
- return "/", nil
- case 0x1C:
- return "c", nil
- }
- break
- } else {
- next++
- if _, ok := lx.finalTable[nextState]; ok {
- lastFinalState = nextState
- lastFinalPosition = next
- }
- curState = nextState
- }
- }
- }
- }
- func (lx *lexer) begin(state string) error {
- return lx.setInitialState(state)
- }
- func (lx *lexer) setInitialState(state string) (err error) {
- if _, ok := lx.initialTable[state]; !ok {
- err = fmt.Errorf("未定义的初始状态:%s", state)
- return
- }
- lx.initialState = state
- lx.initialInput = lx.initialTable[state]
- return
- }
- func (lx *lexer) trans(curState, ch int64) (int64, error) {
- if ch < lx.inputTable[0].from || ch > lx.inputTable[len(lx.inputTable)-1].to {
- return 0, fmt.Errorf("line:%d,column:%d 输入字符超出范围", lx.line, lx.column)
- }
- if lx.transTable[curState].isDead {
- return _deadState, nil
- }
- pubInput := find(ch, lx.inputTable)
- innerInput := find(pubInput, lx.transTable[curState].transEdge)
- return lx.transTable[curState].toStates[innerInput], nil
- }
- func find(code int64, table []*rangeItem) int64 {
- var (
- max = len(table) - 1
- min int
- mid uint64
- )
- for {
- mid = uint64(min+max) >> 1
- if table[mid].from <= code {
- if table[mid].to >= code {
- return table[mid].value
- }
- min = int(mid) + 1
- } else {
- max = int(mid) - 1
- }
- }
- }
|