package xregex /* golang version regex parser refer to: https://github.com/aristotle9/as3cc/tree/master/java-template/src/org/lala/lex/utils/parser */ import ( "errors" "fmt" "strconv" ) const ( _initial = "INITIAL" _deadState = 0xFFFFFFFF _maxValue = 0x7fffffffffffffff ) var ( errEOF = errors.New("已经到达末尾") ) // Lexer golang lexter type lexer struct { transTable []*stateTransItem finalTable map[int64]int64 initialTable map[string]int64 inputTable []*rangeItem start int64 oldStart int64 tokenName string yyText interface{} yy interface{} ended bool initialInput int64 initialState string line int64 column int64 advanced bool source string } func newLexer() (lx *lexer) { lx = &lexer{} lx.transTable = []*stateTransItem{ {false, []int64{0xFFFFFFFF, 0x3, 0x2, 0x1}, []*rangeItem{{0, 32, 0}, {33, 33, 1}, {34, 34, 2}, {35, 35, 3}}}, {false, []int64{0xFFFFFFFF, 0xF, 0xE, 0xD, 0xC, 0xB, 0xA, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4}, []*rangeItem{{0, 0, 0}, {1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}, {5, 5, 5}, {6, 6, 6}, {7, 7, 7}, {8, 28, 8}, {29, 29, 9}, {30, 30, 10}, {31, 31, 11}, {32, 32, 12}, {33, 35, 0}}}, {false, []int64{0xFFFFFFFF, 0xF, 0xE, 0xD, 0x8, 0x12, 0x11, 0x10}, []*rangeItem{{0, 0, 0}, {1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 7, 4}, {8, 8, 5}, {9, 9, 6}, {10, 27, 4}, {28, 28, 7}, {29, 32, 4}, {33, 35, 0}}}, {false, []int64{0xFFFFFFFF, 0x16, 0x15, 0x14, 0x13}, []*rangeItem{{0, 21, 0}, {22, 24, 1}, {25, 25, 2}, {26, 26, 3}, {27, 27, 4}, {28, 35, 0}}}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {false, []int64{0xFFFFFFFF, 0x1F, 0x17, 0xE, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x1E, 0x21, 0x20, 0x18}, []*rangeItem{{0, 0, 0}, {1, 1, 1}, {2, 9, 2}, {10, 11, 3}, {12, 12, 4}, {13, 13, 5}, {14, 14, 6}, {15, 15, 7}, {16, 16, 8}, {17, 18, 2}, {19, 19, 9}, {20, 20, 10}, {21, 21, 11}, {22, 23, 2}, {24, 24, 12}, {25, 32, 2}, {33, 35, 0}}}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {false, []int64{0xFFFFFFFF, 0x14}, []*rangeItem{{0, 25, 0}, {26, 26, 1}, {27, 35, 0}}}, {true, nil, nil}, {false, []int64{0xFFFFFFFF, 0x16}, []*rangeItem{{0, 21, 0}, {22, 24, 1}, {25, 35, 0}}}, {true, nil, nil}, {false, []int64{0xFFFFFFFF, 0x22}, []*rangeItem{{0, 22, 0}, {23, 24, 1}, {25, 35, 0}}}, {false, []int64{0xFFFFFFFF, 0x23}, []*rangeItem{{0, 10, 0}, {11, 11, 1}, {12, 12, 0}, {13, 14, 1}, {15, 17, 0}, {18, 18, 1}, {19, 19, 0}, {20, 20, 1}, {21, 21, 0}, {22, 24, 1}, {25, 35, 0}}}, {false, []int64{0xFFFFFFFF, 0x24}, []*rangeItem{{0, 10, 0}, {11, 11, 1}, {12, 12, 0}, {13, 14, 1}, {15, 17, 0}, {18, 18, 1}, {19, 19, 0}, {20, 20, 1}, {21, 21, 0}, {22, 24, 1}, {25, 35, 0}}}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {true, nil, nil}, {false, []int64{0xFFFFFFFF, 0x25}, []*rangeItem{{0, 22, 0}, {23, 24, 1}, {25, 35, 0}}}, {false, []int64{0xFFFFFFFF, 0x26}, []*rangeItem{{0, 10, 0}, {11, 11, 1}, {12, 12, 0}, {13, 14, 1}, {15, 17, 0}, {18, 18, 1}, {19, 19, 0}, {20, 20, 1}, {21, 21, 0}, {22, 24, 1}, {25, 35, 0}}}, {false, []int64{0xFFFFFFFF, 0x27}, []*rangeItem{{0, 10, 0}, {11, 11, 1}, {12, 12, 0}, {13, 14, 1}, {15, 17, 0}, {18, 18, 1}, {19, 19, 0}, {20, 20, 1}, {21, 21, 0}, {22, 24, 1}, {25, 35, 0}}}, {true, nil, nil}, {true, nil, nil}, {false, []int64{0xFFFFFFFF, 0x28}, []*rangeItem{{0, 10, 0}, {11, 11, 1}, {12, 12, 0}, {13, 14, 1}, {15, 17, 0}, {18, 18, 1}, {19, 19, 0}, {20, 20, 1}, {21, 21, 0}, {22, 24, 1}, {25, 35, 0}}}, {false, []int64{0xFFFFFFFF, 0x29}, []*rangeItem{{0, 10, 0}, {11, 11, 1}, {12, 12, 0}, {13, 14, 1}, {15, 17, 0}, {18, 18, 1}, {19, 19, 0}, {20, 20, 1}, {21, 21, 0}, {22, 24, 1}, {25, 35, 0}}}, {true, nil, nil}} lx.finalTable = make(map[int64]int64) lx.finalTable[0x4] = 0x0 lx.finalTable[0x5] = 0x4 lx.finalTable[0x6] = 0x1 lx.finalTable[0x7] = 0x2 lx.finalTable[0x8] = 0x1C lx.finalTable[0x9] = 0x3 lx.finalTable[0xA] = 0x6 lx.finalTable[0xB] = 0x5 lx.finalTable[0xC] = 0xA lx.finalTable[0xD] = 0x1C lx.finalTable[0xE] = 0x12 lx.finalTable[0xF] = 0x1B lx.finalTable[0x10] = 0x8 lx.finalTable[0x11] = 0x7 lx.finalTable[0x12] = 0x9 lx.finalTable[0x13] = 0xE lx.finalTable[0x14] = 0xD lx.finalTable[0x15] = 0xB lx.finalTable[0x16] = 0xC lx.finalTable[0x17] = 0x1A lx.finalTable[0x18] = 0x1A lx.finalTable[0x19] = 0x1A lx.finalTable[0x1A] = 0x1A lx.finalTable[0x1B] = 0x16 lx.finalTable[0x1C] = 0x17 lx.finalTable[0x1D] = 0x13 lx.finalTable[0x1E] = 0x15 lx.finalTable[0x1F] = 0x18 lx.finalTable[0x20] = 0x14 lx.finalTable[0x21] = 0x19 lx.finalTable[0x25] = 0xF lx.finalTable[0x26] = 0x10 lx.finalTable[0x29] = 0x11 lx.inputTable = []*rangeItem{{0, 8, 17}, {9, 9, 26}, {10, 10, 0}, {11, 12, 17}, {13, 13, 0}, {14, 31, 17}, {32, 32, 26}, {33, 39, 17}, {40, 40, 31}, {41, 41, 5}, {42, 42, 32}, {43, 43, 30}, {44, 44, 25}, {45, 45, 28}, {46, 46, 2}, {47, 47, 1}, {48, 48, 24}, {49, 55, 23}, {56, 57, 22}, {58, 62, 17}, {63, 63, 29}, {64, 64, 17}, {65, 70, 18}, {71, 90, 17}, {91, 91, 6}, {92, 92, 3}, {93, 93, 8}, {94, 94, 9}, {95, 96, 17}, {97, 97, 18}, {98, 98, 14}, {99, 99, 20}, {100, 100, 11}, {101, 101, 18}, {102, 102, 13}, {103, 109, 17}, {110, 110, 21}, {111, 113, 17}, {114, 114, 12}, {115, 115, 10}, {116, 116, 19}, {117, 117, 15}, {118, 118, 17}, {119, 119, 10}, {120, 120, 16}, {121, 122, 17}, {123, 123, 4}, {124, 124, 7}, {125, 125, 27}, {126, 65535, 17}} lx.initialTable = make(map[string]int64) lx.initialTable["REPEAT"] = 0x1 lx.initialTable["BRACKET"] = 0x2 lx.initialTable["INITIAL"] = 0x3 return } func (lx *lexer) setSource(src string) { if src != "" { lx.source = src } lx.ended = false lx.start = 0 lx.oldStart = 0 lx.line = 1 lx.column = 0 lx.advanced = true lx.tokenName = "" lx.yy = nil lx.initialState = _initial lx.initialInput = lx.initialTable[lx.initialState] } func (lx *lexer) getToken() (string, error) { var err error if lx.advanced { lx.tokenName, err = lx.next() lx.advanced = false } return lx.tokenName, err } func (lx *lexer) getPositionInfo() string { return fmt.Sprintf("row(%d) column(%d)", lx.line, lx.column) } func (lx *lexer) next() (ret string, err error) { for { var ( nextState int64 ch int64 och = _maxValue next = lx.start curState = lx.transTable[0].toStates[lx.initialInput] lastFinalState = int64(_deadState) lastFinalPosition = lx.start ) for { if next < int64(len(lx.source)) { ch = int64(lx.source[next]) // 计算行、列的位置 if och != _maxValue { if ch == 0x0d { // \r符号 lx.column = 0 lx.line++ } else if ch == 0x0a { // \n if och != 0x0d { // != \r lx.column = 0 lx.line++ } } else { lx.column++ } } och = int(ch) if nextState, err = lx.trans(curState, ch); err != nil { return } } else { nextState = _deadState } //OK if nextState == _deadState { if lx.start == lastFinalPosition { if lx.start == int64(len(lx.source)) { if !lx.ended { lx.ended = true return "<$>", nil } return "", errEOF } return "", fmt.Errorf("意外的字符(line:%d,col:%d) of %s", lx.line, lx.column, lx.source) } lx.yyText = lx.source[lx.start:lastFinalPosition] lx.oldStart = lx.start lx.start = lastFinalPosition fIndex := lx.finalTable[lastFinalState] switch fIndex { case 0x0: return "*", nil case 0x1: return "+", nil case 0x2: return "?", nil case 0x3: return "|", nil case 0x4: return "(", nil case 0x5: return ")", nil case 0x6: if err = lx.begin("BRACKET"); err != nil { return } return "[", nil case 0x7: return "^", nil case 0x8: return "-", nil case 0x9: if err = lx.begin("INITIAL"); err != nil { return } return "]", nil case 0xA: if err = lx.begin("REPEAT"); err != nil { return } return "{", nil case 0xB: return ",", nil case 0xC: if lx.yyText, err = strconv.ParseInt(lx.yyText.(string), 10, 64); err != nil { return } return "d", nil case 0xE: if err = lx.begin("INITIAL"); err != nil { return } return "}", nil case 0xF: var tmp int64 if tmp, err = strconv.ParseInt(lx.yyText.(string)[2:4], 8, 64); err != nil { return } lx.yyText = string(tmp) return "c", nil case 0x10: var tmp int64 if tmp, err = strconv.ParseInt(lx.yyText.(string)[2:4], 16, 64); err != nil { return } lx.yyText = string(tmp) return "c", nil case 0x11: var tmp int64 if tmp, err = strconv.ParseInt(lx.yyText.(string)[2:6], 16, 64); err != nil { return } lx.yyText = string(tmp) return "c", nil case 0x12: return "escc", nil case 0x13: lx.yyText = "\r" return "c", nil case 0x14: lx.yyText = "\n" return "c", nil case 0x15: lx.yyText = "\t" return "c", nil case 0x16: lx.yyText = "\b" return "c", nil case 0x17: lx.yyText = "\f" return "c", nil case 0x18: lx.yyText = "/" return "c", nil case 0x19: return "escc", nil case 0x1A: lx.yyText = lx.yyText.(string)[1:2] return "c", nil case 0x1B: return "/", nil case 0x1C: return "c", nil } break } else { next++ if _, ok := lx.finalTable[nextState]; ok { lastFinalState = nextState lastFinalPosition = next } curState = nextState } } } } func (lx *lexer) begin(state string) error { return lx.setInitialState(state) } func (lx *lexer) setInitialState(state string) (err error) { if _, ok := lx.initialTable[state]; !ok { err = fmt.Errorf("未定义的初始状态:%s", state) return } lx.initialState = state lx.initialInput = lx.initialTable[state] return } func (lx *lexer) trans(curState, ch int64) (int64, error) { if ch < lx.inputTable[0].from || ch > lx.inputTable[len(lx.inputTable)-1].to { return 0, fmt.Errorf("line:%d,column:%d 输入字符超出范围", lx.line, lx.column) } if lx.transTable[curState].isDead { return _deadState, nil } pubInput := find(ch, lx.inputTable) innerInput := find(pubInput, lx.transTable[curState].transEdge) return lx.transTable[curState].toStates[innerInput], nil } func find(code int64, table []*rangeItem) int64 { var ( max = len(table) - 1 min int mid uint64 ) for { mid = uint64(min+max) >> 1 if table[mid].from <= code { if table[mid].to >= code { return table[mid].value } min = int(mid) + 1 } else { max = int(mid) - 1 } } }