package expr

import (
	"bytes"
	"fmt"
	"strconv"
	"strings"
	"text/scanner"
	"unicode"
)

// Token kinds produced by the lexer. Like text/scanner's token constants,
// they are negative so they never collide with single-character tokens
// such as '(' or ','.
const (
	TokenEOF = -(iota + 1)
	TokenIdent
	TokenInt
	TokenFloat
	TokenOperator
)

// lexer wraps a text/scanner.Scanner and keeps the current token kind and text.
type lexer struct {
	scan  scanner.Scanner
	token rune
	text  string
}

func (lex *lexer) getToken() rune {
	return lex.token
}

func (lex *lexer) getText() string {
	return lex.text
}

// next advances the lexer to the next token, joining two-character operators
// such as "&&", "||", "==", ">=", "<=" and "!=".
func (lex *lexer) next() {
	token := lex.scan.Scan()
	text := lex.scan.TokenText()
	switch token {
	case scanner.EOF:
		lex.token = TokenEOF
		lex.text = text
	case scanner.Ident:
		lex.token = TokenIdent
		lex.text = text
	case scanner.Int:
		lex.token = TokenInt
		lex.text = text
	case scanner.Float:
		lex.token = TokenFloat
		lex.text = text
	case '+', '-', '*', '/', '%', '~':
		lex.token = TokenOperator
		lex.text = text
	case '&', '|', '=':
		// These runes may be doubled: "&&", "||", "==".
		var buffer bytes.Buffer
		lex.token = TokenOperator
		buffer.WriteRune(token)
		next := lex.scan.Peek()
		if next == token {
			buffer.WriteRune(next)
			lex.scan.Scan()
		}
		lex.text = buffer.String()
	case '>', '<', '!':
		// These runes may be followed by '=': ">=", "<=", "!=".
		var buffer bytes.Buffer
		lex.token = TokenOperator
		buffer.WriteRune(token)
		next := lex.scan.Peek()
		if next == '=' {
			buffer.WriteRune(next)
			lex.scan.Scan()
		}
		lex.text = buffer.String()
	default:
		if token >= 0 {
			// Any other single rune, e.g. '(', ')' or ',', is passed through unchanged.
			lex.token = token
			lex.text = text
		} else {
			msg := fmt.Sprintf("got unknown token:%q, text:%s", token, text)
			panic(lexPanic(msg))
		}
	}
	//fmt.Printf("token:%d, text:%s\n", lex.token, lex.text)
}
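
// tokenize is an illustrative helper added for documentation only; it is not
// part of the original parser and is not used by it. It runs the lexer over
// input and collects the token texts, showing how next() groups multi-rune
// operators. For example, tokenize("x>=10 && y!=2") returns
// ["x" ">=" "10" "&&" "y" "!=" "2"].
func tokenize(input string) []string {
	lex := new(lexer)
	lex.scan.Init(strings.NewReader(input))
	lex.scan.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats
	var tokens []string
	for lex.next(); lex.token != TokenEOF; lex.next() {
		tokens = append(tokens, lex.text)
	}
	return tokens
}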

// lexPanic is the panic value used internally to report lexing and parsing
// errors; Parse recovers it and converts it into an error.
type lexPanic string

// describe returns a string describing the current token, for use in errors.
func (lex *lexer) describe() string {
	switch lex.token {
	case TokenEOF:
		return "end of file"
	case TokenIdent:
		return fmt.Sprintf("identifier %s", lex.getText())
	case TokenInt, TokenFloat:
		return fmt.Sprintf("number %s", lex.getText())
	case TokenOperator:
		return fmt.Sprintf("operator %s", lex.getText())
	}
	return fmt.Sprintf("%q", rune(lex.getToken())) // any other rune
}

// precedence returns the binding strength of an operator; a larger value
// binds more tightly. It returns 0 for anything that is not an operator.
func precedence(token rune, text string) int {
	if token == TokenOperator {
		switch text {
		case "~", "!":
			return 9
		case "*", "/", "%":
			return 8
		case "+", "-":
			return 7
		case ">", ">=", "<", "<=":
			return 6
		case "!=", "==", "=":
			return 5
		case "&":
			return 4
		case "|":
			return 3
		case "&&":
			return 2
		case "||":
			return 1
		default:
			msg := fmt.Sprintf("unknown operator:%s", text)
			panic(lexPanic(msg))
		}
	}
	return 0
}

// ---- parser ----

// ExpressionParser parses an arithmetic/boolean expression and records the
// variable names it references.
type ExpressionParser struct {
	expression Expr
	variable   map[string]struct{}
}

// NewExpressionParser returns an ExpressionParser with an empty variable set.
func NewExpressionParser() *ExpressionParser {
	return &ExpressionParser{
		expression: nil,
		variable:   make(map[string]struct{}),
	}
}

// Parse parses the input string as an arithmetic expression.
//
//	expr = num                        a literal number, e.g., 3.14159
//	     | id                         a variable name, e.g., x
//	     | id '(' expr ',' ... ')'    a function call
//	     | '-' expr                   a unary operator ( + - ~ ! )
//	     | expr '+' expr              a binary operator ( + - * / % > >= < <= == != = & | && || )
func (parser *ExpressionParser) Parse(input string) (err error) {
	defer func() {
		switch x := recover().(type) {
		case nil:
			// no panic
		case lexPanic:
			err = fmt.Errorf("%s", x)
		default:
			// unexpected panic: re-panic with the original value.
			panic(x)
		}
	}()
	lex := new(lexer)
	lex.scan.Init(strings.NewReader(input))
	lex.scan.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats
	lex.scan.IsIdentRune = parser.isIdentRune
	lex.next() // initial lookahead
	parser.expression = nil
	parser.variable = make(map[string]struct{})
	e := parser.parseExpr(lex)
	if lex.token != TokenEOF {
		return fmt.Errorf("unexpected %s", lex.describe())
	}
	parser.expression = e
	return nil
}
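
// parseErrorExamples is an illustrative sketch, not part of the original code:
// it shows the kind of errors Parse reports after recovering a lexPanic.
func parseErrorExamples() {
	p := NewExpressionParser()
	fmt.Println(p.Parse("1 +"))   // unexpected end of file
	fmt.Println(p.Parse("a ) b")) // unexpected ')'
}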

// GetExpr returns the expression tree built by the last successful Parse.
func (parser *ExpressionParser) GetExpr() Expr {
	return parser.expression
}

// GetVariable returns the variable names referenced by the parsed expression,
// excluding the boolean literals "true" and "false".
func (parser *ExpressionParser) GetVariable() []string {
	variable := make([]string, 0, len(parser.variable))
	for v := range parser.variable {
		if v != "true" && v != "false" {
			variable = append(variable, v)
		}
	}
	return variable
}

// isIdentRune reports whether ch may appear at position i of an identifier:
// '$', '_' and letters anywhere, digits only after the first character.
func (parser *ExpressionParser) isIdentRune(ch rune, i int) bool {
	return ch == '$' || ch == '_' || unicode.IsLetter(ch) || (unicode.IsDigit(ch) && i > 0)
}

func (parser *ExpressionParser) parseExpr(lex *lexer) Expr {
	return parser.parseBinary(lex, 1)
}

// binary = unary (binop binary)*
// parseBinary stops when it encounters an
// operator of lower precedence than prec1.
func (parser *ExpressionParser) parseBinary(lex *lexer, prec1 int) Expr {
	lhs := parser.parseUnary(lex)
	for prec := precedence(lex.getToken(), lex.getText()); prec >= prec1; prec-- {
		for precedence(lex.getToken(), lex.getText()) == prec {
			op := lex.getText()
			lex.next() // consume operator
			rhs := parser.parseBinary(lex, prec+1)
			lhs = binary{op, lhs, rhs}
		}
	}
	return lhs
}
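
// parseTreeExample is an illustrative sketch, not part of the original code.
// Given the precedence table above, "*" (8) binds tighter than "+" (7), which
// binds tighter than "==" (5), which binds tighter than "&&" (2), so
// parseBinary turns "1 + 2*3 == 7 && ok" into the tree (literal nodes shown
// by their values):
//
//	binary{"&&",
//		binary{"==",
//			binary{"+", 1, binary{"*", 2, 3}},
//			7},
//		Var("ok")}
func parseTreeExample() Expr {
	p := NewExpressionParser()
	if err := p.Parse("1 + 2*3 == 7 && ok"); err != nil {
		panic(err)
	}
	return p.GetExpr()
}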

// unary = ('+' | '-' | '~' | '!') unary | primary
func (parser *ExpressionParser) parseUnary(lex *lexer) Expr {
	if lex.getToken() == TokenOperator {
		op := lex.getText()
		if op == "+" || op == "-" || op == "~" || op == "!" {
			lex.next()
			return unary{op, parser.parseUnary(lex)}
		}
		msg := fmt.Sprintf("unary got unknown operator:%s", op)
		panic(lexPanic(msg))
	}
	return parser.parsePrimary(lex)
}
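
// Unary operators bind tighter than any binary operator, because parseUnary
// runs before parseBinary ever looks at the following operator. As an
// illustration (not part of the original code), "-x * y" parses as
// binary{"*", unary{"-", Var("x")}, Var("y")}, i.e. (-x) * y.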

// primary = id
//         | id '(' expr ',' ... ',' expr ')'
//         | num
//         | '(' expr ')'
func (parser *ExpressionParser) parsePrimary(lex *lexer) Expr {
	switch lex.token {
	case TokenIdent:
		id := lex.getText()
		lex.next()
		if lex.token != '(' {
			// A bare identifier is a variable reference; record its name.
			parser.variable[id] = struct{}{}
			return Var(id)
		}
		// Otherwise it is a function call: id '(' args ')'.
		lex.next() // consume '('
		var args []Expr
		if lex.token != ')' {
			for {
				args = append(args, parser.parseExpr(lex))
				if lex.token != ',' {
					break
				}
				lex.next() // consume ','
			}
			if lex.token != ')' {
				msg := fmt.Sprintf("got %s, want ')'", lex.describe())
				panic(lexPanic(msg))
			}
		}
		lex.next() // consume ')'
		return call{id, args}
	case TokenFloat:
		f, err := strconv.ParseFloat(lex.getText(), 64)
		if err != nil {
			panic(lexPanic(err.Error()))
		}
		lex.next() // consume number
		return literal{value: f}
	case TokenInt:
		i, err := strconv.ParseInt(lex.getText(), 10, 64)
		if err != nil {
			panic(lexPanic(err.Error()))
		}
		lex.next() // consume number
		return literal{value: i}
	case '(':
		lex.next() // consume '('
		e := parser.parseExpr(lex)
		if lex.token != ')' {
			msg := fmt.Sprintf("got %s, want ')'", lex.describe())
			panic(lexPanic(msg))
		}
		lex.next() // consume ')'
		return e
	}
	msg := fmt.Sprintf("unexpected %s", lex.describe())
	panic(lexPanic(msg))
}
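
// ParseAndListVars is an illustrative usage sketch, not part of the original
// API: it wires together NewExpressionParser, Parse, GetExpr and GetVariable.
// It assumes Expr is the expression interface defined elsewhere in this
// package. For input like "price*(1-discount) >= 100 && qty > 0" it returns
// the parsed tree plus the variables price, discount and qty (in
// map-iteration order).
func ParseAndListVars(input string) (Expr, []string, error) {
	parser := NewExpressionParser()
	if err := parser.Parse(input); err != nil {
		return nil, nil, err
	}
	return parser.GetExpr(), parser.GetVariable(), nil
}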