token.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. // Copyright (c) 2017 Ernest Micklei
  2. //
  3. // MIT License
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining
  6. // a copy of this software and associated documentation files (the
  7. // "Software"), to deal in the Software without restriction, including
  8. // without limitation the rights to use, copy, modify, merge, publish,
  9. // distribute, sublicense, and/or sell copies of the Software, and to
  10. // permit persons to whom the Software is furnished to do so, subject to
  11. // the following conditions:
  12. //
  13. // The above copyright notice and this permission notice shall be
  14. // included in all copies or substantial portions of the Software.
  15. //
  16. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  19. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  20. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  21. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  22. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23. package proto
  24. import (
  25. "strings"
  26. )
  // token identifies the lexical class of a literal. Its values are the
  // t-prefixed constants declared in this file; the zero value is tILLEGAL.
  type token int
  // Token kinds. Values are assigned by iota, so the declaration order is
  // significant: isKeyword relies on every keyword being declared strictly
  // between the keywordsStart and keywordsEnd sentinels.
  const (
  	// Special tokens
  	tILLEGAL token = iota // zero value of token
  	tEOF
  	tWS

  	// Literals
  	tIDENT // returned by asToken for any literal it does not recognise

  	// Misc characters
  	tSEMICOLON   // ;
  	tCOLON       // :
  	tEQUALS      // =
  	tQUOTE       // "
  	tSINGLEQUOTE // '
  	tLEFTPAREN   // (
  	tRIGHTPAREN  // )
  	tLEFTCURLY   // {
  	tRIGHTCURLY  // }
  	tLEFTSQUARE  // [
  	tRIGHTSQUARE // ]
  	tCOMMENT     // a literal starting with // or /* (see isComment)
  	tLESS        // <
  	tGREATER     // >
  	tCOMMA       // ,
  	tDOT         // .

  	// Keywords
  	keywordsStart // sentinel: lower bound (exclusive) of the keyword range
  	tSYNTAX
  	tSERVICE
  	tRPC
  	tRETURNS
  	tMESSAGE
  	tIMPORT
  	tPACKAGE
  	tOPTION
  	tREPEATED
  	tWEAK
  	tPUBLIC

  	// special fields
  	tONEOF
  	tMAP
  	tRESERVED
  	tENUM
  	tSTREAM

  	// BEGIN proto2
  	tOPTIONAL
  	tGROUP
  	tEXTENSIONS
  	tEXTEND
  	tREQUIRED
  	// END proto2
  	keywordsEnd // sentinel: upper bound (exclusive) of the keyword range
  )
  81. // typeTokens exists for future validation
  // const typeTokens = "double float int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 bool string bytes"
  83. // isKeyword returns if tok is in the keywords range
  84. func isKeyword(tok token) bool {
  85. return keywordsStart < tok && tok < keywordsEnd
  86. }
  // isWhitespace reports whether r is a space, a horizontal tab or a newline.
  // No other rune (including carriage return) counts as whitespace here.
  func isWhitespace(r rune) bool {
  	switch r {
  	case ' ', '\t', '\n':
  		return true
  	}
  	return false
  }
  // isDigit reports whether ch is one of the ASCII decimal digits '0'..'9'.
  func isDigit(ch rune) bool {
  	if ch < '0' {
  		return false
  	}
  	return ch <= '9'
  }
  // isString reports whether lit is a quoted literal: at least two bytes long
  // and starting and ending with the same quote character, either double (")
  // or single (').
  //
  // A lone quote character is not a string. A prefix/suffix check alone would
  // accept it, because a single quote is both the prefix and the suffix of
  // itself.
  func isString(lit string) bool {
  	if len(lit) < 2 {
  		return false
  	}
  	// Quotes are ASCII, so comparing the first and last byte is sufficient.
  	first, last := lit[0], lit[len(lit)-1]
  	return first == last && (first == '"' || first == '\'')
  }
  // isComment reports whether lit begins with a line-comment marker (//) or a
  // block-comment marker (/*).
  func isComment(lit string) bool {
  	switch {
  	case strings.HasPrefix(lit, "//"):
  		return true
  	case strings.HasPrefix(lit, "/*"):
  		return true
  	default:
  		return false
  	}
  }
  // unQuote returns lit without its surrounding quotes.
  //
  // When lit is at least two bytes long and starts and ends with the same quote
  // character (" or '), exactly that one pair is removed, so quote characters
  // that belong to the content are preserved: "'a'" yields 'a', not a.
  //
  // For any other input the previous lenient behaviour is kept: every leading
  // and trailing " or ' is trimmed, which leaves unquoted text untouched and
  // still strips the quote from an unbalanced literal.
  func unQuote(lit string) string {
  	if n := len(lit); n >= 2 {
  		// Quotes are ASCII, so byte comparison is safe for UTF-8 content.
  		first, last := lit[0], lit[n-1]
  		if first == last && (first == '"' || first == '\'') {
  			return lit[1 : n-1]
  		}
  	}
  	return strings.Trim(lit, "\"'")
  }
  106. func asToken(literal string) token {
  107. switch literal {
  108. // delimiters
  109. case ";":
  110. return tSEMICOLON
  111. case ":":
  112. return tCOLON
  113. case "=":
  114. return tEQUALS
  115. case "\"":
  116. return tQUOTE
  117. case "'":
  118. return tSINGLEQUOTE
  119. case "(":
  120. return tLEFTPAREN
  121. case ")":
  122. return tRIGHTPAREN
  123. case "{":
  124. return tLEFTCURLY
  125. case "}":
  126. return tRIGHTCURLY
  127. case "[":
  128. return tLEFTSQUARE
  129. case "]":
  130. return tRIGHTSQUARE
  131. case "<":
  132. return tLESS
  133. case ">":
  134. return tGREATER
  135. case ",":
  136. return tCOMMA
  137. case ".":
  138. return tDOT
  139. // words
  140. case "syntax":
  141. return tSYNTAX
  142. case "service":
  143. return tSERVICE
  144. case "rpc":
  145. return tRPC
  146. case "returns":
  147. return tRETURNS
  148. case "option":
  149. return tOPTION
  150. case "message":
  151. return tMESSAGE
  152. case "import":
  153. return tIMPORT
  154. case "package":
  155. return tPACKAGE
  156. case "oneof":
  157. return tONEOF
  158. // special fields
  159. case "map":
  160. return tMAP
  161. case "reserved":
  162. return tRESERVED
  163. case "enum":
  164. return tENUM
  165. case "repeated":
  166. return tREPEATED
  167. case "weak":
  168. return tWEAK
  169. case "public":
  170. return tPUBLIC
  171. case "stream":
  172. return tSTREAM
  173. // proto2
  174. case "optional":
  175. return tOPTIONAL
  176. case "group":
  177. return tGROUP
  178. case "extensions":
  179. return tEXTENSIONS
  180. case "extend":
  181. return tEXTEND
  182. case "required":
  183. return tREQUIRED
  184. case "ws":
  185. return tWS
  186. case "ill":
  187. return tILLEGAL
  188. default:
  189. // special cases
  190. if isComment(literal) {
  191. return tCOMMENT
  192. }
  193. return tIDENT
  194. }
  195. }