lexer.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. // Copyright 2017 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package asm
  17. import (
  18. "fmt"
  19. "os"
  20. "strings"
  21. "unicode"
  22. "unicode/utf8"
  23. "github.com/ethereum/go-ethereum/common/gopool"
  24. )
// stateFn represents a single state of the lexer. A state
// function is called with the lexer and returns the state
// function to run next; Lex stops its loop once nil is
// returned.
type stateFn func(*lexer) stateFn
// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the tokens channel of the lexer.
//
// The field order matters: emit builds tokens with a
// positional composite literal.
type token struct {
	typ    tokenType // kind of the token
	lineno int       // value of the lexer's line counter when the token was emitted
	text   string    // part of the input covered by the token
}
  37. // tokenType are the different types the lexer
  38. // is able to parse and return.
  39. type tokenType int
  40. const (
  41. eof tokenType = iota // end of file
  42. lineStart // emitted when a line starts
  43. lineEnd // emitted when a line ends
  44. invalidStatement // any invalid statement
  45. element // any element during element parsing
  46. label // label is emitted when a label is found
  47. labelDef // label definition is emitted when a new label is found
  48. number // number is emitted when a number is found
  49. stringValue // stringValue is emitted when a string has been found
  50. Numbers = "1234567890" // characters representing any decimal number
  51. HexadecimalNumbers = Numbers + "aAbBcCdDeEfF" // characters representing any hexadecimal
  52. Alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
  53. )
  54. // String implements stringer
  55. func (it tokenType) String() string {
  56. if int(it) > len(stringtokenTypes) {
  57. return "invalid"
  58. }
  59. return stringtokenTypes[it]
  60. }
  61. var stringtokenTypes = []string{
  62. eof: "EOF",
  63. invalidStatement: "invalid statement",
  64. element: "element",
  65. lineEnd: "end of line",
  66. lineStart: "new line",
  67. label: "label",
  68. labelDef: "label definition",
  69. number: "number",
  70. stringValue: "string",
  71. }
  72. // lexer is the basic construct for parsing
  73. // source code and turning them in to tokens.
  74. // Tokens are interpreted by the compiler.
  75. type lexer struct {
  76. input string // input contains the source code of the program
  77. tokens chan token // tokens is used to deliver tokens to the listener
  78. state stateFn // the current state function
  79. lineno int // current line number in the source file
  80. start, pos, width int // positions for lexing and returning value
  81. debug bool // flag for triggering debug output
  82. }
  83. // lex lexes the program by name with the given source. It returns a
  84. // channel on which the tokens are delivered.
  85. func Lex(source []byte, debug bool) <-chan token {
  86. ch := make(chan token)
  87. l := &lexer{
  88. input: string(source),
  89. tokens: ch,
  90. state: lexLine,
  91. debug: debug,
  92. }
  93. gopool.Submit(func() {
  94. l.emit(lineStart)
  95. for l.state != nil {
  96. l.state = l.state(l)
  97. }
  98. l.emit(eof)
  99. close(l.tokens)
  100. })
  101. return ch
  102. }
  103. // next returns the next rune in the program's source.
  104. func (l *lexer) next() (rune rune) {
  105. if l.pos >= len(l.input) {
  106. l.width = 0
  107. return 0
  108. }
  109. rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
  110. l.pos += l.width
  111. return rune
  112. }
// backup steps the read position back by the width of the rune most
// recently read by next. It undoes a single next call only; after
// next has hit the end of the input the width is 0 and backup is a
// no-op.
func (l *lexer) backup() {
	l.pos -= l.width
}
  117. // peek returns the next rune but does not advance the seeker
  118. func (l *lexer) peek() rune {
  119. r := l.next()
  120. l.backup()
  121. return r
  122. }
// ignore discards the input read so far for the pending token by
// moving the token start up to the current read position. It does
// not advance the read position itself.
func (l *lexer) ignore() {
	l.start = l.pos
}
  127. // Accepts checks whether the given input matches the next rune
  128. func (l *lexer) accept(valid string) bool {
  129. if strings.ContainsRune(valid, l.next()) {
  130. return true
  131. }
  132. l.backup()
  133. return false
  134. }
  135. // acceptRun will continue to advance the seeker until valid
  136. // can no longer be met.
  137. func (l *lexer) acceptRun(valid string) {
  138. for strings.ContainsRune(valid, l.next()) {
  139. }
  140. l.backup()
  141. }
  142. // acceptRunUntil is the inverse of acceptRun and will continue
  143. // to advance the seeker until the rune has been found.
  144. func (l *lexer) acceptRunUntil(until rune) bool {
  145. // Continues running until a rune is found
  146. for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
  147. if i == 0 {
  148. return false
  149. }
  150. }
  151. return true
  152. }
// blob returns the text of the pending token, i.e. the part of the
// input between the token start and the current read position.
func (l *lexer) blob() string {
	return l.input[l.start:l.pos]
}
  157. // Emits a new token on to token channel for processing
  158. func (l *lexer) emit(t tokenType) {
  159. token := token{t, l.lineno, l.blob()}
  160. if l.debug {
  161. fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
  162. }
  163. l.tokens <- token
  164. l.start = l.pos
  165. }
  166. // lexLine is state function for lexing lines
  167. func lexLine(l *lexer) stateFn {
  168. for {
  169. switch r := l.next(); {
  170. case r == '\n':
  171. l.emit(lineEnd)
  172. l.ignore()
  173. l.lineno++
  174. l.emit(lineStart)
  175. case r == ';' && l.peek() == ';':
  176. return lexComment
  177. case isSpace(r):
  178. l.ignore()
  179. case isLetter(r) || r == '_':
  180. return lexElement
  181. case isNumber(r):
  182. return lexNumber
  183. case r == '@':
  184. l.ignore()
  185. return lexLabel
  186. case r == '"':
  187. return lexInsideString
  188. default:
  189. return nil
  190. }
  191. }
  192. }
  193. // lexComment parses the current position until the end
  194. // of the line and discards the text.
  195. func lexComment(l *lexer) stateFn {
  196. l.acceptRunUntil('\n')
  197. l.ignore()
  198. return lexLine
  199. }
  200. // lexLabel parses the current label, emits and returns
  201. // the lex text state function to advance the parsing
  202. // process.
  203. func lexLabel(l *lexer) stateFn {
  204. l.acceptRun(Alpha + "_" + Numbers)
  205. l.emit(label)
  206. return lexLine
  207. }
  208. // lexInsideString lexes the inside of a string until
  209. // the state function finds the closing quote.
  210. // It returns the lex text state function.
  211. func lexInsideString(l *lexer) stateFn {
  212. if l.acceptRunUntil('"') {
  213. l.emit(stringValue)
  214. }
  215. return lexLine
  216. }
  217. func lexNumber(l *lexer) stateFn {
  218. acceptance := Numbers
  219. if l.accept("0") || l.accept("xX") {
  220. acceptance = HexadecimalNumbers
  221. }
  222. l.acceptRun(acceptance)
  223. l.emit(number)
  224. return lexLine
  225. }
  226. func lexElement(l *lexer) stateFn {
  227. l.acceptRun(Alpha + "_" + Numbers)
  228. if l.peek() == ':' {
  229. l.emit(labelDef)
  230. l.accept(":")
  231. l.ignore()
  232. } else {
  233. l.emit(element)
  234. }
  235. return lexLine
  236. }
// isLetter reports whether t is a letter according to
// unicode.IsLetter, which is a wider set than the ASCII
// letters listed in Alpha.
func isLetter(t rune) bool {
	return unicode.IsLetter(t)
}
// isSpace reports whether t is white space according to
// unicode.IsSpace. Newlines qualify as well; lexLine checks
// for them before calling isSpace.
func isSpace(t rune) bool {
	return unicode.IsSpace(t)
}
// isNumber reports whether t is a number according to
// unicode.IsNumber, which is a wider set than the ASCII
// digits listed in Numbers.
func isNumber(t rune) bool {
	return unicode.IsNumber(t)
}