|
|
@ -33,7 +33,7 @@ func NewScanner(r io.Reader) *Scanner { |
|
|
|
|
|
|
|
|
func (scanner *Scanner) nextPosition() *Position { |
|
|
func (scanner *Scanner) nextPosition() *Position { |
|
|
if len(scanner.positionBuffer) > 0 { |
|
|
if len(scanner.positionBuffer) > 0 { |
|
|
last = len(scanner.positionBuffer) - 1 |
|
|
|
|
|
|
|
|
last := len(scanner.positionBuffer) - 1 |
|
|
scanner.currentPosition = scanner.positionBuffer[last] |
|
|
scanner.currentPosition = scanner.positionBuffer[last] |
|
|
scanner.positionBuffer = scanner.positionBuffer[0:last] |
|
|
scanner.positionBuffer = scanner.positionBuffer[0:last] |
|
|
return scanner.currentPosition |
|
|
return scanner.currentPosition |
|
|
@ -43,27 +43,181 @@ func (scanner *Scanner) nextPosition() *Position { |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
scanner.state = errorcode.E_EOF |
|
|
scanner.state = errorcode.E_EOF |
|
|
next = EOF |
|
|
next = EOF |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
scanner.currentColumn++ |
|
|
|
|
|
if next == '\n' { |
|
|
|
|
|
scanner.currentLine++ |
|
|
scanner.currentLine++ |
|
|
scanner.currentColumn = 0 |
|
|
scanner.currentColumn = 0 |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
return &Position{ |
|
|
|
|
|
|
|
|
pos := &Position{ |
|
|
Char: next, |
|
|
Char: next, |
|
|
Line: scanner.currentLine, |
|
|
Line: scanner.currentLine, |
|
|
Column: scanner.currentColumn, |
|
|
Column: scanner.currentColumn, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
scanner.currentColumn++ |
|
|
|
|
|
if next == '\n' || next == EOF { |
|
|
|
|
|
scanner.currentLine++ |
|
|
|
|
|
scanner.currentColumn = 0 |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return pos |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
func (scanner *Scanner) unreadPosition(pos *Position) { |
|
|
func (scanner *Scanner) unreadPosition(pos *Position) { |
|
|
scanner.positionBuffer = append(scanner.positionBuffer, pos) |
|
|
scanner.positionBuffer = append(scanner.positionBuffer, pos) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func (scanner *Scanner) parseQuoted(positions *Positions, quote rune) *token.Token { |
|
|
|
|
|
// Determine quote size, 1 or 3 (e.g. 'string', '''string''')
|
|
|
|
|
|
quoteSize := 1 |
|
|
|
|
|
endQuoteSize := 0 |
|
|
|
|
|
pos := scanner.nextPosition() |
|
|
|
|
|
if pos.Char == quote { |
|
|
|
|
|
pos2 := scanner.nextPosition() |
|
|
|
|
|
if pos2.Char == quote { |
|
|
|
|
|
positions.Append(pos) |
|
|
|
|
|
positions.Append(pos2) |
|
|
|
|
|
quoteSize = 3 |
|
|
|
|
|
} else { |
|
|
|
|
|
scanner.unreadPosition(pos2) |
|
|
|
|
|
endQuoteSize = 1 |
|
|
|
|
|
} |
|
|
|
|
|
} else { |
|
|
|
|
|
scanner.unreadPosition(pos) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
for { |
|
|
|
|
|
if endQuoteSize == quoteSize { |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
pos = scanner.nextPosition() |
|
|
|
|
|
positions.Append(pos) |
|
|
|
|
|
if pos.Char == EOF { |
|
|
|
|
|
return positions.AsToken(token.ERRORTOKEN) |
|
|
|
|
|
} |
|
|
|
|
|
if quoteSize == 1 && pos.Char == '\n' { |
|
|
|
|
|
return positions.AsToken(token.ERRORTOKEN) |
|
|
|
|
|
} |
|
|
|
|
|
if pos.Char == quote { |
|
|
|
|
|
endQuoteSize += 1 |
|
|
|
|
|
} else { |
|
|
|
|
|
endQuoteSize = 0 |
|
|
|
|
|
if pos.Char == '\\' { |
|
|
|
|
|
pos = scanner.nextPosition() |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
return positions.AsToken(token.STRING) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
func (scanner *Scanner) NextToken() *token.Token { |
|
|
func (scanner *Scanner) NextToken() *token.Token { |
|
|
return &Token{ |
|
|
|
|
|
ID: token.ENDMARKER, |
|
|
|
|
|
|
|
|
positions := NewPositions() |
|
|
|
|
|
|
|
|
|
|
|
pos := scanner.nextPosition() |
|
|
|
|
|
// skip spaces
|
|
|
|
|
|
for { |
|
|
|
|
|
if pos.Char != ' ' && pos.Char != '\t' { |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
pos = scanner.nextPosition() |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// skip comments
|
|
|
|
|
|
if pos.Char == '#' { |
|
|
|
|
|
for { |
|
|
|
|
|
pos = scanner.nextPosition() |
|
|
|
|
|
if pos.Char == EOF || pos.Char == '\n' { |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
positions.Append(pos) |
|
|
|
|
|
switch ch := pos.Char; { |
|
|
|
|
|
case ch == EOF: |
|
|
|
|
|
id := token.ENDMARKER |
|
|
|
|
|
if scanner.state != errorcode.E_EOF { |
|
|
|
|
|
id = token.ERRORTOKEN |
|
|
|
|
|
} |
|
|
|
|
|
return positions.AsToken(id) |
|
|
|
|
|
case IsIdentifierStart(ch): |
|
|
|
|
|
// Parse Identifier
|
|
|
|
|
|
saw_b, saw_r, saw_u := false, false, false |
|
|
|
|
|
for { |
|
|
|
|
|
if !(saw_b || saw_u) && (ch == 'b' || ch == 'B') { |
|
|
|
|
|
saw_b = true |
|
|
|
|
|
} else if !(saw_b || saw_u || saw_r) && (ch == 'u' || ch == 'U') { |
|
|
|
|
|
saw_u = true |
|
|
|
|
|
} else if !(saw_r || saw_u) && (ch == 'r' || ch == 'R') { |
|
|
|
|
|
saw_r = true |
|
|
|
|
|
} else { |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
pos = scanner.nextPosition() |
|
|
|
|
|
if IsQuote(pos.Char) { |
|
|
|
|
|
positions.Append(pos) |
|
|
|
|
|
return scanner.parseQuoted(positions, pos.Char) |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
pos = scanner.nextPosition() |
|
|
|
|
|
for IsIdentifierChar(pos.Char) { |
|
|
|
|
|
positions.Append(pos) |
|
|
|
|
|
pos = scanner.nextPosition() |
|
|
|
|
|
} |
|
|
|
|
|
scanner.unreadPosition(pos) |
|
|
|
|
|
return positions.AsToken(token.NAME) |
|
|
|
|
|
case ch == '\n': |
|
|
|
|
|
return positions.AsToken(token.NEWLINE) |
|
|
|
|
|
case ch == '.': |
|
|
|
|
|
pos2 := scanner.nextPosition() |
|
|
|
|
|
if IsDigit(pos2.Char) { |
|
|
|
|
|
// Parse Number
|
|
|
|
|
|
} else if pos2.Char == '.' { |
|
|
|
|
|
positions.Append(pos2) |
|
|
|
|
|
pos3 := scanner.nextPosition() |
|
|
|
|
|
if pos3.Char == '.' { |
|
|
|
|
|
positions.Append(pos3) |
|
|
|
|
|
return positions.AsToken(token.ELLIPSIS) |
|
|
|
|
|
} |
|
|
|
|
|
scanner.unreadPosition(pos3) |
|
|
|
|
|
} |
|
|
|
|
|
scanner.unreadPosition(pos2) |
|
|
|
|
|
|
|
|
|
|
|
return positions.AsToken(token.DOT) |
|
|
|
|
|
case IsDigit(ch): |
|
|
|
|
|
// Parse Number
|
|
|
|
|
|
case IsQuote(ch): |
|
|
|
|
|
// Parse String
|
|
|
|
|
|
return scanner.parseQuoted(positions, ch) |
|
|
|
|
|
case ch == '\\': |
|
|
|
|
|
// Parse Continuation
|
|
|
|
|
|
default: |
|
|
|
|
|
// Two and Three character operators
|
|
|
|
|
|
pos2 := scanner.nextPosition() |
|
|
|
|
|
op2Id := GetTwoCharTokenID(pos.Char, pos2.Char) |
|
|
|
|
|
if op2Id != token.OP { |
|
|
|
|
|
positions.Append(pos2) |
|
|
|
|
|
pos3 := scanner.nextPosition() |
|
|
|
|
|
op3Id := GetThreeCharTokenID(pos.Char, pos2.Char, pos3.Char) |
|
|
|
|
|
if op3Id != token.OP { |
|
|
|
|
|
positions.Append(pos3) |
|
|
|
|
|
return positions.AsToken(op3Id) |
|
|
|
|
|
} |
|
|
|
|
|
scanner.unreadPosition(pos3) |
|
|
|
|
|
return positions.AsToken(op2Id) |
|
|
|
|
|
} |
|
|
|
|
|
scanner.unreadPosition(pos2) |
|
|
|
|
|
} |
|
|
|
|
|
switch pos.Char { |
|
|
|
|
|
case '(', '[', '{': |
|
|
|
|
|
// Increment indentation level
|
|
|
|
|
|
// scanner.indentationLevel++
|
|
|
|
|
|
break |
|
|
|
|
|
case ')', ']', '}': |
|
|
|
|
|
// Decrement indentation level
|
|
|
|
|
|
// scanner.indentationLevel--
|
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
opId := GetOneCharTokenID(pos.Char) |
|
|
|
|
|
return positions.AsToken(opId) |
|
|
} |
|
|
} |