Browse Source

emit indent/dedent tokens

master
Brett Langdon 10 years ago
parent
commit
d191ba369b
1 changed files with 101 additions and 30 deletions
  1. +101
    -30
      parser/tokenizer.go

+ 101
- 30
parser/tokenizer.go View File

@ -8,30 +8,38 @@ import (
) )
var EOF rune = 0 var EOF rune = 0
var MAXINDENT int = 100
type TokenizerState struct { type TokenizerState struct {
buffer *bufio.Reader
curColumn int
curIndent int
curLevel int
curLine int
curLiteral string
fp *os.File
nestingLevel int
offset int
tabsize int
atBol bool
buffer *bufio.Reader
curColumn int
curLine int
curLiteral string
fp *os.File
indentationLevel int
indentationAltStack []int
indentationCurrent int
indentationPending int
indentationStack []int
offset int
tabsize int
tabsizeAlt int
} }
func newTokenizerState() *TokenizerState { func newTokenizerState() *TokenizerState {
return &TokenizerState{ return &TokenizerState{
curColumn: 0,
curIndent: 0,
curLevel: 0,
curLine: 1,
curLiteral: "",
nestingLevel: 0,
offset: 0,
tabsize: 8,
atBol: true,
curColumn: 0,
curLine: 1,
curLiteral: "",
indentationAltStack: make([]int, MAXINDENT),
indentationCurrent: 0,
indentationLevel: 0,
indentationPending: 0,
indentationStack: make([]int, MAXINDENT),
offset: 0,
tabsize: 8,
} }
} }
@ -243,21 +251,79 @@ end:
} }
func (tokenizer *TokenizerState) Next() *token.Token { func (tokenizer *TokenizerState) Next() *token.Token {
next_line:
curTok := tokenizer.newToken() curTok := tokenizer.newToken()
col := 0
nextChar := EOF nextChar := EOF
// Get indentation level
for {
nextChar = tokenizer.readNext()
if nextChar == ' ' {
col += 1
} else if nextChar == '\t' {
col = (col/tokenizer.tabsize + 1) * tokenizer.tabsize
blankline := false
if tokenizer.atBol {
// Get indentation level
col := 0
altcol := 0
tokenizer.atBol = false
for {
nextChar = tokenizer.readNext()
if nextChar == ' ' {
col++
altcol++
} else if nextChar == '\t' {
col = (col/tokenizer.tabsize + 1) * tokenizer.tabsize
altcol = (altcol/tokenizer.tabsizeAlt + 1) * tokenizer.tabsizeAlt
} else {
break
}
}
tokenizer.unread()
if nextChar == '#' || nextChar == '\n' {
// Lines with only newline or comment, shouldn't affect indentation
if col == 0 && nextChar == '\n' {
blankline = false
} else {
blankline = true
}
}
if !blankline && tokenizer.indentationLevel == 0 {
if col == tokenizer.indentationStack[tokenizer.indentationCurrent] {
if altcol != tokenizer.indentationAltStack[tokenizer.indentationCurrent] {
return tokenizer.finalizeToken(curTok, token.ERRORTOKEN)
}
} else if col > tokenizer.indentationStack[tokenizer.indentationCurrent] {
if tokenizer.indentationCurrent+1 >= MAXINDENT {
return tokenizer.finalizeToken(curTok, token.ERRORTOKEN)
}
if altcol <= tokenizer.indentationAltStack[tokenizer.indentationCurrent] {
return tokenizer.finalizeToken(curTok, token.ERRORTOKEN)
}
tokenizer.indentationPending++
tokenizer.indentationCurrent++
tokenizer.indentationStack[tokenizer.indentationCurrent] = col
tokenizer.indentationAltStack[tokenizer.indentationCurrent] = altcol
} else {
for tokenizer.indentationCurrent > 0 && col < tokenizer.indentationStack[tokenizer.indentationCurrent] {
tokenizer.indentationPending--
tokenizer.indentationCurrent--
}
if col != tokenizer.indentationStack[tokenizer.indentationCurrent] {
return tokenizer.finalizeToken(curTok, token.ERRORTOKEN)
}
if altcol != tokenizer.indentationAltStack[tokenizer.indentationCurrent] {
return tokenizer.finalizeToken(curTok, token.ERRORTOKEN)
}
}
}
}
if tokenizer.indentationPending != 0 {
if tokenizer.indentationPending < 0 {
tokenizer.indentationPending++
return tokenizer.finalizeToken(curTok, token.DEDENT)
} else { } else {
break
tokenizer.indentationPending--
return tokenizer.finalizeToken(curTok, token.INDENT)
} }
} }
tokenizer.unread()
again: again:
// Skip spaces // Skip spaces
@ -283,6 +349,7 @@ again:
// Check for EOF // Check for EOF
if nextChar == EOF { if nextChar == EOF {
tokenizer.curLiteral = ""
return tokenizer.finalizeToken(curTok, token.ENDMARKER) return tokenizer.finalizeToken(curTok, token.ENDMARKER)
} }
@ -312,6 +379,10 @@ again:
// Newline // Newline
if nextChar == '\n' { if nextChar == '\n' {
tokenizer.atBol = true
if blankline || tokenizer.indentationLevel > 0 {
goto next_line
}
tokenizer.curLine += 1 tokenizer.curLine += 1
tokenizer.curColumn = 0 tokenizer.curColumn = 0
return tokenizer.finalizeToken(curTok, token.NEWLINE) return tokenizer.finalizeToken(curTok, token.NEWLINE)
@ -379,10 +450,10 @@ letter_quote:
switch nextChar { switch nextChar {
case '(', '[', '{': case '(', '[', '{':
tokenizer.nestingLevel++
tokenizer.indentationLevel++
break break
case ')', ']', '}': case ')', ']', '}':
tokenizer.nestingLevel--
tokenizer.indentationLevel--
break break
} }


Loading…
Cancel
Save