Browse Source

add start to ast parsing

master
Brett Langdon 10 years ago
parent
commit
96a2180981
7 changed files with 464 additions and 0 deletions
  1. +123
    -0
      ast/Python.asdl
  2. +42
    -0
      ast/expression.go
  3. +26
    -0
      ast/expression_context.go
  4. +34
    -0
      ast/module.go
  5. +6
    -0
      ast/node.go
  6. +196
    -0
      ast/parser.go
  7. +37
    -0
      ast/statement.go

+ 123
- 0
ast/Python.asdl View File

@ -0,0 +1,123 @@
-- ASDL's six builtin types are identifier, int, string, bytes, object, singleton
module Python
{
mod = Module(stmt* body)
| Interactive(stmt* body)
| Expression(expr body)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
| AsyncFunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
| ClassDef(identifier name,
expr* bases,
keyword* keywords,
stmt* body,
expr* decorator_list)
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| AugAssign(expr target, operator op, expr value)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(withitem* items, stmt* body)
| AsyncWith(withitem* items, stmt* body)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
| Assert(expr test, expr? msg)
| Import(alias* names)
| ImportFrom(identifier? module, alias* names, int? level)
| Global(identifier* names)
| Nonlocal(identifier* names)
| Expr(expr value)
| Pass | Break | Continue
-- XXX Jython will be different
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset)
-- BoolOp() can use left & right?
expr = BoolOp(boolop op, expr* values)
| BinOp(expr left, operator op, expr right)
| UnaryOp(unaryop op, expr operand)
| Lambda(arguments args, expr body)
| IfExp(expr test, expr body, expr orelse)
| Dict(expr* keys, expr* values)
| Set(expr* elts)
| ListComp(expr elt, comprehension* generators)
| SetComp(expr elt, comprehension* generators)
| DictComp(expr key, expr value, comprehension* generators)
| GeneratorExp(expr elt, comprehension* generators)
-- the grammar constrains where yield expressions can occur
| Await(expr value)
| Yield(expr? value)
| YieldFrom(expr value)
-- need sequences for compare to distinguish between
-- x < 4 < 3 and (x < 4) < 3
| Compare(expr left, cmpop* ops, expr* comparators)
| Call(expr func, expr* args, keyword* keywords)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
| Bytes(bytes s)
| NameConstant(singleton value)
| Ellipsis
-- the following expression can appear in assignment context
| Attribute(expr value, identifier attr, expr_context ctx)
| Subscript(expr value, slice slice, expr_context ctx)
| Starred(expr value, expr_context ctx)
| Name(identifier id, expr_context ctx)
| List(expr* elts, expr_context ctx)
| Tuple(expr* elts, expr_context ctx)
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset)
expr_context = Load | Store | Del | AugLoad | AugStore | Param
slice = Slice(expr? lower, expr? upper, expr? step)
| ExtSlice(slice* dims)
| Index(expr value)
boolop = And | Or
operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift
| RShift | BitOr | BitXor | BitAnd | FloorDiv
unaryop = Invert | Not | UAdd | USub
cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
comprehension = (expr target, expr iter, expr* ifs)
excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
attributes (int lineno, int col_offset)
arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
arg? kwarg, expr* defaults)
arg = (identifier arg, expr? annotation)
attributes (int lineno, int col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
keyword = (identifier? arg, expr value)
-- import name with optional 'as' alias.
alias = (identifier name, identifier? asname)
withitem = (expr context_expr, expr? optional_vars)
}

+ 42
- 0
ast/expression.go View File

@ -0,0 +1,42 @@
package ast
import "fmt"
// Expression is the interface implemented by AST nodes that represent
// expressions (Name and Num in this file). It embeds Node and adds the
// unexported expr marker method.
type Expression interface {
Node
// expr is a marker method; the implementations in this file have empty bodies.
expr()
}
// Name is an expression node holding an identifier and the expression
// context it is used in.
type Name struct {
	Identifier string
	Context    ExpressionContext
}

// NewName allocates a Name for identifier id with expression context ctx.
func NewName(id string, ctx ExpressionContext) *Name {
	n := new(Name)
	n.Identifier = id
	n.Context = ctx
	return n
}

// node marks Name as a Node.
func (n *Name) node() {}

// expr marks Name as an Expression.
func (n *Name) expr() {}

// String renders the node as "Name(id=..., ctx=...)". The identifier is
// printed with %#v (a Go-quoted string) and the context via its own String
// method.
func (n *Name) String() string {
	ctx := n.Context.String()
	return fmt.Sprintf("Name(id=%#v, ctx=%s)", n.Identifier, ctx)
}
// Num is an expression node holding an integer literal.
type Num struct {
	Value int64
}

// NewNum allocates a Num whose Value is i.
func NewNum(i int64) *Num {
	n := new(Num)
	n.Value = i
	return n
}

// node marks Num as a Node.
func (n *Num) node() {}

// expr marks Num as an Expression.
func (n *Num) expr() {}

// String renders the node as "Num(n=<decimal value>)".
func (n *Num) String() string {
	value := n.Value
	return fmt.Sprintf("Num(n=%d)", value)
}

+ 26
- 0
ast/expression_context.go View File

@ -0,0 +1,26 @@
package ast
// ExpressionContext is the interface implemented by the nodes that describe
// how an expression is used (Store and Load in this file). It embeds Node and
// adds the unexported exprCtx marker method.
type ExpressionContext interface {
Node
// exprCtx is a marker method; the implementations in this file have empty bodies.
exprCtx()
}
// Store is the expression context node rendered as "Store()".
type Store struct{}

// NewStore allocates a Store context node.
func NewStore() *Store {
	return new(Store)
}

// node marks Store as a Node.
func (*Store) node() {}

// exprCtx marks Store as an ExpressionContext.
func (*Store) exprCtx() {}

// String returns the fixed rendering "Store()".
func (*Store) String() string {
	return "Store()"
}
// Load is the expression context node rendered as "Load()".
type Load struct{}

// NewLoad allocates a Load context node.
func NewLoad() *Load {
	return new(Load)
}

// node marks Load as a Node.
func (*Load) node() {}

// exprCtx marks Load as an ExpressionContext.
func (*Load) exprCtx() {}

// String returns the fixed rendering "Load()".
func (*Load) String() string {
	return "Load()"
}

+ 34
- 0
ast/module.go View File

@ -0,0 +1,34 @@
package ast
import (
"fmt"
"strings"
)
// Mod is the interface implemented by top-level AST nodes (Module in this
// file). It embeds Node and adds the unexported mod marker method.
type Mod interface {
Node
// mod is a marker method; Module implements it with an empty body.
mod()
}
// Module is a Mod node holding an ordered list of statements.
type Module struct {
	Body []Statement
}

// NewModule returns a Module with an empty, non-nil Body.
func NewModule() *Module {
	return &Module{
		Body: make([]Statement, 0),
	}
}

// node marks Module as a Node.
func (module *Module) node() {}

// mod marks Module as a Mod.
func (module *Module) mod() {}

// Append adds stmt to the end of the module body.
func (module *Module) Append(stmt Statement) {
	module.Body = append(module.Body, stmt)
}

// String renders the module as "Module(body=[...])" with the statements
// joined by ", ".
//
// Body may contain nil entries: ASTFromGrammar appends whatever
// astForStatement returns, and that is nil for statement kinds that are not
// converted yet. Such entries are rendered as "<nil>" instead of calling
// String on a nil interface, which would panic.
func (module *Module) String() string {
	stmts := make([]string, 0, len(module.Body))
	for _, stmt := range module.Body {
		if stmt == nil {
			stmts = append(stmts, "<nil>")
			continue
		}
		stmts = append(stmts, stmt.String())
	}
	return fmt.Sprintf("Module(body=[%s])", strings.Join(stmts, ", "))
}

+ 6
- 0
ast/node.go View File

@ -0,0 +1,6 @@
package ast
// Node is the base interface embedded by the other AST interfaces in this
// package (Expression, ExpressionContext, Mod and Statement).
type Node interface {
// node is an unexported marker method.
node()
// String returns a textual rendering of the node.
String() string
}

+ 196
- 0
ast/parser.go View File

@ -0,0 +1,196 @@
package ast
import (
"fmt"
"strconv"
"github.com/brettlangdon/gython/grammar"
"github.com/brettlangdon/gython/symbol"
"github.com/brettlangdon/gython/token"
)
// ASTFromGrammar builds a Module from the children of root. Every child whose
// ID is symbol.STMT is converted with astForStatement and appended to the
// module body; all other children are ignored. The returned error is always
// nil.
func ASTFromGrammar(root *grammar.FileInput) (Mod, error) {
	module := NewModule()
	for _, node := range root.Children() {
		if node.ID() != symbol.STMT {
			continue
		}
		module.Append(astForStatement(node.(*grammar.Statement)))
	}
	return module, nil
}
// isToken reports whether node is a *grammar.TokenNode whose token ID equals
// tokId.
func isToken(node grammar.Node, tokId token.TokenID) bool {
	tokNode, ok := node.(*grammar.TokenNode)
	return ok && tokNode.Token.ID == tokId
}
// astForStatement converts a statement grammar node into an AST Statement.
//
// A simple statement is first replaced by its first child. A small statement
// wrapping an expression statement is handed to astForExpressionStatement.
// Compound statements are not converted yet: the node is printed to standard
// output and nil is returned. Every other shape also yields nil.
func astForStatement(root *grammar.Statement) Statement {
	node := root.Child()
	if node.ID() == symbol.SIMPLE_STMT {
		node = node.(*grammar.SimpleStatement).Children()[0]
	}

	if small, ok := node.(*grammar.SmallStatement); ok {
		if exprStmt, ok := small.Child().(*grammar.ExpressionStatement); ok {
			return astForExpressionStatement(exprStmt)
		}
		return nil
	}
	if compound, ok := node.(*grammar.CompoundStatement); ok {
		fmt.Println(compound)
	}
	return nil
}
// astForExpressionStatement converts an expression-statement grammar node
// into an AST Statement.
//
// Only plain assignment is handled, including chained assignment such as
// `a = b = 1`. It returns nil for everything else: bare expression
// statements, augmented assignment, and any node whose second child is not
// an '=' token.
func astForExpressionStatement(root *grammar.ExpressionStatement) Statement {
	children := root.Children()
	length := len(children)

	// TODO: a single child is a bare expression statement (not converted yet).
	// An assignment needs at least `target = value`, i.e. three children.
	if length < 3 {
		return nil
	}
	// TODO: augmented assignment. For now anything whose second child is not
	// '=' is unsupported.
	if !isToken(children[1], token.EQUAL) {
		return nil
	}

	// The last child is the assigned value; kinds other than a
	// testlist-star-expression are not converted and leave the value nil.
	var value Expression
	if child, ok := children[length-1].(*grammar.TestlistStarExpression); ok {
		value = astForTestList(child)
	}

	assign := NewAssign(value)
	// Children alternate target, '=', target, '=', ..., value, so the targets
	// sit at the even indexes before the final value. Stepping by one would
	// treat the '=' tokens as targets.
	for i := 0; i < length-2; i += 2 {
		child, ok := children[i].(grammar.ExpressionStatementChild)
		if !ok {
			return nil
		}
		target := astForTestList(child)
		// Names on the left-hand side are being written to, not read.
		if name, isName := target.(*Name); isName {
			name.Context = NewStore()
		}
		assign.Append(target)
	}
	return assign
}
// astForTestList converts a test-list grammar node into an Expression. Only
// a *grammar.TestlistStarExpression of length one is handled, by converting
// its single child; every other input yields nil.
func astForTestList(root grammar.ExpressionStatementChild) Expression {
	list, ok := root.(*grammar.TestlistStarExpression)
	if !ok || list.Length() != 1 {
		return nil
	}
	return astForExpression(list.Children()[0])
}
// astForExpression walks down the chain of expression grammar nodes until it
// reaches a Power node, which is converted by astForPower.
//
// Each intermediate node kind is only handled when it has exactly one child,
// in which case the walk continues into that child. An intermediate node of
// any other length yields nil. For an unrecognised node kind the symbol name
// is printed to standard output and nil is returned.
func astForExpression(root grammar.Node) Expression {
	switch n := root.(type) {
	case *grammar.Test:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.OrTest:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.AndTest:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.NotTest:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.Comparison:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.Expression:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.XorExpression:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.AndExpression:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.ShiftExpression:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.ArithmeticExpression:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.Term:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.Factor:
		if n.Length() != 1 {
			return nil
		}
		return astForExpression(n.Children()[0])
	case *grammar.Power:
		return astForPower(n)
	default:
		fmt.Println(symbol.SymbolNames[n.ID()])
	}
	return nil
}
// astForPower converts a Power grammar node. The first child must be an atom
// expression, which is converted with astForAtomExpression. The result is
// returned only when that is the node's sole child; a first child of another
// kind, or any additional children, yield nil.
func astForPower(root *grammar.Power) Expression {
	kids := root.Children()
	atomExpr, ok := kids[0].(*grammar.AtomExpression)
	if !ok {
		return nil
	}
	result := astForAtomExpression(atomExpr)
	if len(kids) != 1 {
		return nil
	}
	return result
}
// astForAtomExpression converts an atom-expression grammar node by converting
// its first child when that child is an Atom; otherwise it returns nil.
func astForAtomExpression(root *grammar.AtomExpression) Expression {
	if atom, ok := root.Children()[0].(*grammar.Atom); ok {
		return astForAtom(atom)
	}
	return nil
}
// astForAtom converts an Atom grammar node made of exactly one token.
//
// A NAME token becomes a Name in Load context. A NUMBER token becomes a Num
// when its literal is an integer that fits in int64; other numeric literals
// (for example floats) cannot be stored in Num and yield nil. Atoms with any
// other shape also yield nil.
func astForAtom(root *grammar.Atom) Expression {
	children := root.Children()
	if len(children) != 1 {
		return nil
	}
	tok, ok := children[0].(*grammar.TokenNode)
	if !ok {
		return nil
	}

	switch tok.Token.ID {
	case token.NAME:
		// TODO: Check for "None", "True", and "False"
		return NewName(tok.Token.Literal, NewLoad())
	case token.NUMBER:
		// Base 0 lets ParseInt take the base from the literal's prefix
		// (0x, 0o, 0b) and accept '_' digit separators, which a fixed base
		// of 10 rejects.
		value, err := strconv.ParseInt(tok.Token.Literal, 0, 64)
		if err != nil {
			return nil
		}
		return NewNum(value)
	}
	return nil
}

+ 37
- 0
ast/statement.go View File

@ -0,0 +1,37 @@
package ast
import (
"fmt"
"strings"
)
// Statement is the interface implemented by AST nodes that represent
// statements (Assign in this file). It embeds Node and adds the unexported
// stmt marker method.
type Statement interface {
Node
// stmt is a marker method; Assign implements it with an empty body.
stmt()
}
// Assign is a statement node for an assignment: one or more target
// expressions and the value assigned to them.
type Assign struct {
	Targets []Expression
	Value   Expression
}

// NewAssign returns an Assign with the given value and an empty, non-nil
// target list.
func NewAssign(value Expression) *Assign {
	return &Assign{
		Targets: make([]Expression, 0),
		Value:   value,
	}
}

// node marks Assign as a Node.
func (assign *Assign) node() {}

// stmt marks Assign as a Statement.
func (assign *Assign) stmt() {}

// Append adds target to the end of the target list.
func (assign *Assign) Append(target Expression) {
	assign.Targets = append(assign.Targets, target)
}

// String renders the node as "Assign(targets=[...], value=...)".
//
// The value and individual targets may be nil, because the parser passes
// along nil for expression kinds it cannot convert yet. Those are rendered
// as "<nil>" instead of calling String on a nil interface, which would panic.
func (assign *Assign) String() string {
	render := func(expr Expression) string {
		if expr == nil {
			return "<nil>"
		}
		return expr.String()
	}

	exprs := make([]string, 0, len(assign.Targets))
	for _, expr := range assign.Targets {
		exprs = append(exprs, render(expr))
	}
	return fmt.Sprintf("Assign(targets=[%s], value=%s)", strings.Join(exprs, ", "), render(assign.Value))
}

Loading…
Cancel
Save