diff --git a/ast/Python.asdl b/ast/Python.asdl
new file mode 100644
index 0000000..cd0832d
--- /dev/null
+++ b/ast/Python.asdl
@@ -0,0 +1,123 @@
+-- ASDL's six builtin types are identifier, int, string, bytes, object, singleton
+
+module Python
+{
+    mod = Module(stmt* body)
+        | Interactive(stmt* body)
+        | Expression(expr body)
+
+        -- not really an actual node but useful in Jython's typesystem.
+        | Suite(stmt* body)
+
+    stmt = FunctionDef(identifier name, arguments args,
+                       stmt* body, expr* decorator_list, expr? returns)
+          | AsyncFunctionDef(identifier name, arguments args,
+                             stmt* body, expr* decorator_list, expr? returns)
+
+          | ClassDef(identifier name,
+             expr* bases,
+             keyword* keywords,
+             stmt* body,
+             expr* decorator_list)
+          | Return(expr? value)
+
+          | Delete(expr* targets)
+          | Assign(expr* targets, expr value)
+          | AugAssign(expr target, operator op, expr value)
+
+          -- use 'orelse' because else is a keyword in target languages
+          | For(expr target, expr iter, stmt* body, stmt* orelse)
+          | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
+          | While(expr test, stmt* body, stmt* orelse)
+          | If(expr test, stmt* body, stmt* orelse)
+          | With(withitem* items, stmt* body)
+          | AsyncWith(withitem* items, stmt* body)
+
+          | Raise(expr? exc, expr? cause)
+          | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
+          | Assert(expr test, expr? msg)
+
+          | Import(alias* names)
+          | ImportFrom(identifier? module, alias* names, int? level)
+
+          | Global(identifier* names)
+          | Nonlocal(identifier* names)
+          | Expr(expr value)
+          | Pass | Break | Continue
+
+          -- XXX Jython will be different
+          -- col_offset is the byte offset in the utf8 string the parser uses
+          attributes (int lineno, int col_offset)
+
+          -- BoolOp() can use left & right?
+    expr = BoolOp(boolop op, expr* values)
+         | BinOp(expr left, operator op, expr right)
+         | UnaryOp(unaryop op, expr operand)
+         | Lambda(arguments args, expr body)
+         | IfExp(expr test, expr body, expr orelse)
+         | Dict(expr* keys, expr* values)
+         | Set(expr* elts)
+         | ListComp(expr elt, comprehension* generators)
+         | SetComp(expr elt, comprehension* generators)
+         | DictComp(expr key, expr value, comprehension* generators)
+         | GeneratorExp(expr elt, comprehension* generators)
+         -- the grammar constrains where yield expressions can occur
+         | Await(expr value)
+         | Yield(expr? value)
+         | YieldFrom(expr value)
+         -- need sequences for compare to distinguish between
+         -- x < 4 < 3 and (x < 4) < 3
+         | Compare(expr left, cmpop* ops, expr* comparators)
+         | Call(expr func, expr* args, keyword* keywords)
+         | Num(object n) -- a number as a PyObject.
+         | Str(string s) -- need to specify raw, unicode, etc?
+         | Bytes(bytes s)
+         | NameConstant(singleton value)
+         | Ellipsis
+
+         -- the following expression can appear in assignment context
+         | Attribute(expr value, identifier attr, expr_context ctx)
+         | Subscript(expr value, slice slice, expr_context ctx)
+         | Starred(expr value, expr_context ctx)
+         | Name(identifier id, expr_context ctx)
+         | List(expr* elts, expr_context ctx)
+         | Tuple(expr* elts, expr_context ctx)
+
+          -- col_offset is the byte offset in the utf8 string the parser uses
+          attributes (int lineno, int col_offset)
+
+    expr_context = Load | Store | Del | AugLoad | AugStore | Param
+
+    slice = Slice(expr? lower, expr? upper, expr? step)
+          | ExtSlice(slice* dims)
+          | Index(expr value)
+
+    boolop = And | Or
+
+    operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift
+                 | RShift | BitOr | BitXor | BitAnd | FloorDiv
+
+    unaryop = Invert | Not | UAdd | USub
+
+    cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
+
+    comprehension = (expr target, expr iter, expr* ifs)
+
+    excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
+                    attributes (int lineno, int col_offset)
+
+    arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
+                 arg? kwarg, expr* defaults)
+
+    arg = (identifier arg, expr? annotation)
+           attributes (int lineno, int col_offset)
+
+    -- keyword arguments supplied to call (NULL identifier for **kwargs)
+    keyword = (identifier? arg, expr value)
+
+    -- import name with optional 'as' alias.
+    alias = (identifier name, identifier? asname)
+
+    withitem = (expr context_expr, expr? optional_vars)
+}
+
diff --git a/ast/expression.go b/ast/expression.go
new file mode 100644
index 0000000..180db0f
--- /dev/null
+++ b/ast/expression.go
@@ -0,0 +1,57 @@
+package ast
+
+import "fmt"
+
+// Expression is implemented by every AST node that can be used where
+// Python.asdl expects an `expr`.
+type Expression interface {
+	Node
+	expr()
+}
+
+// Name is an identifier reference (ASDL: Name(identifier id, expr_context ctx)).
+type Name struct {
+	Identifier string
+	Context    ExpressionContext
+}
+
+// NewName returns a Name for the identifier id with the context ctx.
+func NewName(id string, ctx ExpressionContext) *Name {
+	return &Name{
+		Identifier: id,
+		Context:    ctx,
+	}
+}
+
+func (n *Name) node() {}
+func (n *Name) expr() {}
+
+// String renders the node as "Name(id=..., ctx=...)". A nil Context is
+// rendered as "<nil>" instead of panicking.
+func (n *Name) String() string {
+	ctx := "<nil>"
+	if n.Context != nil {
+		ctx = n.Context.String()
+	}
+	return fmt.Sprintf("Name(id=%#v, ctx=%s)", n.Identifier, ctx)
+}
+
+// Num is a numeric literal holding an int64 value.
+type Num struct {
+	Value int64
+}
+
+// NewNum returns a Num holding i.
+func NewNum(i int64) *Num {
+	return &Num{
+		Value: i,
+	}
+}
+
+func (n *Num) node() {}
+func (n *Num) expr() {}
+
+// String renders the node as "Num(n=<value>)".
+func (n *Num) String() string {
+	return fmt.Sprintf("Num(n=%d)", n.Value)
+}
diff --git a/ast/expression_context.go b/ast/expression_context.go
new file mode 100644
index 0000000..ca21ca1
--- /dev/null
+++ b/ast/expression_context.go
@@ -0,0 +1,36 @@
+package ast
+
+// ExpressionContext is implemented by the nodes that describe how an
+// expression is used (ASDL: expr_context).
+type ExpressionContext interface {
+	Node
+	exprCtx()
+}
+
+// Store is the expression context given to names that are assigned to.
+type Store struct{}
+
+// NewStore returns a new Store context.
+func NewStore() *Store {
+	return &Store{}
+}
+
+func (s *Store) node()    {}
+func (s *Store) exprCtx() {}
+
+// String returns "Store()".
+func (s *Store) String() string { return "Store()" }
+
+// Load is the expression context given to names that are not assigned to.
+type Load struct{}
+
+// NewLoad returns a new Load context.
+func NewLoad() *Load {
+	return &Load{}
+}
+
+func (l *Load) node()    {}
+func (l *Load) exprCtx() {}
+
+// String returns "Load()".
+func (l *Load) String() string { return "Load()" }
diff --git a/ast/module.go b/ast/module.go
new file mode 100644
index 0000000..d873c04
--- /dev/null
+++ b/ast/module.go
@@ -0,0 +1,44 @@
+package ast
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Mod is implemented by the root nodes of the AST (ASDL: mod).
+type Mod interface {
+	Node
+	mod()
+}
+
+// Module is a root node holding a list of statements
+// (ASDL: Module(stmt* body)).
+type Module struct {
+	Body []Statement
+}
+
+// NewModule returns a Module with an empty body.
+func NewModule() *Module {
+	return &Module{
+		Body: make([]Statement, 0),
+	}
+}
+
+func (m *Module) node() {}
+func (m *Module) mod()  {}
+
+// Append adds stmt to the end of the module body.
+func (m *Module) Append(stmt Statement) {
+	m.Body = append(m.Body, stmt)
+}
+
+// String renders the node as "Module(body=[...])" with the statements
+// separated by ", ". Nil statements are rendered as "<nil>" instead of
+// panicking.
+func (m *Module) String() string {
+	stmts := make([]string, 0, len(m.Body))
+	for _, stmt := range m.Body {
+		stmts = append(stmts, nodeString(stmt))
+	}
+	return fmt.Sprintf("Module(body=[%s])", strings.Join(stmts, ", "))
+}
diff --git a/ast/node.go b/ast/node.go
new file mode 100644
index 0000000..7245e6c
--- /dev/null
+++ b/ast/node.go
@@ -0,0 +1,16 @@
+package ast
+
+// Node is implemented by every AST node.
+type Node interface {
+	node()
+	String() string
+}
+
+// nodeString returns n.String(), or "<nil>" when n is nil, so that the String
+// methods of container nodes do not panic on a missing child.
+func nodeString(n Node) string {
+	if n == nil {
+		return "<nil>"
+	}
+	return n.String()
+}
diff --git a/ast/parser.go b/ast/parser.go
new file mode 100644
index 0000000..3e00469
--- /dev/null
+++ b/ast/parser.go
@@ -0,0 +1,244 @@
+package ast
+
+import (
+	"fmt"
+	"strconv"
+
+	"github.com/brettlangdon/gython/grammar"
+	"github.com/brettlangdon/gython/symbol"
+	"github.com/brettlangdon/gython/token"
+)
+
+// ASTFromGrammar converts a parsed grammar tree into a Module.
+//
+// Every child of root whose ID is symbol.STMT is converted in order; all other
+// children are skipped. When a statement cannot be converted an error is
+// returned instead of a Module, so callers never receive a body that contains
+// nil statements.
+func ASTFromGrammar(root *grammar.FileInput) (Mod, error) {
+	mod := NewModule()
+
+	for i, child := range root.Children() {
+		if child.ID() != symbol.STMT {
+			continue
+		}
+		node, ok := child.(*grammar.Statement)
+		if !ok {
+			return nil, fmt.Errorf("ast: child %d has the statement ID but is a %T", i, child)
+		}
+		stmt := astForStatement(node)
+		if stmt == nil {
+			return nil, fmt.Errorf("ast: unsupported statement at child %d", i)
+		}
+		mod.Append(stmt)
+	}
+
+	return mod, nil
+}
+
+// isToken reports whether node is a *grammar.TokenNode whose token ID is tokID.
+func isToken(node grammar.Node, tokID token.TokenID) bool {
+	n, ok := node.(*grammar.TokenNode)
+	return ok && n.Token.ID == tokID
+}
+
+// astForStatement converts a single grammar statement. It returns nil for every
+// statement form that is not supported yet; only expression statements are
+// handed on for conversion.
+func astForStatement(root *grammar.Statement) Statement {
+	stmt := root.Child()
+	if stmt == nil {
+		return nil
+	}
+
+	if stmt.ID() == symbol.SIMPLE_STMT {
+		simple, ok := stmt.(*grammar.SimpleStatement)
+		if !ok || len(simple.Children()) == 0 {
+			return nil
+		}
+		// TODO: only the first child of a simple statement is converted; any
+		// further children are ignored.
+		stmt = simple.Children()[0]
+	}
+
+	small, ok := stmt.(*grammar.SmallStatement)
+	if !ok {
+		// Compound statements (and anything else) are not supported yet.
+		return nil
+	}
+	if exprStmt, ok := small.Child().(*grammar.ExpressionStatement); ok {
+		return astForExpressionStatement(exprStmt)
+	}
+	return nil
+}
+
+// astForExpressionStatement converts an assignment into an Assign node.
+//
+// Only plain assignment (`a = value`, `a = b = value`) is handled: the children
+// must alternate between operands and `=` tokens, with the assigned value
+// last. nil is returned for bare expressions, augmented assignment, malformed
+// child lists, and operands that cannot be converted.
+func astForExpressionStatement(root *grammar.ExpressionStatement) Statement {
+	children := root.Children()
+	length := len(children)
+	// TODO: bare expression statements (one child) and augmented assignment.
+	// A well-formed assignment has an odd number of children, at least three.
+	if length < 3 || length%2 == 0 {
+		return nil
+	}
+
+	last, ok := children[length-1].(*grammar.TestlistStarExpression)
+	if !ok {
+		return nil
+	}
+	value := astForTestList(last)
+	if value == nil {
+		return nil
+	}
+
+	assign := NewAssign(value)
+	// Targets sit at the even indexes and each one is followed by an `=`
+	// token, so step by two; stepping by one would hand the `=` tokens to
+	// astForTestList as if they were targets.
+	for i := 0; i < length-2; i += 2 {
+		if !isToken(children[i+1], token.EQUAL) {
+			return nil
+		}
+		child, ok := children[i].(grammar.ExpressionStatementChild)
+		if !ok {
+			return nil
+		}
+		target := astForTestList(child)
+		if target == nil {
+			return nil
+		}
+		if name, isName := target.(*Name); isName {
+			name.Context = NewStore()
+		}
+		assign.Append(target)
+	}
+	return assign
+}
+
+// astForTestList converts a test list that holds exactly one expression. Lists
+// with any other length, and every other child type, yield nil.
+func astForTestList(root grammar.ExpressionStatementChild) Expression {
+	if list, ok := root.(*grammar.TestlistStarExpression); ok && list.Length() == 1 {
+		return astForExpression(list.Children()[0])
+	}
+	return nil
+}
+
+// astForExpression descends through the expression grammar nodes. A listed
+// node with exactly one child is replaced by the conversion of that child,
+// and a Power node is handed to astForPower. Listed nodes with any other
+// number of children, and node types not listed here, are not supported yet
+// and yield nil.
+func astForExpression(root grammar.Node) Expression {
+	switch root := root.(type) {
+	case *grammar.Test:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.OrTest:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.AndTest:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.NotTest:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.Comparison:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.Expression:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.XorExpression:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.AndExpression:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.ShiftExpression:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.ArithmeticExpression:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.Term:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.Factor:
+		if root.Length() == 1 {
+			return astForExpression(root.Children()[0])
+		}
+	case *grammar.Power:
+		return astForPower(root)
+	}
+	return nil
+}
+
+// astForPower converts a Power node that consists of a single atom
+// expression. Any other shape is not supported yet and yields nil.
+func astForPower(root *grammar.Power) Expression {
+	children := root.Children()
+	if len(children) != 1 {
+		return nil
+	}
+	atomExpr, ok := children[0].(*grammar.AtomExpression)
+	if !ok {
+		return nil
+	}
+	return astForAtomExpression(atomExpr)
+}
+
+// astForAtomExpression converts the leading atom of an atom expression. Only
+// the first child is inspected; any children after it are ignored.
+func astForAtomExpression(root *grammar.AtomExpression) Expression {
+	children := root.Children()
+	if len(children) == 0 {
+		return nil
+	}
+	if atom, ok := children[0].(*grammar.Atom); ok {
+		return astForAtom(atom)
+	}
+	return nil
+}
+
+// astForAtom converts an atom made of a single NAME or NUMBER token into a
+// Name (in Load context) or a Num. Every other atom yields nil, including
+// NUMBER literals that do not parse as a base-10 int64.
+func astForAtom(root *grammar.Atom) Expression {
+	children := root.Children()
+	if len(children) != 1 {
+		return nil
+	}
+	tok, ok := children[0].(*grammar.TokenNode)
+	if !ok {
+		return nil
+	}
+	switch tok.Token.ID {
+	case token.NAME:
+		// TODO: Check for "None", "True", and "False"
+		return NewName(tok.Token.Literal, NewLoad())
+	case token.NUMBER:
+		value, err := strconv.ParseInt(tok.Token.Literal, 10, 64)
+		if err != nil {
+			return nil
+		}
+		return NewNum(value)
+	}
+	return nil
+}
diff --git a/ast/statement.go b/ast/statement.go
new file mode 100644
index 0000000..83b51f3
--- /dev/null
+++ b/ast/statement.go
@@ -0,0 +1,45 @@
+package ast
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Statement is implemented by every AST node that can be used where
+// Python.asdl expects a `stmt`.
+type Statement interface {
+	Node
+	stmt()
+}
+
+// Assign is an assignment statement (ASDL: Assign(expr* targets, expr value)).
+type Assign struct {
+	Targets []Expression
+	Value   Expression
+}
+
+// NewAssign returns an Assign of value with no targets; add them with Append.
+func NewAssign(value Expression) *Assign {
+	return &Assign{
+		Targets: make([]Expression, 0),
+		Value:   value,
+	}
+}
+
+func (a *Assign) node() {}
+func (a *Assign) stmt() {}
+
+// Append adds target to the end of the target list.
+func (a *Assign) Append(target Expression) {
+	a.Targets = append(a.Targets, target)
+}
+
+// String renders the node as "Assign(targets=[...], value=...)". Nil targets
+// and a nil Value are rendered as "<nil>" instead of panicking.
+func (a *Assign) String() string {
+	targets := make([]string, 0, len(a.Targets))
+	for _, target := range a.Targets {
+		targets = append(targets, nodeString(target))
+	}
+	return fmt.Sprintf("Assign(targets=[%s], value=%s)", strings.Join(targets, ", "), nodeString(a.Value))
+}