From 84239f7e83216db69ac8ecba62014fecf1866b01 Mon Sep 17 00:00:00 2001 From: brettlangdon Date: Sat, 13 Jun 2015 22:15:44 -0400 Subject: [PATCH] initial commit/prototype --- README.md | 169 +++++++++++++++++++++++++++++ config/config.go | 59 ++++++++++ config/section.go | 69 ++++++++++++ example/example.cfg | 16 +++ example/main.go | 21 ++++ forge.go | 45 ++++++++ parser/parser.go | 254 ++++++++++++++++++++++++++++++++++++++++++++ token/token.go | 17 +++ token/tokenid.go | 46 ++++++++ token/tokenizer.go | 154 +++++++++++++++++++++++++++ 10 files changed, 850 insertions(+) create mode 100644 README.md create mode 100644 config/config.go create mode 100644 config/section.go create mode 100644 example/example.cfg create mode 100644 example/main.go create mode 100644 forge.go create mode 100644 parser/parser.go create mode 100644 token/token.go create mode 100644 token/tokenid.go create mode 100644 token/tokenizer.go diff --git a/README.md b/README.md new file mode 100644 index 0000000..7417a95 --- /dev/null +++ b/README.md @@ -0,0 +1,169 @@ +forge +===== + +Forge is a configuration syntax and parser. + +## Installation + +`go get github.com/brettlangdon/forge` + +## File format + +The format was influenced a lot by the nginx configuration file format. + +```config +global_key = "string value"; +sub_settings { + sub_int = 500; + sub_float = 80.80; + sub_sub_settings { + sub_sub_sub_settings { + key = "value"; + } + } +} + +second { + key = "value"; + global_reference = sub_settings.sub_float; + local_reference = .key; +} +``` + +For normal settings the format is the key, followed by an equal sign, followed by the value, and lastly a terminating semicolon. +`<key> = <value>;` + +A section (basically a map) is formatted as the section name followed by the section's settings wrapped in curly brackets. +`<section-name>
{ <key> = <value>; }` + +## Data types + +### String +A string value is wrapped by double quotes (single quotes will not work). + +`"string value"`, `"single ' quotes ' allowed"`. + +As of right now there is no way to escape double quotes within a string's value. + +### Number + +There are two supported number types, Integer and Float; both are written as plain digits, with the latter containing exactly one period. + +`500`, `50.56`. + +### Section + +Sections are essentially maps, that is, settings whose purpose is to hold other settings. +Sections can be used to namespace settings. + +`section { setting = "value"; }`. + + +### References + +References are used to refer to previously defined settings. There are two kinds of references: a global reference and a local reference. + +The general format for a reference is a mix of identifiers and periods, for example `production.db.name`. + +A global reference is a reference which starts looking for its value from the topmost section (global section). + +A local reference is a reference whose value starts with a period; it starts looking for its value from the current section it is within (local section). + +```config +production { + db { + name = "forge"; + } +} + +development { + db { + name = production.db.name; + } + db_name = .db.name; +} +``` + +## API + +`github.com/brettlangdon/forge` + +* `forge.ParseString(data string) (map[string]interface{}, error)` +* `forge.ParseBytes(data []byte) (map[string]interface{}, error)` +* `forge.ParseFile(filename string) (map[string]interface{}, error)` +* `forge.ParseReader(reader io.Reader) (map[string]interface{}, error)` + + +## Example + +You can see example usage in the `example` folder. 
+ +```go +package main + +import ( + "encoding/json" + "fmt" + + "github.com/brettlangdon/forge" +) + +func main() { + // Parse the file `example.cfg` as a map[string]interface{} + settings, err := forge.ParseFile("example.cfg") + if err != nil { + panic(err) + } + + // Convert the settings to JSON for printing + jsonBytes, err := json.Marshal(settings) + if err != nil { + panic(err) + } + + // Print the parsed settings + fmt.Println(string(jsonBytes)) +} +``` + +## Future Plans + +The following features are currently on my bucket list for the future: + +### More data types + +I would like to at least add `Boolean`, and possibly `List`. + +### Operations/Expressions + +Would be nice to have Addition/Subtraction/Multiplication/Division: + +```config +whole = 100; +half = whole / 2; +double = whole * 2; +one_more = whole + 1; +one_less = whole - 1; +``` + +Also Concatenation for strings: + +```config +domain = "github.com"; +username = "brettlangdon"; +name = "forge"; +repo_url = domain + "/" + username + "/" + name; +``` + +### API + +I'll probably revisit the API; I just threw it together quickly and want to make sure it is right. + + +### Comments + +This is pretty lacking and should be added soon. + +### Documentation + +Documentation is a good thing. 
diff --git a/config/config.go b/config/config.go new file mode 100644 index 0000000..57c9874 --- /dev/null +++ b/config/config.go @@ -0,0 +1,59 @@ +package config + +type ConfigType int + +const ( + SECTION ConfigType = iota + INTEGER + FLOAT + STRING +) + +var configTypes = [...]string{ + SECTION: "SECTION", + INTEGER: "INTEGER", + FLOAT: "FLOAT", + STRING: "STRING", +} + +func (this ConfigType) String() string { + s := "" + if 0 <= this && this < ConfigType(len(configTypes)) { + s = configTypes[this] + } + + if s == "" { + s = "UNKNOWN" + } + + return s +} + +type ConfigValue interface { + GetType() ConfigType + GetValue() interface{} +} + +type IntegerValue struct { + Name string + Value int64 +} + +func (this IntegerValue) GetType() ConfigType { return INTEGER } +func (this IntegerValue) GetValue() interface{} { return this.Value } + +type FloatValue struct { + Name string + Value float64 +} + +func (this FloatValue) GetType() ConfigType { return FLOAT } +func (this FloatValue) GetValue() interface{} { return this.Value } + +type StringValue struct { + Name string + Value string +} + +func (this StringValue) GetType() ConfigType { return STRING } +func (this StringValue) GetValue() interface{} { return this.Value } diff --git a/config/section.go b/config/section.go new file mode 100644 index 0000000..6660408 --- /dev/null +++ b/config/section.go @@ -0,0 +1,69 @@ +package config + +import "encoding/json" + +type SectionValue struct { + Name string + Value map[string]ConfigValue +} + +func (this SectionValue) GetType() ConfigType { return SECTION } +func (this SectionValue) GetValue() interface{} { return this.Value } + +func (this SectionValue) Set(name string, value ConfigValue) { + this.Value[name] = value +} + +func (this SectionValue) Get(name string) ConfigValue { + return this.Value[name] +} + +func (this SectionValue) GetSection(name string) SectionValue { + value := this.Value[name] + return value.(SectionValue) +} + +func (this SectionValue) 
GetString(name string) StringValue { + value := this.Value[name] + return value.(StringValue) +} + +func (this SectionValue) GetInteger(name string) IntegerValue { + value := this.Value[name] + return value.(IntegerValue) +} + +func (this SectionValue) GetFloat(name string) FloatValue { + value := this.Value[name] + return value.(FloatValue) +} + +func (this SectionValue) Contains(name string) bool { + _, ok := this.Value[name] + return ok +} + +func (this SectionValue) ToJSON() ([]byte, error) { + data, err := this.ToMap() + if err != nil { + return nil, err + } + return json.Marshal(data) +} + +func (this SectionValue) ToMap() (map[string]interface{}, error) { + settings := make(map[string]interface{}) + for name, value := range this.Value { + if value.GetType() == SECTION { + data, err := value.(SectionValue).ToMap() + if err != nil { + return nil, err + } + settings[name] = data + } else { + settings[name] = value.GetValue() + } + } + + return settings, nil +} diff --git a/example/example.cfg b/example/example.cfg new file mode 100644 index 0000000..b3cb8db --- /dev/null +++ b/example/example.cfg @@ -0,0 +1,16 @@ +global = "global value"; +master { + string = "master string value"; + integer = 500; + float = 80.80; + sub { + key = "master sub key value"; + } +} + +slave { + another = "slave another value"; + global_reference = global; + master_sub_key = master.sub.key; + _under = 50; +} diff --git a/example/main.go b/example/main.go new file mode 100644 index 0000000..48396d2 --- /dev/null +++ b/example/main.go @@ -0,0 +1,21 @@ +package main + +import ( + "encoding/json" + "fmt" + + "github.com/brettlangdon/forge" +) + +func main() { + settings, err := forge.ParseFile("example.cfg") + if err != nil { + panic(err) + } + + data, err := json.Marshal(settings) + if err != nil { + panic(err) + } + fmt.Println(string(data)) +} diff --git a/forge.go b/forge.go new file mode 100644 index 0000000..322a297 --- /dev/null +++ b/forge.go @@ -0,0 +1,45 @@ +package forge + 
+import ( + "bytes" + "io" + "strings" + + "github.com/brettlangdon/forge/parser" +) + +func ParseString(data string) (map[string]interface{}, error) { + settings, err := parser.ParseReader(strings.NewReader(data)) + if err != nil { + return nil, err + } + + return settings.ToMap() +} + +func ParseBytes(data []byte) (map[string]interface{}, error) { + settings, err := parser.ParseReader(bytes.NewReader(data)) + if err != nil { + return nil, err + } + + return settings.ToMap() +} + +func ParseFile(filename string) (map[string]interface{}, error) { + settings, err := parser.ParseFile(filename) + if err != nil { + return nil, err + } + + return settings.ToMap() +} + +func ParseReader(reader io.Reader) (map[string]interface{}, error) { + settings, err := parser.ParseReader(reader) + if err != nil { + return nil, err + } + + return settings.ToMap() +} diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..d4f085a --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,254 @@ +package parser + +import ( + "errors" + "fmt" + "io" + "os" + "strconv" + "strings" + + "github.com/brettlangdon/forge/config" + "github.com/brettlangdon/forge/token" +) + +type Parser struct { + settings config.SectionValue + tokenizer *token.Tokenizer + cur_tok token.Token + cur_section config.SectionValue + previous []config.SectionValue +} + +func (this *Parser) SyntaxError(msg string) error { + msg = fmt.Sprintf( + "Syntax error line <%d> column <%d>: %s", + this.cur_tok.Line, + this.cur_tok.Column, + msg, + ) + return errors.New(msg) +} + +func (this *Parser) ReferenceTypeError(names []string, expected config.ConfigType, actual config.ConfigType) error { + reference := strings.Join(names, ".") + msg := fmt.Sprintf( + "Reference type error, '%s', expected type %s instead got %s", + reference, + expected, + actual, + ) + return errors.New(msg) +} + +func (this *Parser) ReferenceMissingError(names []string, searching string) error { + reference := strings.Join(names, ".") + 
msg := fmt.Sprintf( + "Reference missing error, '%s' does not have key '%s'", + reference, + searching, + ) + return errors.New(msg) +} + +func (this *Parser) readToken() token.Token { + this.cur_tok = this.tokenizer.NextToken() + return this.cur_tok +} + +func (this *Parser) parseReference(starting_section config.SectionValue, period bool) (config.ConfigValue, error) { + names := []string{} + if period == false { + names = append(names, this.cur_tok.Literal) + } + for { + this.readToken() + if this.cur_tok.ID == token.PERIOD && period == false { + period = true + } else if period && this.cur_tok.ID == token.IDENTIFIER { + names = append(names, this.cur_tok.Literal) + period = false + } else if this.cur_tok.ID == token.SEMICOLON { + break + } else { + msg := fmt.Sprintf("expected ';' instead found '%s'", this.cur_tok.Literal) + return nil, this.SyntaxError(msg) + } + } + if len(names) == 0 { + return nil, this.SyntaxError( + fmt.Sprintf("expected IDENTIFIER instead found %s", this.cur_tok.Literal), + ) + } + + if period { + return nil, this.SyntaxError(fmt.Sprintf("expected IDENTIFIER after PERIOD")) + } + + var reference config.ConfigValue + reference = starting_section + visited := []string{} + for { + if len(names) == 0 { + break + } + if reference.GetType() != config.SECTION { + return nil, this.ReferenceTypeError(visited, config.SECTION, reference.GetType()) + } + name := names[0] + names = names[1:] + section := reference.(config.SectionValue) + if section.Contains(name) == false { + return nil, this.ReferenceMissingError(visited, name) + } + reference = section.Get(name) + visited = append(visited, name) + } + + return reference, nil +} + +func (this *Parser) parseSetting(name string) error { + var value config.ConfigValue + this.readToken() + + read_next := true + switch this.cur_tok.ID { + case token.STRING: + value = config.StringValue{ + Name: name, + Value: this.cur_tok.Literal, + } + case token.INTEGER: + int_val, err := 
strconv.ParseInt(this.cur_tok.Literal, 10, 64) + if err != nil { + return err + } + value = config.IntegerValue{ + Name: name, + Value: int_val, + } + case token.FLOAT: + float_val, err := strconv.ParseFloat(this.cur_tok.Literal, 64) + if err != nil { + return err + } + value = config.FloatValue{ + Name: name, + Value: float_val, + } + case token.PERIOD: + reference, err := this.parseReference(this.cur_section, true) + if err != nil { + return err + } + value = reference + read_next = false + case token.IDENTIFIER: + reference, err := this.parseReference(this.settings, false) + if err != nil { + return err + } + value = reference + read_next = false + default: + return this.SyntaxError( + fmt.Sprintf("expected STRING, INTEGER or FLOAT, instead found %s", this.cur_tok.ID), + ) + } + + if read_next { + this.readToken() + } + if this.cur_tok.ID != token.SEMICOLON { + msg := fmt.Sprintf("expected ';' instead found '%s'", this.cur_tok.Literal) + return this.SyntaxError(msg) + } + + this.cur_section.Set(name, value) + return nil +} + +func (this *Parser) parseSection(name string) error { + section := config.SectionValue{ + Name: name, + Value: make(map[string]config.ConfigValue), + } + this.cur_section.Set(name, section) + this.previous = append(this.previous, this.cur_section) + this.cur_section = section + return nil +} + +func (this *Parser) endSection() error { + if len(this.previous) == 0 { + return this.SyntaxError("unexpected section end '}'") + } + + p_len := len(this.previous) + previous := this.previous[p_len-1] + this.previous = this.previous[0 : p_len-1] + this.cur_section = previous + return nil +} + +func (this *Parser) Parse() error { + this.readToken() + for { + if this.cur_tok.ID == token.EOF { + break + } + tok := this.cur_tok + this.readToken() + switch tok.ID { + case token.IDENTIFIER: + if this.cur_tok.ID == token.LBRACKET { + err := this.parseSection(tok.Literal) + if err != nil { + return err + } + } else if this.cur_tok.ID == token.EQUAL { + err 
:= this.parseSetting(tok.Literal) + if err != nil { + return err + } + } + case token.RBRACKET: + err := this.endSection() + if err != nil { + return err + } + } + } + return nil +} + +func ParseFile(filename string) (settings *config.SectionValue, err error) { + reader, err := os.Open(filename) + if err != nil { + return settings, err + } + defer reader.Close() // release the file handle once parsing has finished + return ParseReader(reader) +} + +func ParseReader(reader io.Reader) (*config.SectionValue, error) { + settings := config.SectionValue{ + Value: make(map[string]config.ConfigValue), + } + parser := &Parser{ + tokenizer: token.NewTokenizer(reader), + settings: settings, + cur_section: settings, + previous: make([]config.SectionValue, 0), + } + if err := parser.Parse(); err != nil { + return nil, err + } + + if len(parser.previous) > 0 { + return nil, parser.SyntaxError("expected end of section, instead found EOF") + } + + return &settings, nil +} diff --git a/token/token.go b/token/token.go new file mode 100644 index 0000000..06aedc9 --- /dev/null +++ b/token/token.go @@ -0,0 +1,17 @@ +package token + +import "fmt" + +type Token struct { + ID TokenID + Literal string + Line int + Column int +} + +func (this Token) String() string { + return fmt.Sprintf( + "ID<%s> Literal<%s> Line<%d> Column<%d>", + this.ID, this.Literal, this.Line, this.Column, + ) +} diff --git a/token/tokenid.go b/token/tokenid.go new file mode 100644 index 0000000..f630818 --- /dev/null +++ b/token/tokenid.go @@ -0,0 +1,46 @@ +package token + +type TokenID int + +const ( + ILLEGAL TokenID = iota + EOF + + LBRACKET + RBRACKET + EQUAL + SEMICOLON + PERIOD + + IDENTIFIER + INTEGER + FLOAT + STRING +) + +var tokenNames = [...]string{ + ILLEGAL: "ILLEGAL", + EOF: "EOF", + LBRACKET: "LBRACKET", + RBRACKET: "RBRACKET", + EQUAL: "EQUAL", + SEMICOLON: "SEMICOLON", + PERIOD: "PERIOD", + IDENTIFIER: "IDENTIFIER", + INTEGER: "INTEGER", + FLOAT: "FLOAT", + STRING: "STRING", +} + +func (this TokenID) String() string { + s := "" + if 0 <= this && this < 
TokenID(len(tokenNames)) { + s = tokenNames[this] + } + + if s == "" { + s = "UNKNOWN" + } + + return s +} diff --git a/token/tokenizer.go b/token/tokenizer.go new file mode 100644 index 0000000..d9b1f55 --- /dev/null +++ b/token/tokenizer.go @@ -0,0 +1,154 @@ +package token + +import ( + "bufio" + "io" +) + +var eof = rune(0) + +func isLetter(ch rune) bool { + return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') +} + +func isDigit(ch rune) bool { + return ('0' <= ch && ch <= '9') +} + +func isWhitespace(ch rune) bool { + return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') +} + +type Tokenizer struct { + cur_line int + cur_col int + cur_tok Token + cur_ch rune + newline bool + reader *bufio.Reader +} + +func NewTokenizer(reader io.Reader) *Tokenizer { + tokenizer := &Tokenizer{ + reader: bufio.NewReader(reader), + cur_line: 1, // lines are reported 1-based, matching the columns + cur_col: 0, + newline: false, + } + tokenizer.readRune() + return tokenizer +} + +func (this *Tokenizer) readRune() { + if this.newline { + this.cur_line += 1 + this.cur_col = 1 // first rune of a new line is column 1, as on the first line + this.newline = false + } else { + this.cur_col += 1 + } + + next_ch, _, err := this.reader.ReadRune() + if err != nil { + this.cur_ch = eof + return + } + + this.cur_ch = next_ch + + if this.cur_ch == '\n' { + this.newline = true + } +} + +func (this *Tokenizer) parseIdentifier() { + this.cur_tok.ID = IDENTIFIER + this.cur_tok.Literal = string(this.cur_ch) + for { + this.readRune() + if !isLetter(this.cur_ch) && !isDigit(this.cur_ch) && this.cur_ch != '_' { // digits are allowed after the first rune + break + } + this.cur_tok.Literal += string(this.cur_ch) + } +} + +func (this *Tokenizer) parseNumber() { + this.cur_tok.ID = INTEGER + this.cur_tok.Literal = string(this.cur_ch) + digit := false + for { + this.readRune() + if this.cur_ch == '.' 
&& digit == false { + this.cur_tok.ID = FLOAT + digit = true + } else if !isDigit(this.cur_ch) { + break + } + this.cur_tok.Literal += string(this.cur_ch) + } +} + +func (this *Tokenizer) parseString() { + this.cur_tok.ID = STRING + this.cur_tok.Literal = "" // cur_ch is already the first rune after the opening quote + for this.cur_ch != '"' && this.cur_ch != eof { + this.cur_tok.Literal += string(this.cur_ch) + this.readRune() + } + if this.cur_ch == eof { + this.cur_tok.ID = ILLEGAL // unterminated string: input ended before the closing quote + } + this.readRune() +} + +func (this *Tokenizer) skipWhitespace() { + for { + this.readRune() + if !isWhitespace(this.cur_ch) { + break + } + } +} + +func (this *Tokenizer) NextToken() Token { + if isWhitespace(this.cur_ch) { + this.skipWhitespace() + } + + this.cur_tok = Token{ + ID: ILLEGAL, + Literal: string(this.cur_ch), + Line: this.cur_line, + Column: this.cur_col, + } + + switch ch := this.cur_ch; { + case isLetter(ch) || ch == '_': + this.parseIdentifier() + case isDigit(ch): + this.parseNumber() + case ch == eof: + this.cur_tok.ID = EOF + this.cur_tok.Literal = "EOF" + default: + this.readRune() + this.cur_tok.Literal = string(ch) + switch ch { + case '=': + this.cur_tok.ID = EQUAL + case '"': + this.parseString() + case '{': + this.cur_tok.ID = LBRACKET + case '}': + this.cur_tok.ID = RBRACKET + case ';': + this.cur_tok.ID = SEMICOLON + case '.': + this.cur_tok.ID = PERIOD + } + } + + return this.cur_tok +}