From 0c006f98240d70d259bbf7accb0457e096b48eea Mon Sep 17 00:00:00 2001 From: brettlangdon Date: Sat, 9 Nov 2013 07:32:32 -0500 Subject: [PATCH] update tokenizer to be smarter, allow undefined tokenizer --- README.md | 5 +++-- lib/index.js | 13 +++++++++---- lib/tokenizer.js | 40 ++++++++++++++++++++++++++++++++++++++++ package.json | 2 +- 4 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 lib/tokenizer.js diff --git a/README.md b/README.md index 716a588..531a348 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,9 @@ This is the constructor for `cmdsrv` and should be invoked as `new cmdsrv()`. #### options * `port` - which port to bind to when calling `start` (default: `3223`) * `delimiter` - which character to split each command on (default: `" "`) -* `caseSensitive` - whether or not the commands should be case sensitive (default: `false`) -* `tokenizer` - a `function(line)` which returns a list of tokens for the line (default: `line.split(this.delimiter)` +* `bound` - which character is used to mark word boundaries (default: `'"'`) +* `caseSensitive` - whether or not the commands should be case sensitive; when `false`, "GET" and "get" will both be emitted as "get" (default: `false`) +* `tokenizer` - a `function(line, delimiter, bound)` which returns a list of tokens for the line; if `tokenizer` is undefined then no tokenization will occur and only the `line` event will be emitted (default: see `lib/tokenizer.js`) ### cmdsrv.on(command, handler) Received commands are emitted to any handlers listening for that command. 
/**
 * Split a command line into a list of tokens.
 *
 * Tokens are separated by `delimiter`. Text wrapped in a pair of `bound`
 * characters is kept together as one token, so delimiters inside it are
 * literal; an empty bounded section (e.g. `""`) yields an empty-string
 * token. A `bound` character that directly follows a backslash is taken
 * literally and the escaping backslash is dropped from the token. A bounded
 * section that is never closed is emitted as-is when the line ends.
 *
 * Both `delimiter` and `bound` are compared against one character of `line`
 * at a time, so each must be a single character to ever match.
 *
 * @param {string} line - the raw line to tokenize
 * @param {string} [delimiter=" "] - character that separates tokens
 * @param {string} [bound="\""] - character that wraps multi-word tokens
 * @returns {string[]} the tokens in order of appearance; an empty array when
 *     `line` is empty, `null` or `undefined`
 */
var tokenizer = function(line, delimiter, bound){
    delimiter = (delimiter === undefined)? " " : delimiter;
    bound = (bound === undefined)? "\"" : bound;

    var tokens = [];
    // nothing to split; avoids a TypeError on `line.length` below
    if(line === undefined || line === null){
        return tokens;
    }

    var token = "";
    var bounded = false;
    for(var i = 0; i < line.length; ++i){
        var next = line[i];
        if(next === delimiter && !bounded){
            // runs of delimiters never produce empty tokens
            if(token.length){
                tokens.push(token);
                token = "";
            }
        } else if(next === bound && line[i - 1] !== "\\"){
            if(bounded){
                // closing bound: emit the section even when it is empty
                bounded = false;
                tokens.push(token);
                token = "";
            } else{
                // opening bound: flush whatever was collected before it
                bounded = true;
                if(token.length){
                    tokens.push(token);
                    token = "";
                }
            }
        } else{
            // reaching here with a bound character means it was escaped;
            // drop the backslash that was collected on the previous pass
            if(next === bound && token.charAt(token.length - 1) === "\\"){
                token = token.slice(0, -1);
            }
            token += next;
        }
    }

    // trailing token, including the contents of an unterminated bound
    if(token.length){
        tokens.push(token);
    }
    return tokens;
};

// Export for `require("./tokenizer.js")`. A plain assignment replaces the
// original top-level `return`, which only parses inside the CommonJS wrapper;
// the guard keeps the file loadable where `module` is not defined.
if(typeof module !== "undefined" && module.exports){
    module.exports = tokenizer;
}