From e1ef76d6a80768110cb261b6afaff192fa731272 Mon Sep 17 00:00:00 2001 From: Carl Jackson Date: Sat, 22 Mar 2014 19:30:18 -0700 Subject: [PATCH] Pattern tests + regexp hilarity Add tests for both string and regular expression patterns. Also, reimplement regexp.Regexp.Prefix() on top of the raw regexp/syntax representation, so we can get a little more information out of regexps: - Whether or not the regexp is left-anchored (at the beginning of the string) - What the prefix of the regular expression is, even for left-anchored expressions. We do this by running the regular expression instructions ourselves, more or less cargo-culting the original implementation from package regexp/syntax. Unfortunately it's ~impossible to make this abstraction non-leaky, because the regexp package doesn't give us information about whether or not it was constructed using POSIX or Perl syntax, for example, or if the longest-match setting was applied. The upshot is that regexps are now probably pretty performant-ish. Maybe. (I haven't actually benchmarked it). --- web/mux.go | 9 ++- web/pattern.go | 112 +++++++++++++++++++++++++++--- web/pattern_test.go | 163 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+), 10 deletions(-) create mode 100644 web/pattern_test.go diff --git a/web/mux.go b/web/mux.go index c8f8d2c..00b5d68 100644 --- a/web/mux.go +++ b/web/mux.go @@ -31,7 +31,10 @@ following types: All of the route-adding functions on Mux take two untyped parameters: pattern and handler. Pattern must be one of the following types: - string (interpreted as a Sinatra pattern) - - regexp.Regexp + - regexp.Regexp. The library assumes that it is a Perl-style regexp that + is anchored on the left (i.e., the beginning of the string). If your + regexp is not anchored on the left, a hopefully-identical left-anchored + regexp will be created and used instead. - web.Pattern Handler must be one of the following types: - http.Handler @@ -44,6 +47,10 @@ type Mux struct { router } +// Sanity check types +var _ http.Handler = &Mux{} +var _ Handler = &Mux{} + // Create a new Mux without any routes or middleware. func New() *Mux { mux := Mux{ diff --git a/web/pattern.go b/web/pattern.go index 3733765..f6b11c7 100644 --- a/web/pattern.go +++ b/web/pattern.go @@ -1,28 +1,35 @@ package web import ( + "bytes" "fmt" + "log" "net/http" "regexp" + "regexp/syntax" "strings" ) type regexpPattern struct { - re *regexp.Regexp - names []string + re *regexp.Regexp + prefix string + names []string } func (p regexpPattern) Prefix() string { - prefix, _ := p.re.LiteralPrefix() - return prefix + return p.prefix } func (p regexpPattern) Match(r *http.Request, c *C) bool { matches := p.re.FindStringSubmatch(r.URL.Path) - if matches == nil { + if matches == nil || len(matches) == 0 { return false } - if c.UrlParams == nil && len(matches) > 0 { + if len(matches) == 1 { + return true + } + + if c.UrlParams == nil { c.UrlParams = make(map[string]string, len(matches)-1) } for i := 1; i < len(matches); i++ { @@ -31,7 +38,89 @@ func (p regexpPattern) Match(r *http.Request, c *C) bool { return true } +func (p regexpPattern) String() string { + return fmt.Sprintf("regexpPattern(%v)", p.re) +} + +/* +I'm sorry, dear reader. I really am. + +The problem here is to take an arbitrary regular expression and: +1. return a regular expression that is just like it, but left-anchored, + preferring to return the original if possible. +2. determine a string literal prefix that all matches of this regular expression + have, much like regexp.Regexp.Prefix(). Unfortunately, Prefix() does not work + in the presence of anchors, so we need to write it ourselves. + +What this actually means is that we need to sketch on the internals of the +standard regexp library to forcefully extract the information we want. + +Unfortunately, regexp.Regexp hides a lot of its state, so our abstraction is +going to be pretty leaky. The biggest leak is that we blindly assume that all +regular expressions are perl-style, not POSIX. This is probably Mostly True, and +I think most users of the library probably won't be able to notice. +*/ +func sketchOnRegex(re *regexp.Regexp) (*regexp.Regexp, string) { + rawRe := re.String() + sRe, err := syntax.Parse(rawRe, syntax.Perl) + if err != nil { + log.Printf("WARN(web): unable to parse regexp %v as perl. "+ + "This route might behave unexpectedly.", re) + return re, "" + } + sRe = sRe.Simplify() + p, err := syntax.Compile(sRe) + if err != nil { + log.Printf("WARN(web): unable to compile regexp %v. This "+ + "route might behave unexpectedly.", re) + return re, "" + } + if p.StartCond()&syntax.EmptyBeginText == 0 { + // I hope doing this is always legal... + newRe, err := regexp.Compile(`\A` + rawRe) + if err != nil { + log.Printf("WARN(web): unable to create a left-"+ + "anchored regexp from %v. This route might "+ + "behave unexpectedly", re) + return re, "" + } + re = newRe + } + + // Run the regular expression more or less by hand :( + pc := uint32(p.Start) + atStart := true + i := &p.Inst[pc] + var buf bytes.Buffer +Sadness: + for { + switch i.Op { + case syntax.InstEmptyWidth: + if !atStart { + break Sadness + } + case syntax.InstCapture, syntax.InstNop: + // nop! + case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, + syntax.InstRuneAnyNotNL: + + atStart = false + if len(i.Rune) != 1 || + syntax.Flags(i.Arg)&syntax.FoldCase != 0 { + break Sadness + } + buf.WriteRune(i.Rune[0]) + default: + break Sadness + } + pc = i.Out + i = &p.Inst[pc] + } + return re, buf.String() +} + func parseRegexpPattern(re *regexp.Regexp) regexpPattern { + re, prefix := sketchOnRegex(re) rnames := re.SubexpNames() // We have to make our own copy since package regexp forbids us // from scribbling over the slice returned by SubexpNames(). @@ -43,8 +132,9 @@ func parseRegexpPattern(re *regexp.Regexp) regexpPattern { names[i] = rname } return regexpPattern{ - re: re, - names: names, + re: re, + prefix: prefix, + names: names, } } @@ -82,7 +172,7 @@ func (s stringPattern) Match(r *http.Request, c *C) bool { } // There's exactly one more literal than pat. if s.isPrefix { - if strings.HasPrefix(path, s.literals[len(s.pats)]) { + if !strings.HasPrefix(path, s.literals[len(s.pats)]) { return false } } else { @@ -100,6 +190,10 @@ func (s stringPattern) Match(r *http.Request, c *C) bool { return true } +func (s stringPattern) String() string { + return fmt.Sprintf("stringPattern(%q, %v)", s.raw, s.isPrefix) +} + func parseStringPattern(s string, isPrefix bool) stringPattern { matches := patternRe.FindAllStringSubmatchIndex(s, -1) pats := make([]string, len(matches)) diff --git a/web/pattern_test.go b/web/pattern_test.go new file mode 100644 index 0000000..7fbe132 --- /dev/null +++ b/web/pattern_test.go @@ -0,0 +1,163 @@ +package web + +import ( + "net/http" + "reflect" + "regexp" + "testing" +) + +func pt(url string, match bool, params map[string]string) patternTest { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + panic(err) + } + + return patternTest{ + r: req, + match: match, + c: &C{}, + cout: &C{UrlParams: params}, + } +} + +type patternTest struct { + r *http.Request + match bool + c *C + cout *C +} + +var patternTests = []struct { + pat Pattern + prefix string + tests []patternTest +}{ + // Regexp tests + {parseRegexpPattern(regexp.MustCompile("^/hello$")), + "/hello", []patternTest{ + pt("/hello", true, nil), + pt("/hell", false, nil), + pt("/hello/", false, nil), + pt("/hello/world", false, nil), + pt("/world", false, nil), + }}, + {parseRegexpPattern(regexp.MustCompile("^/hello/(?P[a-z]+)$")), + "/hello/", []patternTest{ + pt("/hello/world", true, map[string]string{ + "name": "world", + }), + pt("/hello/", false, nil), + pt("/hello/my/love", false, nil), + }}, + {parseRegexpPattern(regexp.MustCompile(`^/a(?P\d+)/b(?P\d+)/?$`)), + "/a", []patternTest{ + pt("/a1/b2", true, map[string]string{ + "a": "1", + "b": "2", + }), + pt("/a9001/b007/", true, map[string]string{ + "a": "9001", + "b": "007", + }), + pt("/a/b", false, nil), + pt("/a", false, nil), + pt("/squirrel", false, nil), + }}, + {parseRegexpPattern(regexp.MustCompile(`^/hello/([a-z]+)$`)), + "/hello/", []patternTest{ + pt("/hello/world", true, map[string]string{ + "$1": "world", + }), + pt("/hello/", false, nil), + }}, + {parseRegexpPattern(regexp.MustCompile("/hello")), + "/hello", []patternTest{ + pt("/hello", true, nil), + pt("/hell", false, nil), + pt("/hello/", true, nil), + pt("/hello/world", true, nil), + pt("/world/hello", false, nil), + }}, + + // String pattern tests + {parseStringPattern("/hello", false), + "/hello", []patternTest{ + pt("/hello", true, nil), + pt("/hell", false, nil), + pt("/hello/", false, nil), + pt("/hello/world", false, nil), + }}, + {parseStringPattern("/hello/:name", false), + "/hello/", []patternTest{ + pt("/hello/world", true, map[string]string{ + "name": "world", + }), + pt("/hell", false, nil), + pt("/hello/", false, nil), + pt("/hello/my/love", false, nil), + }}, + {parseStringPattern("/a/:a/b/:b", false), + "/a/", []patternTest{ + pt("/a/1/b/2", true, map[string]string{ + "a": "1", + "b": "2", + }), + pt("/a", false, nil), + pt("/a//b/", false, nil), + pt("/a/1/b/2/3", false, nil), + }}, + + // String sub-pattern tests + {parseStringPattern("/user/:user", true), + "/user/", []patternTest{ + pt("/user/bob", true, map[string]string{ + "user": "bob", + }), + pt("/user/bob/friends/123", true, map[string]string{ + "user": "bob", + }), + pt("/user/", false, nil), + pt("/user//", false, nil), + }}, + {parseStringPattern("/user/:user/friends", true), + "/user/", []patternTest{ + pt("/user/bob/friends", true, map[string]string{ + "user": "bob", + }), + pt("/user/bob/friends/123", true, map[string]string{ + "user": "bob", + }), + pt("/user/bob/enemies", false, nil), + }}, +} + +func TestPatterns(t *testing.T) { + t.Parallel() + + for _, pt := range patternTests { + p := pt.pat.Prefix() + if p != pt.prefix { + t.Errorf("Expected prefix %q for %v, got %q", pt.prefix, + pt.pat, p) + } else { + for _, test := range pt.tests { + runTest(t, pt.pat, test) + } + } + } +} + +func runTest(t *testing.T, p Pattern, test patternTest) { + result := p.Match(test.r, test.c) + if result != test.match { + t.Errorf("Expected match(%v, %#v) to return %v", p, + test.r.URL.Path, test.match) + return + } + + if !reflect.DeepEqual(test.c, test.cout) { + t.Errorf("Expected a context of %v, instead got %v", test.cout, + test.c) + } +}