Browse Source

Pattern tests + regexp hilarity

Add tests for both string and regular expression patterns. Also, reimplement
regexp.Regexp.Prefix() on top of the raw regexp/syntax representation, so we can
get a little more information out of regexps:
- Whether or not the regexp is left-anchored (at the beginning of the string)
- What the prefix of the regular expression is, even for left-anchored
  expressions.

We do this by running the regular expression instructions ourselves, more or
less cargo-culting the original implementation from package regexp/syntax.

Unfortunately it's ~impossible to make this abstraction non-leaky, because the
regexp package doesn't give us information about whether or not it was
constructed using POSIX or Perl syntax, for example, or if the longest-match
setting was applied.

The upshot is that regexps are now probably pretty performant-ish. Maybe. (I
haven't actually benchmarked it).
Carl Jackson 12 years ago
parent
commit
e1ef76d6a8
3 changed files with 274 additions and 10 deletions
  1. +8
    -1
      web/mux.go
  2. +103
    -9
      web/pattern.go
  3. +163
    -0
      web/pattern_test.go

+ 8
- 1
web/mux.go View File

@ -31,7 +31,10 @@ following types:
All of the route-adding functions on Mux take two untyped parameters: pattern
and handler. Pattern must be one of the following types:
- string (interpreted as a Sinatra pattern)
- regexp.Regexp
- regexp.Regexp. The library assumes that it is a Perl-style regexp that
is anchored on the left (i.e., the beginning of the string). If your
regexp is not anchored on the left, a hopefully-identical left-anchored
regexp will be created and used instead.
- web.Pattern
Handler must be one of the following types:
- http.Handler
@ -44,6 +47,10 @@ type Mux struct {
router
}
// Sanity check types
var _ http.Handler = &Mux{}
var _ Handler = &Mux{}
// Create a new Mux without any routes or middleware.
func New() *Mux {
mux := Mux{


+ 103
- 9
web/pattern.go View File

@ -1,28 +1,35 @@
package web
import (
"bytes"
"fmt"
"log"
"net/http"
"regexp"
"regexp/syntax"
"strings"
)
type regexpPattern struct {
re *regexp.Regexp
names []string
re *regexp.Regexp
prefix string
names []string
}
func (p regexpPattern) Prefix() string {
prefix, _ := p.re.LiteralPrefix()
return prefix
return p.prefix
}
func (p regexpPattern) Match(r *http.Request, c *C) bool {
matches := p.re.FindStringSubmatch(r.URL.Path)
if matches == nil {
if matches == nil || len(matches) == 0 {
return false
}
if c.UrlParams == nil && len(matches) > 0 {
if len(matches) == 1 {
return true
}
if c.UrlParams == nil {
c.UrlParams = make(map[string]string, len(matches)-1)
}
for i := 1; i < len(matches); i++ {
@ -31,7 +38,89 @@ func (p regexpPattern) Match(r *http.Request, c *C) bool {
return true
}
func (p regexpPattern) String() string {
return fmt.Sprintf("regexpPattern(%v)", p.re)
}
/*
I'm sorry, dear reader. I really am.
The problem here is to take an arbitrary regular expression and:
1. return a regular expression that is just like it, but left-anchored,
preferring to return the original if possible.
2. determine a string literal prefix that all matches of this regular expression
have, much like regexp.Regexp.Prefix(). Unfortunately, Prefix() does not work
in the presence of anchors, so we need to write it ourselves.
What this actually means is that we need to sketch on the internals of the
standard regexp library to forcefully extract the information we want.
Unfortunately, regexp.Regexp hides a lot of its state, so our abstraction is
going to be pretty leaky. The biggest leak is that we blindly assume that all
regular expressions are perl-style, not POSIX. This is probably Mostly True, and
I think most users of the library probably won't be able to notice.
*/
func sketchOnRegex(re *regexp.Regexp) (*regexp.Regexp, string) {
rawRe := re.String()
sRe, err := syntax.Parse(rawRe, syntax.Perl)
if err != nil {
log.Printf("WARN(web): unable to parse regexp %v as perl. "+
"This route might behave unexpectedly.", re)
return re, ""
}
sRe = sRe.Simplify()
p, err := syntax.Compile(sRe)
if err != nil {
log.Printf("WARN(web): unable to compile regexp %v. This "+
"route might behave unexpectedly.", re)
return re, ""
}
if p.StartCond()&syntax.EmptyBeginText == 0 {
// I hope doing this is always legal...
newRe, err := regexp.Compile(`\A` + rawRe)
if err != nil {
log.Printf("WARN(web): unable to create a left-"+
"anchored regexp from %v. This route might "+
"behave unexpectedly", re)
return re, ""
}
re = newRe
}
// Run the regular expression more or less by hand :(
pc := uint32(p.Start)
atStart := true
i := &p.Inst[pc]
var buf bytes.Buffer
Sadness:
for {
switch i.Op {
case syntax.InstEmptyWidth:
if !atStart {
break Sadness
}
case syntax.InstCapture, syntax.InstNop:
// nop!
case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny,
syntax.InstRuneAnyNotNL:
atStart = false
if len(i.Rune) != 1 ||
syntax.Flags(i.Arg)&syntax.FoldCase != 0 {
break Sadness
}
buf.WriteRune(i.Rune[0])
default:
break Sadness
}
pc = i.Out
i = &p.Inst[pc]
}
return re, buf.String()
}
func parseRegexpPattern(re *regexp.Regexp) regexpPattern {
re, prefix := sketchOnRegex(re)
rnames := re.SubexpNames()
// We have to make our own copy since package regexp forbids us
// from scribbling over the slice returned by SubexpNames().
@ -43,8 +132,9 @@ func parseRegexpPattern(re *regexp.Regexp) regexpPattern {
names[i] = rname
}
return regexpPattern{
re: re,
names: names,
re: re,
prefix: prefix,
names: names,
}
}
@ -82,7 +172,7 @@ func (s stringPattern) Match(r *http.Request, c *C) bool {
}
// There's exactly one more literal than pat.
if s.isPrefix {
if strings.HasPrefix(path, s.literals[len(s.pats)]) {
if !strings.HasPrefix(path, s.literals[len(s.pats)]) {
return false
}
} else {
@ -100,6 +190,10 @@ func (s stringPattern) Match(r *http.Request, c *C) bool {
return true
}
func (s stringPattern) String() string {
return fmt.Sprintf("stringPattern(%q, %v)", s.raw, s.isPrefix)
}
func parseStringPattern(s string, isPrefix bool) stringPattern {
matches := patternRe.FindAllStringSubmatchIndex(s, -1)
pats := make([]string, len(matches))


+ 163
- 0
web/pattern_test.go View File

@ -0,0 +1,163 @@
package web
import (
"net/http"
"reflect"
"regexp"
"testing"
)
func pt(url string, match bool, params map[string]string) patternTest {
req, err := http.NewRequest("GET", url, nil)
if err != nil {
panic(err)
}
return patternTest{
r: req,
match: match,
c: &C{},
cout: &C{UrlParams: params},
}
}
type patternTest struct {
r *http.Request
match bool
c *C
cout *C
}
var patternTests = []struct {
pat Pattern
prefix string
tests []patternTest
}{
// Regexp tests
{parseRegexpPattern(regexp.MustCompile("^/hello$")),
"/hello", []patternTest{
pt("/hello", true, nil),
pt("/hell", false, nil),
pt("/hello/", false, nil),
pt("/hello/world", false, nil),
pt("/world", false, nil),
}},
{parseRegexpPattern(regexp.MustCompile("^/hello/(?P<name>[a-z]+)$")),
"/hello/", []patternTest{
pt("/hello/world", true, map[string]string{
"name": "world",
}),
pt("/hello/", false, nil),
pt("/hello/my/love", false, nil),
}},
{parseRegexpPattern(regexp.MustCompile(`^/a(?P<a>\d+)/b(?P<b>\d+)/?$`)),
"/a", []patternTest{
pt("/a1/b2", true, map[string]string{
"a": "1",
"b": "2",
}),
pt("/a9001/b007/", true, map[string]string{
"a": "9001",
"b": "007",
}),
pt("/a/b", false, nil),
pt("/a", false, nil),
pt("/squirrel", false, nil),
}},
{parseRegexpPattern(regexp.MustCompile(`^/hello/([a-z]+)$`)),
"/hello/", []patternTest{
pt("/hello/world", true, map[string]string{
"$1": "world",
}),
pt("/hello/", false, nil),
}},
{parseRegexpPattern(regexp.MustCompile("/hello")),
"/hello", []patternTest{
pt("/hello", true, nil),
pt("/hell", false, nil),
pt("/hello/", true, nil),
pt("/hello/world", true, nil),
pt("/world/hello", false, nil),
}},
// String pattern tests
{parseStringPattern("/hello", false),
"/hello", []patternTest{
pt("/hello", true, nil),
pt("/hell", false, nil),
pt("/hello/", false, nil),
pt("/hello/world", false, nil),
}},
{parseStringPattern("/hello/:name", false),
"/hello/", []patternTest{
pt("/hello/world", true, map[string]string{
"name": "world",
}),
pt("/hell", false, nil),
pt("/hello/", false, nil),
pt("/hello/my/love", false, nil),
}},
{parseStringPattern("/a/:a/b/:b", false),
"/a/", []patternTest{
pt("/a/1/b/2", true, map[string]string{
"a": "1",
"b": "2",
}),
pt("/a", false, nil),
pt("/a//b/", false, nil),
pt("/a/1/b/2/3", false, nil),
}},
// String sub-pattern tests
{parseStringPattern("/user/:user", true),
"/user/", []patternTest{
pt("/user/bob", true, map[string]string{
"user": "bob",
}),
pt("/user/bob/friends/123", true, map[string]string{
"user": "bob",
}),
pt("/user/", false, nil),
pt("/user//", false, nil),
}},
{parseStringPattern("/user/:user/friends", true),
"/user/", []patternTest{
pt("/user/bob/friends", true, map[string]string{
"user": "bob",
}),
pt("/user/bob/friends/123", true, map[string]string{
"user": "bob",
}),
pt("/user/bob/enemies", false, nil),
}},
}
func TestPatterns(t *testing.T) {
t.Parallel()
for _, pt := range patternTests {
p := pt.pat.Prefix()
if p != pt.prefix {
t.Errorf("Expected prefix %q for %v, got %q", pt.prefix,
pt.pat, p)
} else {
for _, test := range pt.tests {
runTest(t, pt.pat, test)
}
}
}
}
func runTest(t *testing.T, p Pattern, test patternTest) {
result := p.Match(test.r, test.c)
if result != test.match {
t.Errorf("Expected match(%v, %#v) to return %v", p,
test.r.URL.Path, test.match)
return
}
if !reflect.DeepEqual(test.c, test.cout) {
t.Errorf("Expected a context of %v, instead got %v", test.cout,
test.c)
}
}

Loading…
Cancel
Save