Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions pattern/pattern.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ type wildcardSearch struct {
baseSearch
prefix []byte
suffix []byte
middle []*substring
middle [][]byte
middleLen int
narrowed bool
}
Expand All @@ -96,9 +96,9 @@ func newWildcardSearch(base baseSearch, token *parser.Literal) *wildcardSearch {
// all of the rest can be an asterisk or a middle
for i := 1; i < len(terms)-1; i++ {
if terms[i].Kind == parser.TermText {
term := newSubstringPattern([]byte(terms[i].Data))
s.middle = append(s.middle, term)
s.middleLen += len(terms[i].Data)
val := util.StringToByteUnsafe(terms[i].Data)
s.middle = append(s.middle, val)
s.middleLen += len(val)
}
}
return s
Expand Down Expand Up @@ -151,6 +151,17 @@ func (s *wildcardSearch) checkMiddle(val []byte) bool {
return findSequence(val[len(s.prefix):len(val)-len(s.suffix)], s.middle) == len(s.middle)
}

// findSequence looks for each of needles in haystack, in order, requiring
// every needle to start at or after the end of the previous match. It returns
// the number of leading needles that were found, so a result equal to
// len(needles) means the whole sequence is present.
func findSequence(haystack []byte, needles [][]byte) int {
	rest := haystack
	for i := 0; i < len(needles); i++ {
		needle := needles[i]
		pos := bytes.Index(rest, needle)
		if pos < 0 {
			// needles[i] is missing, so exactly i needles matched before it.
			return i
		}
		// Continue right after this match so that matches cannot overlap.
		rest = rest[pos+len(needle):]
	}
	return len(needles)
}

// check reports whether val passes the prefix, suffix and middle checks of
// the wildcard search. The checks run in that order and evaluation stops at
// the first one that fails.
func (s *wildcardSearch) check(val []byte) bool {
	if !s.checkPrefix(val) {
		return false
	}
	if !s.checkSuffix(val) {
		return false
	}
	return s.checkMiddle(val)
}
Expand Down
126 changes: 126 additions & 0 deletions pattern/pattern_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"math/rand"
"sort"
"strconv"
"strings"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -482,3 +483,128 @@ func TestPatternIPRange(t *testing.T) {

testAll(t, tp, tests)
}

// testFindSequence runs findSequence over haystack with needles converted to
// byte slices and asserts that the returned count equals cnt.
func testFindSequence(a *assert.Assertions, cnt int, needles []string, haystack string) {
	converted := make([][]byte, len(needles))
	for i := range needles {
		converted[i] = []byte(needles[i])
	}
	got := findSequence([]byte(haystack), converted)
	a.Equal(cnt, got, "wrong total number of matches")
}

// TestFindSequence checks the number of needles findSequence matches in
// order, covering repeated, overlapping, missing and out-of-order needles as
// well as a long repetitive haystack.
func TestFindSequence(t *testing.T) {
	a := assert.New(t)

	cases := []struct {
		want     int
		needles  []string
		haystack string
	}{
		{2, []string{"abra", "ada"}, "abracadabra"},
		{2, []string{"aba", "aba"}, "abacaba"},
		{2, []string{"aba", "caba"}, "abacaba"},
		{1, []string{"abacaba"}, "abacaba"},
		{0, []string{"abacaba"}, "aba"},
		{1, []string{"aba"}, "abacaba"},
		{0, []string{"dad"}, "abacaba"},
		{1, []string{"aba", "dad"}, "abacaba"},
		{0, []string{"dad", "aba"}, "abacaba"},

		{2, []string{"needle", "haystack"}, "can you find a needle in a haystack?"},
		{2, []string{"k8s_pod", "_prod"}, "\"k8s_pod\":{\"main_prod\"}"},

		{2, []string{"!13", "37#"}, "woah!13@37#test"},

		// The only match sits at the very end of a long run of "ab" pairs.
		{1, []string{"abc"}, strings.Repeat("ab", 1024) + "c"},
	}

	for _, tc := range cases {
		testFindSequence(a, tc.want, tc.needles, tc.haystack)
	}
}

// BenchmarkFindSequence_Deterministic measures findSequence on fixed inputs.
// The "regular-cases" group uses short haystacks with a single needle. In the
// "corner-cases" group the haystack is a run of "ab" pairs followed by "c",
// so the needle "abc" only matches at the very end. Each sub-benchmark is
// named after its group and the index of the case within that group.
func BenchmarkFindSequence_Deterministic(b *testing.B) {
	type testCase struct {
		haystack []byte
		needles  [][]byte
	}

	type namedTestCase struct {
		name  string
		cases []testCase
	}

	testCases := []namedTestCase{
		{
			name: "regular-cases",
			cases: []testCase{
				{bb("Hello, world!"), [][]byte{bb("orl")}},
				{bb("some-k8s-service"), [][]byte{bb("k8s")}},
			},
		},
		{
			name: "corner-cases",
			cases: []testCase{
				{bb(strings.Repeat("ab", 32) + "c"), [][]byte{bb("abc")}},
				{bb(strings.Repeat("ab", 64) + "c"), [][]byte{bb("abc")}},
				{bb(strings.Repeat("ab", 1024) + "c"), [][]byte{bb("abc")}},
				{bb(strings.Repeat("ab", 16384) + "c"), [][]byte{bb("abc")}},
			},
		},
	}

	for _, tc := range testCases {
		for i, c := range tc.cases {
			b.Run(tc.name+"-"+strconv.Itoa(i), func(b *testing.B) {
				for b.Loop() {
					// c.haystack is already a []byte, so it is passed as is.
					findSequence(c.haystack, c.needles)
				}
			})
		}
	}
}

// BenchmarkFindSequence_Random measures findSequence on randomly generated
// haystacks and needles of increasing size. Each sub-benchmark generates its
// own data through generateTestData with a charset of 256, and reports
// throughput against the full haystack length.
func BenchmarkFindSequence_Random(b *testing.B) {
	sizes := []struct {
		name         string
		haystackSize int
		needleSize   int
		needleCount  int
	}{
		{"tiny", 64, 3, 2},
		{"small", 256, 10, 3},
		{"medium", 1024, 50, 5},
		{"large", 16384, 200, 10},
		{"extra-large", 1048576, 1024, 100},
	}

	for _, size := range sizes {
		b.Run(size.name, func(b *testing.B) {
			haystack, needles := generateTestData(
				size.haystackSize, size.needleSize, size.needleCount, 256,
			)
			// The byte count per iteration never changes, so record it once
			// here instead of on every pass through the timed loop.
			b.SetBytes(int64(len(haystack)))
			// No explicit b.ResetTimer is needed: b.Loop keeps the setup
			// above out of the measurement.
			for b.Loop() {
				findSequence(haystack, needles)
			}
		})
	}
}

// generateTestData builds a haystack of haystackSize bytes and needleCount
// needles of needleSize bytes each, all obtained from generateRandomBytes
// with the given charset.
//
// Every needle that fits is copied into the haystack at a random offset
// chosen from all offsets where it fits entirely, including the last one. A
// needle longer than the haystack is returned without being planted. Offsets
// are independent, so later needles may overwrite earlier ones and the
// needles are not guaranteed to appear in order.
func generateTestData(haystackSize, needleSize, needleCount, charset int) ([]byte, [][]byte) {
	haystack := generateRandomBytes(haystackSize, charset)

	// Highest offset at which a needle still fits entirely in the haystack;
	// negative when the needle is longer than the haystack.
	lastPos := len(haystack) - needleSize

	needles := make([][]byte, needleCount)
	for i := range needles {
		needle := generateRandomBytes(needleSize, charset)
		if lastPos >= 0 {
			// rand.Intn(n) returns a value in [0, n) and panics for n <= 0,
			// so use lastPos+1 to make lastPos itself reachable and to
			// support needleSize == haystackSize.
			pos := rand.Intn(lastPos + 1)
			copy(haystack[pos:], needle)
		}
		needles[i] = needle
	}

	return haystack, needles
}

// generateRandomBytes returns a slice of size bytes in which every element is
// rand.Intn(charset) converted to a byte.
func generateRandomBytes(size, charset int) []byte {
	out := make([]byte, size)
	for i := 0; i < size; i++ {
		out[i] = byte(rand.Intn(charset))
	}
	return out
}

// bb converts s to a byte slice. It is a shorthand for writing byte-slice
// literals in the benchmark tables.
func bb(s string) []byte {
	converted := []byte(s)
	return converted
}
59 changes: 0 additions & 59 deletions pattern/substring.go

This file was deleted.

99 changes: 0 additions & 99 deletions pattern/substring_test.go

This file was deleted.

Loading