golang/src/strings/replace_test.go

584 lines
14 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"bytes"
"fmt"
. "strings"
"testing"
)
// htmlEscaper replaces the five characters that are special in HTML with
// their entity references. It is the single-pass counterpart of
// oldHTMLEscape. The ampersand must become "&amp;": mapping it to itself
// would leave bare ampersands in the output and break the expectations in
// TestReplacer.
var htmlEscaper = NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&apos;",
)
// htmlUnescaper maps the five entity references &amp;, &lt;, &gt;, &quot; and
// &apos; back to the characters they stand for. Its old strings have
// different lengths (4 to 6 bytes).
var htmlUnescaper = NewReplacer(
"&amp;", "&",
"&lt;", "<",
"&gt;", ">",
"&quot;", `"`,
"&apos;", "'",
)
// oldHTMLEscape is the http package's old HTML escaping function: it runs
// one full Replace pass over s per special character.
func oldHTMLEscape(s string) string {
	// "&" is handled first so that the ampersands introduced by the later
	// substitutions are not escaped a second time.
	steps := [...][2]string{
		{"&", "&amp;"},
		{"<", "&lt;"},
		{">", "&gt;"},
		{`"`, "&quot;"},
		{"'", "&apos;"},
	}
	for _, step := range steps {
		s = Replace(s, step[0], step[1], -1)
	}
	return s
}
// capitalLetters upper-cases the letters "a" and "b" and leaves every other
// byte alone. Both old and new strings are a single byte long.
var capitalLetters = NewReplacer(
	"a", "A",
	"b", "B",
)
// TestReplacer tests the replacer implementations. Every case is run through
// both Replace and WriteString; for WriteString the written text and the
// reported byte count are checked.
func TestReplacer(t *testing.T) {
	type testCase struct {
		r       *Replacer
		in, out string
	}
	var testCases []testCase
	// add queues cases in the order given; the position in testCases is the
	// index printed in failure messages.
	add := func(cases ...testCase) {
		testCases = append(testCases, cases...)
	}

	// oneByte converts 0xff to "\xff". This isn't just string(b) since that
	// converts to UTF-8.
	oneByte := func(b byte) string {
		return string([]byte{b})
	}

	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
	incPairs := make([]string, 0, 2*256)
	for c := 0; c < 256; c++ {
		incPairs = append(incPairs, oneByte(byte(c)), oneByte(byte(c+1)))
	}
	inc := NewReplacer(incPairs...)

	// Test cases with 1-byte old strings, 1-byte new strings.
	add(
		testCase{capitalLetters, "brad", "BrAd"},
		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
		testCase{capitalLetters, "", ""},

		testCase{inc, "brad", "csbe"},
		testCase{inc, "\x00\xff", "\x01\x00"},
		testCase{inc, "", ""},

		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
	)

	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
	// Bytes below 'a' map to themselves once.
	repeatPairs := make([]string, 0, 2*256)
	for c := 0; c < 256; c++ {
		count := c + 1 - 'a'
		if count < 1 {
			count = 1
		}
		repeatPairs = append(repeatPairs, oneByte(byte(c)), Repeat(oneByte(byte(c)), count))
	}
	repeat := NewReplacer(repeatPairs...)

	// Test cases with 1-byte old strings, variable length new strings.
	add(
		testCase{htmlEscaper, "No changes", "No changes"},
		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
		testCase{htmlEscaper, "", ""},

		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
		testCase{repeat, "abba", "abbbba"},
		testCase{repeat, "", ""},

		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
	)

	// The remaining test cases have variable length old strings.

	add(
		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
		testCase{htmlUnescaper, "", ""},

		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},

		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},

		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
	)

	// gen1 has multiple old strings of variable length. There is no
	// overall non-empty common prefix, but some pairwise common prefixes.
	gen1 := NewReplacer(
		"aaa", "3[aaa]",
		"aa", "2[aa]",
		"a", "1[a]",
		"i", "i",
		"longerst", "most long",
		"longer", "medium",
		"long", "short",
		"xx", "xx",
		"x", "X",
		"X", "Y",
		"Y", "Z",
	)
	add(
		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
		testCase{gen1, "xxxxx", "xxxxX"},
		testCase{gen1, "XiX", "YiY"},
		testCase{gen1, "", ""},
	)

	// gen2 has multiple old strings with no pairwise common prefix.
	gen2 := NewReplacer(
		"roses", "red",
		"violets", "blue",
		"sugar", "sweet",
	)
	add(
		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
		testCase{gen2, "", ""},
	)

	// gen3 has multiple old strings with an overall common prefix.
	gen3 := NewReplacer(
		"abracadabra", "poof",
		"abracadabrakazam", "splat",
		"abraham", "lincoln",
		"abrasion", "scrape",
		"abraham", "isaac",
	)
	add(
		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
		testCase{gen3, "abrasion abracad", "scrape abracad"},
		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
		testCase{gen3, "", ""},
	)

	// foo{1,2,3,4} have multiple old strings with an overall common prefix
	// and 1- or 2- byte extensions from the common prefix.
	foo1 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo3", "C",
	)
	foo2 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo3 := NewReplacer(
		"foo11", "A",
		"foo12", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo4 := NewReplacer(
		"foo12", "B",
		"foo32", "D",
	)
	add(
		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
		testCase{foo1, "", ""},

		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
		testCase{foo2, "", ""},

		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
		testCase{foo3, "", ""},

		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
		testCase{foo4, "", ""},
	)

	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
	everyByte := make([]byte, 256)
	for c := range everyByte {
		everyByte[c] = byte(c)
	}
	allString := string(everyByte)
	genAll := NewReplacer(
		allString, "[all]",
		"\xff", "[ff]",
		"\x00", "[00]",
	)
	add(
		testCase{genAll, allString, "[all]"},
		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
		testCase{genAll, "", ""},
	)

	// Test cases with empty old strings.

	blankToX1 := NewReplacer("", "X")
	blankToX2 := NewReplacer("", "X", "", "")
	blankHighPriority := NewReplacer("", "X", "o", "O")
	blankLowPriority := NewReplacer("o", "O", "", "X")
	blankNoOp1 := NewReplacer("", "")
	blankNoOp2 := NewReplacer("", "", "", "A")
	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
	add(
		testCase{blankToX1, "foo", "XfXoXoX"},
		testCase{blankToX1, "", "X"},

		testCase{blankToX2, "foo", "XfXoXoX"},
		testCase{blankToX2, "", "X"},

		testCase{blankHighPriority, "oo", "XOXOX"},
		testCase{blankHighPriority, "ii", "XiXiX"},
		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
		testCase{blankHighPriority, "", "X"},

		testCase{blankLowPriority, "oo", "OOX"},
		testCase{blankLowPriority, "ii", "XiXiX"},
		testCase{blankLowPriority, "oiio", "OXiXiOX"},
		testCase{blankLowPriority, "iooi", "XiOOXiX"},
		testCase{blankLowPriority, "", "X"},

		testCase{blankNoOp1, "foo", "foo"},
		testCase{blankNoOp1, "", ""},

		testCase{blankNoOp2, "foo", "foo"},
		testCase{blankNoOp2, "", ""},

		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
		testCase{blankFoo, "", "X"},
	)

	// single string replacer

	abcMatcher := NewReplacer("abc", "[match]")

	add(
		testCase{abcMatcher, "", ""},
		testCase{abcMatcher, "ab", "ab"},
		testCase{abcMatcher, "abc", "[match]"},
		testCase{abcMatcher, "abcd", "[match]d"},
		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
	)

	// Issue 6659 cases (more single string replacer)

	noHello := NewReplacer("Hello", "")
	add(
		testCase{noHello, "Hello", ""},
		testCase{noHello, "Hellox", "x"},
		testCase{noHello, "xHello", "x"},
		testCase{noHello, "xHellox", "xx"},
	)

	// No-arg test cases.

	nop := NewReplacer()
	add(
		testCase{nop, "abc", "abc"},
		testCase{nop, "", ""},
	)

	// Run the test cases.

	for idx, c := range testCases {
		// A Replace mismatch is reported but does not skip the WriteString
		// checks for the same case.
		if replaced := c.r.Replace(c.in); replaced != c.out {
			t.Errorf("%d. Replace(%q) = %q, want %q", idx, c.in, replaced, c.out)
		}

		var sink bytes.Buffer
		written, err := c.r.WriteString(&sink, c.in)
		if err != nil {
			t.Errorf("%d. WriteString: %v", idx, err)
			continue
		}
		if streamed := sink.String(); streamed != c.out {
			t.Errorf("%d. WriteString(%q) wrote %q, want %q", idx, c.in, streamed, c.out)
			continue
		}
		// The byte count is only meaningful once the text itself is right.
		if wantLen := len(c.out); written != wantLen {
			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
				idx, c.in, written, wantLen, c.out)
		}
	}
}
// algorithmTestCases pairs a replacer with the %T name of the implementation
// NewReplacer is expected to have chosen for it. It is shared by
// TestPickAlgorithm and TestWriteStringError.
var algorithmTestCases = []struct {
	r    *Replacer
	want string
}{
	{r: capitalLetters, want: "*strings.byteReplacer"},
	{r: htmlEscaper, want: "*strings.byteStringReplacer"},
	{r: NewReplacer("12", "123"), want: "*strings.singleStringReplacer"},
	{r: NewReplacer("1", "12"), want: "*strings.byteStringReplacer"},
	{r: NewReplacer("", "X"), want: "*strings.genericReplacer"},
	{r: NewReplacer("a", "1", "b", "12", "cde", "123"), want: "*strings.genericReplacer"},
}
// TestPickAlgorithm tests that NewReplacer picks the correct algorithm by
// comparing the dynamic type of each replacer's implementation with the
// expected type name.
func TestPickAlgorithm(t *testing.T) {
	for idx := range algorithmTestCases {
		tc := algorithmTestCases[idx]
		impl := tc.r.Replacer()
		if name := fmt.Sprintf("%T", impl); name != tc.want {
			t.Errorf("%d. algorithm = %s, want %s", idx, name, tc.want)
		}
	}
}
// errWriter is a writer that never accepts data.
type errWriter struct{}

// Write ignores p and always reports zero bytes written together with an
// "unwritable" error.
func (errWriter) Write(p []byte) (n int, err error) {
	err = fmt.Errorf("unwritable")
	return 0, err
}
// TestWriteStringError tests that WriteString returns an error
// received from the underlying io.Writer, and reports zero bytes written,
// for every replacer in algorithmTestCases.
func TestWriteStringError(t *testing.T) {
	for idx, tc := range algorithmTestCases {
		n, err := tc.r.WriteString(errWriter{}, "abc")
		if n == 0 && err != nil && err.Error() == "unwritable" {
			continue
		}
		t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", idx, n, err)
	}
}
// TestGenericTrieBuilding verifies the structure of the generated trie. There
// is one node per line, and the key ending with the current line is in the
// trie if it ends with a "+".
//
// The input is a ";"-separated key list; every key is registered with an
// empty replacement. Tabs in the expected dump are ignored.
func TestGenericTrieBuilding(t *testing.T) {
	type trieCase struct{ in, out string }
	cases := []trieCase{
		{"abc;abdef;abdefgh;xx;xy;z", `-
a-
.b-
..c+
..d-
...ef+
.....gh+
x-
.x+
.y+
z+
`},
		{"abracadabra;abracadabrakazam;abraham;abrasion", `-
a-
.bra-
....c-
.....adabra+
...........kazam+
....h-
.....am+
....s-
.....ion+
`},
		{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
X+
Y+
a+
.a+
..a+
i+
l-
.ong+
....er+
......st+
x+
.x+
`},
		{"foo;;foo;foo1", `+
f-
.oo+
...1+
`},
	}

	for _, c := range cases {
		// Build the old/new argument list: each key followed by "".
		var args []string
		for _, key := range Split(c.in, ";") {
			args = append(args, key, "")
		}

		got := NewReplacer(args...).PrintTrie()

		// Remove tabs from c.out
		want := Replace(c.out, "\t", "", -1)

		if got == want {
			continue
		}
		t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", c.in, got, want)
	}
}
// BenchmarkGenericNoMatch runs a replacer with mixed-length old strings over
// upper-case input that contains none of them.
func BenchmarkGenericNoMatch(b *testing.B) {
	// varying lengths forces generic
	replacer := NewReplacer("a", "A", "b", "B", "12", "123")
	input := Repeat("A", 100) + Repeat("B", 100)
	for n := 0; n < b.N; n++ {
		replacer.Replace(input)
	}
}
// BenchmarkGenericMatch1 runs the same mixed-length replacer as
// BenchmarkGenericNoMatch over lower-case input, where every byte matches a
// one-byte old string.
func BenchmarkGenericMatch1(b *testing.B) {
	replacer := NewReplacer("a", "A", "b", "B", "12", "123")
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		replacer.Replace(input)
	}
}
// BenchmarkGenericMatch2 unescapes a repeated snippet of entity-encoded HTML
// with htmlUnescaper.
func BenchmarkGenericMatch2(b *testing.B) {
	const snippet = "It&apos;s &lt;b&gt;HTML&lt;/b&gt;!"
	input := Repeat(snippet, 100)
	for n := 0; n < b.N; n++ {
		htmlUnescaper.Replace(input)
	}
}
// benchmarkSingleString is the shared body of the single-pattern benchmarks:
// it replaces pattern with "[match]" in text b.N times. The per-iteration
// byte count is set to len(text), and the timer is reset after the replacer
// has been built.
func benchmarkSingleString(b *testing.B, pattern, text string) {
	replacer := NewReplacer(pattern, "[match]")
	size := int64(len(text))
	b.SetBytes(size)
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		replacer.Replace(text)
	}
}
// BenchmarkSingleMaxSkipping searches for 25 "b"s in text made only of "a"s,
// so no byte of the pattern occurs in the text.
func BenchmarkSingleMaxSkipping(b *testing.B) {
	pattern := Repeat("b", 25)
	text := Repeat("a", 10000)
	benchmarkSingleString(b, pattern, text)
}
// BenchmarkSingleLongSuffixFail searches for "b" followed by 500 "a"s in text
// made only of "a"s: the long suffix matches everywhere but the leading "b"
// never does.
func BenchmarkSingleLongSuffixFail(b *testing.B) {
	pattern := "b" + Repeat("a", 500)
	text := Repeat("a", 1002)
	benchmarkSingleString(b, pattern, text)
}
// BenchmarkSingleMatch replaces "abcdef" in text where the pattern occurs
// once in every 15-byte repetition.
func BenchmarkSingleMatch(b *testing.B) {
	text := Repeat("abcdefghijklmno", 1000)
	benchmarkSingleString(b, "abcdef", text)
}
// BenchmarkByteByteNoMatch runs capitalLetters over input that is already
// upper-case, so no byte is changed.
func BenchmarkByteByteNoMatch(b *testing.B) {
	input := Repeat("A", 100) + Repeat("B", 100)
	for n := 0; n < b.N; n++ {
		capitalLetters.Replace(input)
	}
}
// BenchmarkByteByteMatch runs capitalLetters over lower-case input in which
// every byte is replaced.
func BenchmarkByteByteMatch(b *testing.B) {
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		capitalLetters.Replace(input)
	}
}
// BenchmarkByteStringMatch runs htmlEscaper over 198 plain letters wrapped in
// "<" and ">", so only the first and last bytes are replaced.
func BenchmarkByteStringMatch(b *testing.B) {
	letters := Repeat("a", 99) + Repeat("b", 99)
	input := "<" + letters + ">"
	for n := 0; n < b.N; n++ {
		htmlEscaper.Replace(input)
	}
}
// BenchmarkHTMLEscapeNew escapes a short sentence with htmlEscaper. It uses
// the same input as BenchmarkHTMLEscapeOld so the two can be compared.
func BenchmarkHTMLEscapeNew(b *testing.B) {
	const input = "I <3 to escape HTML & other text too."
	for n := 0; n < b.N; n++ {
		htmlEscaper.Replace(input)
	}
}
// BenchmarkHTMLEscapeOld escapes a short sentence with oldHTMLEscape. It uses
// the same input as BenchmarkHTMLEscapeNew so the two can be compared.
func BenchmarkHTMLEscapeOld(b *testing.B) {
	const input = "I <3 to escape HTML & other text too."
	for n := 0; n < b.N; n++ {
		oldHTMLEscape(input)
	}
}
// BenchmarkByteStringReplacerWriteString streams htmlEscaper output into a
// bytes.Buffer that is reset after each iteration.
func BenchmarkByteStringReplacerWriteString(b *testing.B) {
	input := Repeat("I <3 to escape HTML & other text too.", 100)
	sink := &bytes.Buffer{}
	for n := 0; n < b.N; n++ {
		htmlEscaper.WriteString(sink, input)
		sink.Reset()
	}
}
// BenchmarkByteReplacerWriteString streams capitalLetters output into a
// bytes.Buffer that is reset after each iteration.
func BenchmarkByteReplacerWriteString(b *testing.B) {
	input := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
	sink := &bytes.Buffer{}
	for n := 0; n < b.N; n++ {
		capitalLetters.WriteString(sink, input)
		sink.Reset()
	}
}
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces:
// it performs the a->A, b->B mapping as two chained Replace calls.
func BenchmarkByteByteReplaces(b *testing.B) {
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		upperA := Replace(input, "a", "A", -1)
		Replace(upperA, "b", "B", -1)
	}
}
// BenchmarkByteByteMap compares byteByteImpl against Map: it performs the
// a->A, b->B mapping with a rune-mapping function.
func BenchmarkByteByteMap(b *testing.B) {
	upperAB := func(r rune) rune {
		if r == 'a' {
			return 'A'
		}
		if r == 'b' {
			return 'B'
		}
		return r
	}
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		Map(upperAB, input)
	}
}
// mapdata holds the named inputs used by BenchmarkMap: one made of ASCII
// letters and one made of Greek letters, each separated by single spaces.
var mapdata = []struct{ name, data string }{
	{
		name: "ASCII",
		data: "a b c d e f g h i j k l m n o p q r s t u v w x y z",
	},
	{
		name: "Greek",
		data: "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω",
	},
}
// BenchmarkMap measures Map over every mapdata entry with two mappings:
// "identity", which returns each rune unchanged, and "change", which shifts
// lower-case ASCII and Greek letters by the distance to the upper-case
// alphabet. Sub-benchmarks are named <mapping>/<mapdata name>.
func BenchmarkMap(b *testing.B) {
	// overData builds a benchmark that runs one sub-benchmark per mapdata
	// entry, applying mapping to that entry's text b.N times.
	overData := func(mapping func(rune) rune) func(*testing.B) {
		return func(b *testing.B) {
			for _, md := range mapdata {
				b.Run(md.name, func(b *testing.B) {
					for n := 0; n < b.N; n++ {
						Map(mapping, md.data)
					}
				})
			}
		}
	}

	identity := func(r rune) rune {
		return r
	}
	b.Run("identity", overData(identity))

	change := func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z':
			return r + 'A' - 'a'
		case 'α' <= r && r <= 'ω':
			// NOTE(review): 'Α' below is meant to be the Greek capital
			// alpha, which looks identical to Latin 'A' — confirm the
			// code point when editing this line.
			return r + 'Α' - 'α'
		}
		return r
	}
	b.Run("change", overData(change))
}