golang/src/strings/strings_test.go

2089 lines
53 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"bytes"
"fmt"
"io"
"math"
"math/rand"
"reflect"
"strconv"
. "strings"
"testing"
"unicode"
"unicode/utf8"
"unsafe"
)
func eq(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i := 0; i < len(a); i++ {
if a[i] != b[i] {
return false
}
}
return true
}
var abcd = "abcd"
var faces = "☺☻☹"
var commas = "1,2,3,4"
var dots = "1....2....3....4"
type IndexTest struct {
s string
sep string
out int
}
var indexTests = []IndexTest{
{"", "", 0},
{"", "a", -1},
{"", "foo", -1},
{"fo", "foo", -1},
{"foo", "foo", 0},
{"oofofoofooo", "f", 2},
{"oofofoofooo", "foo", 4},
{"barfoobarfoo", "foo", 3},
{"foo", "", 0},
{"foo", "o", 1},
{"abcABCabc", "A", 3},
{"jrzm6jjhorimglljrea4w3rlgosts0w2gia17hno2td4qd1jz", "jz", 47},
{"ekkuk5oft4eq0ocpacknhwouic1uua46unx12l37nioq9wbpnocqks6", "ks6", 52},
{"999f2xmimunbuyew5vrkla9cpwhmxan8o98ec", "98ec", 33},
{"9lpt9r98i04k8bz6c6dsrthb96bhi", "96bhi", 24},
{"55u558eqfaod2r2gu42xxsu631xf0zobs5840vl", "5840vl", 33},
// cases with one byte strings - test special case in Index()
{"", "a", -1},
{"x", "a", -1},
{"x", "x", 0},
{"abc", "a", 0},
{"abc", "b", 1},
{"abc", "c", 2},
{"abc", "x", -1},
// test special cases in Index() for short strings
{"", "ab", -1},
{"bc", "ab", -1},
{"ab", "ab", 0},
{"xab", "ab", 1},
{"xab"[:2], "ab", -1},
{"", "abc", -1},
{"xbc", "abc", -1},
{"abc", "abc", 0},
{"xabc", "abc", 1},
{"xabc"[:3], "abc", -1},
{"xabxc", "abc", -1},
{"", "abcd", -1},
{"xbcd", "abcd", -1},
{"abcd", "abcd", 0},
{"xabcd", "abcd", 1},
{"xyabcd"[:5], "abcd", -1},
{"xbcqq", "abcqq", -1},
{"abcqq", "abcqq", 0},
{"xabcqq", "abcqq", 1},
{"xyabcqq"[:6], "abcqq", -1},
{"xabxcqq", "abcqq", -1},
{"xabcqxq", "abcqq", -1},
{"", "01234567", -1},
{"32145678", "01234567", -1},
{"01234567", "01234567", 0},
{"x01234567", "01234567", 1},
{"x0123456x01234567", "01234567", 9},
{"xx01234567"[:9], "01234567", -1},
{"", "0123456789", -1},
{"3214567844", "0123456789", -1},
{"0123456789", "0123456789", 0},
{"x0123456789", "0123456789", 1},
{"x012345678x0123456789", "0123456789", 11},
{"xyz0123456789"[:12], "0123456789", -1},
{"x01234567x89", "0123456789", -1},
{"", "0123456789012345", -1},
{"3214567889012345", "0123456789012345", -1},
{"0123456789012345", "0123456789012345", 0},
{"x0123456789012345", "0123456789012345", 1},
{"x012345678901234x0123456789012345", "0123456789012345", 17},
{"", "01234567890123456789", -1},
{"32145678890123456789", "01234567890123456789", -1},
{"01234567890123456789", "01234567890123456789", 0},
{"x01234567890123456789", "01234567890123456789", 1},
{"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
{"", "0123456789012345678901234567890", -1},
{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
{"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
{"", "01234567890123456789012345678901", -1},
{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
{"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
{"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
{"", "0123456789012345678901234567890123456789", -1},
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
{"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
// test fallback to Rabin-Karp.
{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
}
var lastIndexTests = []IndexTest{
{"", "", 0},
{"", "a", -1},
{"", "foo", -1},
{"fo", "foo", -1},
{"foo", "foo", 0},
{"foo", "f", 0},
{"oofofoofooo", "f", 7},
{"oofofoofooo", "foo", 7},
{"barfoobarfoo", "foo", 9},
{"foo", "", 3},
{"foo", "o", 2},
{"abcABCabc", "A", 3},
{"abcABCabc", "a", 6},
}
var indexAnyTests = []IndexTest{
{"", "", -1},
{"", "a", -1},
{"", "abc", -1},
{"a", "", -1},
{"a", "a", 0},
{"\x80", "\xffb", 0},
{"aaa", "a", 0},
{"abc", "xyz", -1},
{"abc", "xcz", 2},
{"ab☺c", "x☺yz", 2},
{"a☺b☻c☹d", "cx", len("a☺b☻")},
{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
{"aRegExp*", ".(|)*+?^$[]", 7},
{dots + dots + dots, " ", -1},
{"012abcba210", "\xffb", 4},
{"012\x80bcb\x80210", "\xffb", 3},
{"0123456\xcf\x80abc", "\xcfb\x80", 10},
}
var lastIndexAnyTests = []IndexTest{
{"", "", -1},
{"", "a", -1},
{"", "abc", -1},
{"a", "", -1},
{"a", "a", 0},
{"\x80", "\xffb", 0},
{"aaa", "a", 2},
{"abc", "xyz", -1},
{"abc", "ab", 1},
{"ab☺c", "x☺yz", 2},
{"a☺b☻c☹d", "cx", len("a☺b☻")},
{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
{"a.RegExp*", ".(|)*+?^$[]", 8},
{dots + dots + dots, " ", -1},
{"012abcba210", "\xffb", 6},
{"012\x80bcb\x80210", "\xffb", 7},
{"0123456\xcf\x80abc", "\xcfb\x80", 10},
}
// Execute f on each test case. funcName should be the name of f; it's used
// in failure reports.
func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) {
for _, test := range testCases {
actual := f(test.s, test.sep)
if actual != test.out {
t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out)
}
}
}
func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) }
func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) }
func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) }
func TestLastIndexAny(t *testing.T) {
runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests)
}
func TestIndexByte(t *testing.T) {
for _, tt := range indexTests {
if len(tt.sep) != 1 {
continue
}
pos := IndexByte(tt.s, tt.sep[0])
if pos != tt.out {
t.Errorf(`IndexByte(%q, %q) = %v; want %v`, tt.s, tt.sep[0], pos, tt.out)
}
}
}
func TestLastIndexByte(t *testing.T) {
testCases := []IndexTest{
{"", "q", -1},
{"abcdef", "q", -1},
{"abcdefabcdef", "a", len("abcdef")}, // something in the middle
{"abcdefabcdef", "f", len("abcdefabcde")}, // last byte
{"zabcdefabcdef", "z", 0}, // first byte
{"a☺b☻c☹d", "b", len("a☺")}, // non-ascii
}
for _, test := range testCases {
actual := LastIndexByte(test.s, test.sep[0])
if actual != test.out {
t.Errorf("LastIndexByte(%q,%c) = %v; want %v", test.s, test.sep[0], actual, test.out)
}
}
}
func simpleIndex(s, sep string) int {
n := len(sep)
for i := n; i <= len(s); i++ {
if s[i-n:i] == sep {
return i - n
}
}
return -1
}
func TestIndexRandom(t *testing.T) {
const chars = "abcdefghijklmnopqrstuvwxyz0123456789"
for times := 0; times < 10; times++ {
for strLen := 5 + rand.Intn(5); strLen < 140; strLen += 10 { // Arbitrary
s1 := make([]byte, strLen)
for i := range s1 {
s1[i] = chars[rand.Intn(len(chars))]
}
s := string(s1)
for i := 0; i < 50; i++ {
begin := rand.Intn(len(s) + 1)
end := begin + rand.Intn(len(s)+1-begin)
sep := s[begin:end]
if i%4 == 0 {
pos := rand.Intn(len(sep) + 1)
sep = sep[:pos] + "A" + sep[pos:]
}
want := simpleIndex(s, sep)
res := Index(s, sep)
if res != want {
t.Errorf("Index(%s,%s) = %d; want %d", s, sep, res, want)
}
}
}
}
}
func TestIndexRune(t *testing.T) {
tests := []struct {
in string
rune rune
want int
}{
{"", 'a', -1},
{"", '☺', -1},
{"foo", '☹', -1},
{"foo", 'o', 1},
{"foo☺bar", '☺', 3},
{"foo☺☻☹bar", '☹', 9},
{"a A x", 'A', 2},
{"some_text=some_value", '=', 9},
{"☺a", 'a', 3},
{"a☻☺b", '☺', 4},
// RuneError should match any invalid UTF-8 byte sequence.
{"<22>", '<27>', 0},
{"\xff", '<27>', 0},
{"☻x<E298BB>", '<27>', len("☻x")},
{"☻x\xe2\x98", '<27>', len("☻x")},
{"☻x\xe2\x98<39>", '<27>', len("☻x")},
{"☻x\xe2\x98x", '<27>', len("☻x")},
// Invalid rune values should never match.
{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", -1, -1},
{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
{"a☺b☻c☹d\xe2\x98<39>\xff<66>\xed\xa0\x80", utf8.MaxRune + 1, -1},
}
for _, tt := range tests {
if got := IndexRune(tt.in, tt.rune); got != tt.want {
t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
}
}
haystack := "test世界"
allocs := testing.AllocsPerRun(1000, func() {
if i := IndexRune(haystack, 's'); i != 2 {
t.Fatalf("'s' at %d; want 2", i)
}
if i := IndexRune(haystack, '世'); i != 4 {
t.Fatalf("'世' at %d; want 4", i)
}
})
if allocs != 0 && testing.CoverMode() == "" {
t.Errorf("expected no allocations, got %f", allocs)
}
}
const benchmarkString = "some_text=some☺value"
func BenchmarkIndexRune(b *testing.B) {
if got := IndexRune(benchmarkString, '☺'); got != 14 {
b.Fatalf("wrong index: expected 14, got=%d", got)
}
for i := 0; i < b.N; i++ {
IndexRune(benchmarkString, '☺')
}
}
var benchmarkLongString = Repeat(" ", 100) + benchmarkString
func BenchmarkIndexRuneLongString(b *testing.B) {
if got := IndexRune(benchmarkLongString, '☺'); got != 114 {
b.Fatalf("wrong index: expected 114, got=%d", got)
}
for i := 0; i < b.N; i++ {
IndexRune(benchmarkLongString, '☺')
}
}
func BenchmarkIndexRuneFastPath(b *testing.B) {
if got := IndexRune(benchmarkString, 'v'); got != 17 {
b.Fatalf("wrong index: expected 17, got=%d", got)
}
for i := 0; i < b.N; i++ {
IndexRune(benchmarkString, 'v')
}
}
func BenchmarkIndex(b *testing.B) {
if got := Index(benchmarkString, "v"); got != 17 {
b.Fatalf("wrong index: expected 17, got=%d", got)
}
for i := 0; i < b.N; i++ {
Index(benchmarkString, "v")
}
}
func BenchmarkLastIndex(b *testing.B) {
if got := Index(benchmarkString, "v"); got != 17 {
b.Fatalf("wrong index: expected 17, got=%d", got)
}
for i := 0; i < b.N; i++ {
LastIndex(benchmarkString, "v")
}
}
func BenchmarkIndexByte(b *testing.B) {
if got := IndexByte(benchmarkString, 'v'); got != 17 {
b.Fatalf("wrong index: expected 17, got=%d", got)
}
for i := 0; i < b.N; i++ {
IndexByte(benchmarkString, 'v')
}
}
type SplitTest struct {
s string
sep string
n int
a []string
}
var splittests = []SplitTest{
{"", "", -1, []string{}},
{abcd, "", 2, []string{"a", "bcd"}},
{abcd, "", 4, []string{"a", "b", "c", "d"}},
{abcd, "", -1, []string{"a", "b", "c", "d"}},
{faces, "", -1, []string{"☺", "☻", "☹"}},
{faces, "", 3, []string{"☺", "☻", "☹"}},
{faces, "", 17, []string{"☺", "☻", "☹"}},
{"☺<>☹", "", -1, []string{"☺", "<22>", "☹"}},
{abcd, "a", 0, nil},
{abcd, "a", -1, []string{"", "bcd"}},
{abcd, "z", -1, []string{"abcd"}},
{commas, ",", -1, []string{"1", "2", "3", "4"}},
{dots, "...", -1, []string{"1", ".2", ".3", ".4"}},
{faces, "☹", -1, []string{"☺☻", ""}},
{faces, "~", -1, []string{faces}},
{"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
{"1 2", " ", 3, []string{"1", "2"}},
{"", "T", math.MaxInt / 4, []string{""}},
{"\xff-\xff", "", -1, []string{"\xff", "-", "\xff"}},
{"\xff-\xff", "-", -1, []string{"\xff", "\xff"}},
}
func TestSplit(t *testing.T) {
for _, tt := range splittests {
a := SplitN(tt.s, tt.sep, tt.n)
if !eq(a, tt.a) {
t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a)
continue
}
if tt.n == 0 {
continue
}
s := Join(a, tt.sep)
if s != tt.s {
t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s)
}
if tt.n < 0 {
b := Split(tt.s, tt.sep)
if !reflect.DeepEqual(a, b) {
t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
}
}
}
}
var splitaftertests = []SplitTest{
{abcd, "a", -1, []string{"a", "bcd"}},
{abcd, "z", -1, []string{"abcd"}},
{abcd, "", -1, []string{"a", "b", "c", "d"}},
{commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
{dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
{faces, "☹", -1, []string{"☺☻☹", ""}},
{faces, "~", -1, []string{faces}},
{faces, "", -1, []string{"☺", "☻", "☹"}},
{"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
{"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
{"1 2", " ", 3, []string{"1 ", "2"}},
{"123", "", 2, []string{"1", "23"}},
{"123", "", 17, []string{"1", "2", "3"}},
}
func TestSplitAfter(t *testing.T) {
for _, tt := range splitaftertests {
a := SplitAfterN(tt.s, tt.sep, tt.n)
if !eq(a, tt.a) {
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a)
continue
}
s := Join(a, "")
if s != tt.s {
t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
}
if tt.n < 0 {
b := SplitAfter(tt.s, tt.sep)
if !reflect.DeepEqual(a, b) {
t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
}
}
}
}
type FieldsTest struct {
s string
a []string
}
var fieldstests = []FieldsTest{
{"", []string{}},
{" ", []string{}},
{" \t ", []string{}},
{"\u2000", []string{}},
{" abc ", []string{"abc"}},
{"1 2 3 4", []string{"1", "2", "3", "4"}},
{"1 2 3 4", []string{"1", "2", "3", "4"}},
{"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
{"\u2000\u2001\u2002", []string{}},
{"\n™\t™\n", []string{"™", "™"}},
{"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}},
{"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}},
{"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}},
{faces, []string{faces}},
}
func TestFields(t *testing.T) {
for _, tt := range fieldstests {
a := Fields(tt.s)
if !eq(a, tt.a) {
t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
continue
}
}
}
var FieldsFuncTests = []FieldsTest{
{"", []string{}},
{"XX", []string{}},
{"XXhiXXX", []string{"hi"}},
{"aXXbXXXcX", []string{"a", "b", "c"}},
}
func TestFieldsFunc(t *testing.T) {
for _, tt := range fieldstests {
a := FieldsFunc(tt.s, unicode.IsSpace)
if !eq(a, tt.a) {
t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
continue
}
}
pred := func(c rune) bool { return c == 'X' }
for _, tt := range FieldsFuncTests {
a := FieldsFunc(tt.s, pred)
if !eq(a, tt.a) {
t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
}
}
}
// Test case for any function which accepts and returns a single string.
type StringTest struct {
in, out string
}
// Execute f on each test case. funcName should be the name of f; it's used
// in failure reports.
func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) {
for _, tc := range testCases {
actual := f(tc.in)
if actual != tc.out {
t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
}
}
}
var upperTests = []StringTest{
{"", ""},
{"ONLYUPPER", "ONLYUPPER"},
{"abc", "ABC"},
{"AbC123", "ABC123"},
{"azAZ09_", "AZAZ09_"},
{"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
{"RENAN BASTOS 93 AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO", "RENAN BASTOS 93 AOSDAJDJAIDJAIDAJIAIDSJJAIDIJADSJIADJIOOKKO"},
{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
{"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
}
var lowerTests = []StringTest{
{"", ""},
{"abc", "abc"},
{"AbC123", "abc123"},
{"azAZ09_", "azaz09_"},
{"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
{"renan bastos 93 AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO", "renan bastos 93 aosdajdjaidjaidajiaidsjjaidijadsjiadjiookko"},
{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
{"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
}
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
var trimSpaceTests = []StringTest{
{"", ""},
{"abc", "abc"},
{space + "abc" + space, "abc"},
{" ", ""},
{" \t\r\n \t\t\r\r\n\n ", ""},
{" \t\r\n x\t\t\r\r\n\n ", "x"},
{" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
{"1 \t\r\n2", "1 \t\r\n2"},
{" x\x80", "x\x80"},
{" x\xc0", "x\xc0"},
{"x \xc0\xc0 ", "x \xc0\xc0"},
{"x \xc0", "x \xc0"},
{"x \xc0 ", "x \xc0"},
{"x \xc0\xc0 ", "x \xc0\xc0"},
{"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
{"x ☺ ", "x ☺"},
}
func tenRunes(ch rune) string {
r := make([]rune, 10)
for i := range r {
r[i] = ch
}
return string(r)
}
// User-defined self-inverse mapping function
func rot13(r rune) rune {
step := rune(13)
if r >= 'a' && r <= 'z' {
return ((r - 'a' + step) % 26) + 'a'
}
if r >= 'A' && r <= 'Z' {
return ((r - 'A' + step) % 26) + 'A'
}
return r
}
func TestMap(t *testing.T) {
// Run a couple of awful growth/shrinkage tests
a := tenRunes('a')
// 1. Grow. This triggers two reallocations in Map.
maxRune := func(rune) rune { return unicode.MaxRune }
m := Map(maxRune, a)
expect := tenRunes(unicode.MaxRune)
if m != expect {
t.Errorf("growing: expected %q got %q", expect, m)
}
// 2. Shrink
minRune := func(rune) rune { return 'a' }
m = Map(minRune, tenRunes(unicode.MaxRune))
expect = a
if m != expect {
t.Errorf("shrinking: expected %q got %q", expect, m)
}
// 3. Rot13
m = Map(rot13, "a to zed")
expect = "n gb mrq"
if m != expect {
t.Errorf("rot13: expected %q got %q", expect, m)
}
// 4. Rot13^2
m = Map(rot13, Map(rot13, "a to zed"))
expect = "a to zed"
if m != expect {
t.Errorf("rot13: expected %q got %q", expect, m)
}
// 5. Drop
dropNotLatin := func(r rune) rune {
if unicode.Is(unicode.Latin, r) {
return r
}
return -1
}
m = Map(dropNotLatin, "Hello, 세계")
expect = "Hello"
if m != expect {
t.Errorf("drop: expected %q got %q", expect, m)
}
// 6. Identity
identity := func(r rune) rune {
return r
}
orig := "Input string that we expect not to be copied."
m = Map(identity, orig)
if unsafe.StringData(orig) != unsafe.StringData(m) {
t.Error("unexpected copy during identity map")
}
// 7. Handle invalid UTF-8 sequence
replaceNotLatin := func(r rune) rune {
if unicode.Is(unicode.Latin, r) {
return r
}
return utf8.RuneError
}
m = Map(replaceNotLatin, "Hello\255World")
expect = "Hello\uFFFDWorld"
if m != expect {
t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
}
// 8. Check utf8.RuneSelf and utf8.MaxRune encoding
encode := func(r rune) rune {
switch r {
case utf8.RuneSelf:
return unicode.MaxRune
case unicode.MaxRune:
return utf8.RuneSelf
}
return r
}
s := string(rune(utf8.RuneSelf)) + string(utf8.MaxRune)
r := string(utf8.MaxRune) + string(rune(utf8.RuneSelf)) // reverse of s
m = Map(encode, s)
if m != r {
t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
}
m = Map(encode, r)
if m != s {
t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
}
// 9. Check mapping occurs in the front, middle and back
trimSpaces := func(r rune) rune {
if unicode.IsSpace(r) {
return -1
}
return r
}
m = Map(trimSpaces, " abc 123 ")
expect = "abc123"
if m != expect {
t.Errorf("trimSpaces: expected %q got %q", expect, m)
}
}
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
var toValidUTF8Tests = []struct {
in string
repl string
out string
}{
{"", "\uFFFD", ""},
{"abc", "\uFFFD", "abc"},
{"\uFDDD", "\uFFFD", "\uFDDD"},
{"a\xffb", "\uFFFD", "a\uFFFDb"},
{"a\xffb\uFFFD", "X", "aXb\uFFFD"},
{"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"},
{"a☺\xffb☺\xC0\xAFc☺\xff", "日本語", "a☺日本語b☺日本語c☺日本語"},
{"\xC0\xAF", "\uFFFD", "\uFFFD"},
{"\xE0\x80\xAF", "\uFFFD", "\uFFFD"},
{"\xed\xa0\x80", "abc", "abc"},
{"\xed\xbf\xbf", "\uFFFD", "\uFFFD"},
{"\xF0\x80\x80\xaf", "☺", "☺"},
{"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
{"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
}
func TestToValidUTF8(t *testing.T) {
for _, tc := range toValidUTF8Tests {
got := ToValidUTF8(tc.in, tc.repl)
if got != tc.out {
t.Errorf("ToValidUTF8(%q, %q) = %q; want %q", tc.in, tc.repl, got, tc.out)
}
}
}
func BenchmarkToUpper(b *testing.B) {
for _, tc := range upperTests {
b.Run(tc.in, func(b *testing.B) {
for i := 0; i < b.N; i++ {
actual := ToUpper(tc.in)
if actual != tc.out {
b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
}
}
})
}
}
func BenchmarkToLower(b *testing.B) {
for _, tc := range lowerTests {
b.Run(tc.in, func(b *testing.B) {
for i := 0; i < b.N; i++ {
actual := ToLower(tc.in)
if actual != tc.out {
b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
}
}
})
}
}
func BenchmarkMapNoChanges(b *testing.B) {
identity := func(r rune) rune {
return r
}
for i := 0; i < b.N; i++ {
Map(identity, "Some string that won't be modified.")
}
}
func TestSpecialCase(t *testing.T) {
lower := "abcçdefgğhıijklmnoöprsştuüvyz"
upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
u := ToUpperSpecial(unicode.TurkishCase, upper)
if u != upper {
t.Errorf("Upper(upper) is %s not %s", u, upper)
}
u = ToUpperSpecial(unicode.TurkishCase, lower)
if u != upper {
t.Errorf("Upper(lower) is %s not %s", u, upper)
}
l := ToLowerSpecial(unicode.TurkishCase, lower)
if l != lower {
t.Errorf("Lower(lower) is %s not %s", l, lower)
}
l = ToLowerSpecial(unicode.TurkishCase, upper)
if l != lower {
t.Errorf("Lower(upper) is %s not %s", l, lower)
}
}
func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
var trimTests = []struct {
f string
in, arg, out string
}{
{"Trim", "abba", "a", "bb"},
{"Trim", "abba", "ab", ""},
{"TrimLeft", "abba", "ab", ""},
{"TrimRight", "abba", "ab", ""},
{"TrimLeft", "abba", "a", "bba"},
{"TrimLeft", "abba", "b", "abba"},
{"TrimRight", "abba", "a", "abb"},
{"TrimRight", "abba", "b", "abba"},
{"Trim", "<tag>", "<>", "tag"},
{"Trim", "* listitem", " *", "listitem"},
{"Trim", `"quote"`, `"`, "quote"},
{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
{"Trim", "\x80test\xff", "\xff", "test"},
{"Trim", " Ġ ", " ", "Ġ"},
{"Trim", " Ġİ0", "0 ", "Ġİ"},
//empty string tests
{"Trim", "abba", "", "abba"},
{"Trim", "", "123", ""},
{"Trim", "", "", ""},
{"TrimLeft", "abba", "", "abba"},
{"TrimLeft", "", "123", ""},
{"TrimLeft", "", "", ""},
{"TrimRight", "abba", "", "abba"},
{"TrimRight", "", "123", ""},
{"TrimRight", "", "", ""},
{"TrimRight", "☺\xc0", "☺", "☺\xc0"},
{"TrimPrefix", "aabb", "a", "abb"},
{"TrimPrefix", "aabb", "b", "aabb"},
{"TrimSuffix", "aabb", "a", "aabb"},
{"TrimSuffix", "aabb", "b", "aab"},
}
func TestTrim(t *testing.T) {
for _, tc := range trimTests {
name := tc.f
var f func(string, string) string
switch name {
case "Trim":
f = Trim
case "TrimLeft":
f = TrimLeft
case "TrimRight":
f = TrimRight
case "TrimPrefix":
f = TrimPrefix
case "TrimSuffix":
f = TrimSuffix
default:
t.Errorf("Undefined trim function %s", name)
}
actual := f(tc.in, tc.arg)
if actual != tc.out {
t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
}
}
}
func BenchmarkTrim(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for _, tc := range trimTests {
name := tc.f
var f func(string, string) string
switch name {
case "Trim":
f = Trim
case "TrimLeft":
f = TrimLeft
case "TrimRight":
f = TrimRight
case "TrimPrefix":
f = TrimPrefix
case "TrimSuffix":
f = TrimSuffix
default:
b.Errorf("Undefined trim function %s", name)
}
actual := f(tc.in, tc.arg)
if actual != tc.out {
b.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
}
}
}
}
func BenchmarkToValidUTF8(b *testing.B) {
tests := []struct {
name string
input string
}{
{"Valid", "typical"},
{"InvalidASCII", "foo\xffbar"},
{"InvalidNonASCII", "日本語\xff日本語"},
}
replacement := "\uFFFD"
b.ResetTimer()
for _, test := range tests {
b.Run(test.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
ToValidUTF8(test.input, replacement)
}
})
}
}
type predicate struct {
f func(rune) bool
name string
}
var isSpace = predicate{unicode.IsSpace, "IsSpace"}
var isDigit = predicate{unicode.IsDigit, "IsDigit"}
var isUpper = predicate{unicode.IsUpper, "IsUpper"}
var isValidRune = predicate{
func(r rune) bool {
return r != utf8.RuneError
},
"IsValidRune",
}
func not(p predicate) predicate {
return predicate{
func(r rune) bool {
return !p.f(r)
},
"not " + p.name,
}
}
var trimFuncTests = []struct {
f predicate
in string
trimOut string
leftOut string
rightOut string
}{
{isSpace, space + " hello " + space,
"hello",
"hello " + space,
space + " hello"},
{isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51",
"hello",
"hello34\u0e50\u0e51",
"\u0e50\u0e5212hello"},
{isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
"hello",
"helloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
"\u2C6F\u2C6F\u2C6F\u2C6FABCDhello"},
{not(isSpace), "hello" + space + "hello",
space,
space + "hello",
"hello" + space},
{not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo",
"\u0e50\u0e521234\u0e50\u0e51",
"\u0e50\u0e521234\u0e50\u0e51helo",
"hello\u0e50\u0e521234\u0e50\u0e51"},
{isValidRune, "ab\xc0a\xc0cd",
"\xc0a\xc0",
"\xc0a\xc0cd",
"ab\xc0a\xc0"},
{not(isValidRune), "\xc0a\xc0",
"a",
"a\xc0",
"\xc0a"},
{isSpace, "",
"",
"",
""},
{isSpace, " ",
"",
"",
""},
}
func TestTrimFunc(t *testing.T) {
for _, tc := range trimFuncTests {
trimmers := []struct {
name string
trim func(s string, f func(r rune) bool) string
out string
}{
{"TrimFunc", TrimFunc, tc.trimOut},
{"TrimLeftFunc", TrimLeftFunc, tc.leftOut},
{"TrimRightFunc", TrimRightFunc, tc.rightOut},
}
for _, trimmer := range trimmers {
actual := trimmer.trim(tc.in, tc.f.f)
if actual != trimmer.out {
t.Errorf("%s(%q, %q) = %q; want %q", trimmer.name, tc.in, tc.f.name, actual, trimmer.out)
}
}
}
}
var indexFuncTests = []struct {
in string
f predicate
first, last int
}{
{"", isValidRune, -1, -1},
{"abc", isDigit, -1, -1},
{"0123", isDigit, 0, 3},
{"a1b", isDigit, 1, 1},
{space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
{"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
{"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
{"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},
// tests of invalid UTF-8
{"\x801", isDigit, 1, 1},
{"\x80abc", isDigit, -1, -1},
{"\xc0a\xc0", isValidRune, 1, 1},
{"\xc0a\xc0", not(isValidRune), 0, 2},
{"\xc0☺\xc0", not(isValidRune), 0, 4},
{"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
{"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
{"a\xe0\x80cd", not(isValidRune), 1, 2},
{"\x80\x80\x80\x80", not(isValidRune), 0, 3},
}
func TestIndexFunc(t *testing.T) {
for _, tc := range indexFuncTests {
first := IndexFunc(tc.in, tc.f.f)
if first != tc.first {
t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first)
}
last := LastIndexFunc(tc.in, tc.f.f)
if last != tc.last {
t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last)
}
}
}
func equal(m string, s1, s2 string, t *testing.T) bool {
if s1 == s2 {
return true
}
e1 := Split(s1, "")
e2 := Split(s2, "")
for i, c1 := range e1 {
if i >= len(e2) {
break
}
r1, _ := utf8.DecodeRuneInString(c1)
r2, _ := utf8.DecodeRuneInString(e2[i])
if r1 != r2 {
t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
}
}
return false
}
func TestCaseConsistency(t *testing.T) {
// Make a string of all the runes.
numRunes := int(unicode.MaxRune + 1)
if testing.Short() {
numRunes = 1000
}
a := make([]rune, numRunes)
for i := range a {
a[i] = rune(i)
}
s := string(a)
// convert the cases.
upper := ToUpper(s)
lower := ToLower(s)
// Consistency checks
if n := utf8.RuneCountInString(upper); n != numRunes {
t.Error("rune count wrong in upper:", n)
}
if n := utf8.RuneCountInString(lower); n != numRunes {
t.Error("rune count wrong in lower:", n)
}
if !equal("ToUpper(upper)", ToUpper(upper), upper, t) {
t.Error("ToUpper(upper) consistency fail")
}
if !equal("ToLower(lower)", ToLower(lower), lower, t) {
t.Error("ToLower(lower) consistency fail")
}
/*
These fail because of non-one-to-oneness of the data, such as multiple
upper case 'I' mapping to 'i'. We comment them out but keep them for
interest.
For instance: CAPITAL LETTER I WITH DOT ABOVE:
unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130'
if !equal("ToUpper(lower)", ToUpper(lower), upper, t) {
t.Error("ToUpper(lower) consistency fail");
}
if !equal("ToLower(upper)", ToLower(upper), lower, t) {
t.Error("ToLower(upper) consistency fail");
}
*/
}
var longString = "a" + string(make([]byte, 1<<16)) + "z"
var longSpaces = func() string {
b := make([]byte, 200)
for i := range b {
b[i] = ' '
}
return string(b)
}()
var RepeatTests = []struct {
in, out string
count int
}{
{"", "", 0},
{"", "", 1},
{"", "", 2},
{"-", "", 0},
{"-", "-", 1},
{"-", "----------", 10},
{"abc ", "abc abc abc ", 3},
{" ", " ", 1},
{"--", "----", 2},
{"===", "======", 2},
{"000", "000000000", 3},
{"\t\t\t\t", "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", 4},
{" ", longSpaces, len(longSpaces)},
// Tests for results over the chunkLimit
{string(rune(0)), string(make([]byte, 1<<16)), 1 << 16},
{longString, longString + longString, 2},
}
func TestRepeat(t *testing.T) {
for _, tt := range RepeatTests {
a := Repeat(tt.in, tt.count)
if !equal("Repeat(s)", a, tt.out, t) {
t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out)
continue
}
}
}
func repeat(s string, count int) (err error) {
defer func() {
if r := recover(); r != nil {
switch v := r.(type) {
case error:
err = v
default:
err = fmt.Errorf("%s", v)
}
}
}()
Repeat(s, count)
return
}
// See Issue golang.org/issue/16237
func TestRepeatCatchesOverflow(t *testing.T) {
type testCase struct {
s string
count int
errStr string
}
runTestCases := func(prefix string, tests []testCase) {
for i, tt := range tests {
err := repeat(tt.s, tt.count)
if tt.errStr == "" {
if err != nil {
t.Errorf("#%d panicked %v", i, err)
}
continue
}
if err == nil || !Contains(err.Error(), tt.errStr) {
t.Errorf("%s#%d got %q want %q", prefix, i, err, tt.errStr)
}
}
}
const maxInt = int(^uint(0) >> 1)
runTestCases("", []testCase{
0: {"--", -2147483647, "negative"},
1: {"", maxInt, ""},
2: {"-", 10, ""},
3: {"gopher", 0, ""},
4: {"-", -1, "negative"},
5: {"--", -102, "negative"},
6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
})
const is64Bit = 1<<(^uintptr(0)>>63)/2 != 0
if !is64Bit {
return
}
runTestCases("64-bit", []testCase{
0: {"-", maxInt, "out of range"},
})
}
func runesEqual(a, b []rune) bool {
if len(a) != len(b) {
return false
}
for i, r := range a {
if r != b[i] {
return false
}
}
return true
}
var RunesTests = []struct {
in string
out []rune
lossy bool
}{
{"", []rune{}, false},
{" ", []rune{32}, false},
{"ABC", []rune{65, 66, 67}, false},
{"abc", []rune{97, 98, 99}, false},
{"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
{"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
{"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
}
func TestRunes(t *testing.T) {
for _, tt := range RunesTests {
a := []rune(tt.in)
if !runesEqual(a, tt.out) {
t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out)
continue
}
if !tt.lossy {
// can only test reassembly if we didn't lose information
s := string(a)
if s != tt.in {
t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in)
}
}
}
}
func TestReadByte(t *testing.T) {
testStrings := []string{"", abcd, faces, commas}
for _, s := range testStrings {
reader := NewReader(s)
if e := reader.UnreadByte(); e == nil {
t.Errorf("Unreading %q at beginning: expected error", s)
}
var res bytes.Buffer
for {
b, e := reader.ReadByte()
if e == io.EOF {
break
}
if e != nil {
t.Errorf("Reading %q: %s", s, e)
break
}
res.WriteByte(b)
// unread and read again
e = reader.UnreadByte()
if e != nil {
t.Errorf("Unreading %q: %s", s, e)
break
}
b1, e := reader.ReadByte()
if e != nil {
t.Errorf("Reading %q after unreading: %s", s, e)
break
}
if b1 != b {
t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1)
break
}
}
if res.String() != s {
t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String())
}
}
}
func TestReadRune(t *testing.T) {
testStrings := []string{"", abcd, faces, commas}
for _, s := range testStrings {
reader := NewReader(s)
if e := reader.UnreadRune(); e == nil {
t.Errorf("Unreading %q at beginning: expected error", s)
}
res := ""
for {
r, z, e := reader.ReadRune()
if e == io.EOF {
break
}
if e != nil {
t.Errorf("Reading %q: %s", s, e)
break
}
res += string(r)
// unread and read again
e = reader.UnreadRune()
if e != nil {
t.Errorf("Unreading %q: %s", s, e)
break
}
r1, z1, e := reader.ReadRune()
if e != nil {
t.Errorf("Reading %q after unreading: %s", s, e)
break
}
if r1 != r {
t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1)
break
}
if z1 != z {
t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1)
break
}
}
if res != s {
t.Errorf("Reader(%q).ReadRune() produced %q", s, res)
}
}
}
var UnreadRuneErrorTests = []struct {
name string
f func(*Reader)
}{
{"Read", func(r *Reader) { r.Read([]byte{0}) }},
{"ReadByte", func(r *Reader) { r.ReadByte() }},
{"UnreadRune", func(r *Reader) { r.UnreadRune() }},
{"Seek", func(r *Reader) { r.Seek(0, io.SeekCurrent) }},
{"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }},
}
func TestUnreadRuneError(t *testing.T) {
for _, tt := range UnreadRuneErrorTests {
reader := NewReader("0123456789")
if _, _, err := reader.ReadRune(); err != nil {
// should not happen
t.Fatal(err)
}
tt.f(reader)
err := reader.UnreadRune()
if err == nil {
t.Errorf("Unreading after %s: expected error", tt.name)
}
}
}
var ReplaceTests = []struct {
in string
old, new string
n int
out string
}{
{"hello", "l", "L", 0, "hello"},
{"hello", "l", "L", -1, "heLLo"},
{"hello", "x", "X", -1, "hello"},
{"", "x", "X", -1, ""},
{"radar", "r", "<r>", -1, "<r>ada<r>"},
{"", "", "<>", -1, "<>"},
{"banana", "a", "<>", -1, "b<>n<>n<>"},
{"banana", "a", "<>", 1, "b<>nana"},
{"banana", "a", "<>", 1000, "b<>n<>n<>"},
{"banana", "an", "<>", -1, "b<><>a"},
{"banana", "ana", "<>", -1, "b<>na"},
{"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
{"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
{"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
{"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
{"banana", "", "<>", 1, "<>banana"},
{"banana", "a", "a", -1, "banana"},
{"banana", "a", "a", 1, "banana"},
{"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
}
func TestReplace(t *testing.T) {
for _, tt := range ReplaceTests {
if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out {
t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out)
}
if tt.n == -1 {
s := ReplaceAll(tt.in, tt.old, tt.new)
if s != tt.out {
t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out)
}
}
}
}
var TitleTests = []struct {
in, out string
}{
{"", ""},
{"a", "A"},
{" aaa aaa aaa ", " Aaa Aaa Aaa "},
{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
{"123a456", "123a456"},
{"double-blind", "Double-Blind"},
{"ÿøû", "Ÿøû"},
{"with_underscore", "With_underscore"},
{"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
}
func TestTitle(t *testing.T) {
for _, tt := range TitleTests {
if s := Title(tt.in); s != tt.out {
t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
}
}
}
var ContainsTests = []struct {
str, substr string
expected bool
}{
{"abc", "bc", true},
{"abc", "bcd", false},
{"abc", "", true},
{"", "a", false},
// cases to cover code in runtime/asm_amd64.s:indexShortStr
// 2-byte needle
{"xxxxxx", "01", false},
{"01xxxx", "01", true},
{"xx01xx", "01", true},
{"xxxx01", "01", true},
{"01xxxxx"[1:], "01", false},
{"xxxxx01"[:6], "01", false},
// 3-byte needle
{"xxxxxxx", "012", false},
{"012xxxx", "012", true},
{"xx012xx", "012", true},
{"xxxx012", "012", true},
{"012xxxxx"[1:], "012", false},
{"xxxxx012"[:7], "012", false},
// 4-byte needle
{"xxxxxxxx", "0123", false},
{"0123xxxx", "0123", true},
{"xx0123xx", "0123", true},
{"xxxx0123", "0123", true},
{"0123xxxxx"[1:], "0123", false},
{"xxxxx0123"[:8], "0123", false},
// 5-7-byte needle
{"xxxxxxxxx", "01234", false},
{"01234xxxx", "01234", true},
{"xx01234xx", "01234", true},
{"xxxx01234", "01234", true},
{"01234xxxxx"[1:], "01234", false},
{"xxxxx01234"[:9], "01234", false},
// 8-byte needle
{"xxxxxxxxxxxx", "01234567", false},
{"01234567xxxx", "01234567", true},
{"xx01234567xx", "01234567", true},
{"xxxx01234567", "01234567", true},
{"01234567xxxxx"[1:], "01234567", false},
{"xxxxx01234567"[:12], "01234567", false},
// 9-15-byte needle
{"xxxxxxxxxxxxx", "012345678", false},
{"012345678xxxx", "012345678", true},
{"xx012345678xx", "012345678", true},
{"xxxx012345678", "012345678", true},
{"012345678xxxxx"[1:], "012345678", false},
{"xxxxx012345678"[:13], "012345678", false},
// 16-byte needle
{"xxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEF", false},
{"0123456789ABCDEFxxxx", "0123456789ABCDEF", true},
{"xx0123456789ABCDEFxx", "0123456789ABCDEF", true},
{"xxxx0123456789ABCDEF", "0123456789ABCDEF", true},
{"0123456789ABCDEFxxxxx"[1:], "0123456789ABCDEF", false},
{"xxxxx0123456789ABCDEF"[:20], "0123456789ABCDEF", false},
// 17-31-byte needle
{"xxxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEFG", false},
{"0123456789ABCDEFGxxxx", "0123456789ABCDEFG", true},
{"xx0123456789ABCDEFGxx", "0123456789ABCDEFG", true},
{"xxxx0123456789ABCDEFG", "0123456789ABCDEFG", true},
{"0123456789ABCDEFGxxxxx"[1:], "0123456789ABCDEFG", false},
{"xxxxx0123456789ABCDEFG"[:21], "0123456789ABCDEFG", false},
// partial match cases
{"xx01x", "012", false}, // 3
{"xx0123x", "01234", false}, // 5-7
{"xx01234567x", "012345678", false}, // 9-15
{"xx0123456789ABCDEFx", "0123456789ABCDEFG", false}, // 17-31, issue 15679
}
func TestContains(t *testing.T) {
for _, ct := range ContainsTests {
if Contains(ct.str, ct.substr) != ct.expected {
t.Errorf("Contains(%s, %s) = %v, want %v",
ct.str, ct.substr, !ct.expected, ct.expected)
}
}
}
var ContainsAnyTests = []struct {
str, substr string
expected bool
}{
{"", "", false},
{"", "a", false},
{"", "abc", false},
{"a", "", false},
{"a", "a", true},
{"aaa", "a", true},
{"abc", "xyz", false},
{"abc", "xcz", true},
{"a☺b☻c☹d", "uvw☻xyz", true},
{"aRegExp*", ".(|)*+?^$[]", true},
{dots + dots + dots, " ", false},
}
func TestContainsAny(t *testing.T) {
for _, ct := range ContainsAnyTests {
if ContainsAny(ct.str, ct.substr) != ct.expected {
t.Errorf("ContainsAny(%s, %s) = %v, want %v",
ct.str, ct.substr, !ct.expected, ct.expected)
}
}
}
var ContainsRuneTests = []struct {
str string
r rune
expected bool
}{
{"", 'a', false},
{"a", 'a', true},
{"aaa", 'a', true},
{"abc", 'y', false},
{"abc", 'c', true},
{"a☺b☻c☹d", 'x', false},
{"a☺b☻c☹d", '☻', true},
{"aRegExp*", '*', true},
}
func TestContainsRune(t *testing.T) {
for _, ct := range ContainsRuneTests {
if ContainsRune(ct.str, ct.r) != ct.expected {
t.Errorf("ContainsRune(%q, %q) = %v, want %v",
ct.str, ct.r, !ct.expected, ct.expected)
}
}
}
func TestContainsFunc(t *testing.T) {
for _, ct := range ContainsRuneTests {
if ContainsFunc(ct.str, func(r rune) bool {
return ct.r == r
}) != ct.expected {
t.Errorf("ContainsFunc(%q, func(%q)) = %v, want %v",
ct.str, ct.r, !ct.expected, ct.expected)
}
}
}
var EqualFoldTests = []struct {
s, t string
out bool
}{
{"abc", "abc", true},
{"ABcd", "ABcd", true},
{"123abc", "123ABC", true},
{"αβδ", "ΑΒΔ", true},
{"abc", "xyz", false},
{"abc", "XYZ", false},
{"abcdefghijk", "abcdefghijX", false},
{"abcdefghijk", "abcdefghij\u212A", true},
{"abcdefghijK", "abcdefghij\u212A", true},
{"abcdefghijkz", "abcdefghij\u212Ay", false},
{"abcdefghijKz", "abcdefghij\u212Ay", false},
{"1", "2", false},
{"utf-8", "US-ASCII", false},
}
func TestEqualFold(t *testing.T) {
for _, tt := range EqualFoldTests {
if out := EqualFold(tt.s, tt.t); out != tt.out {
t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out)
}
if out := EqualFold(tt.t, tt.s); out != tt.out {
t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out)
}
}
}
func BenchmarkEqualFold(b *testing.B) {
b.Run("Tests", func(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, tt := range EqualFoldTests {
if out := EqualFold(tt.s, tt.t); out != tt.out {
b.Fatal("wrong result")
}
}
}
})
const s1 = "abcdefghijKz"
const s2 = "abcDefGhijKz"
b.Run("ASCII", func(b *testing.B) {
for i := 0; i < b.N; i++ {
EqualFold(s1, s2)
}
})
b.Run("UnicodePrefix", func(b *testing.B) {
for i := 0; i < b.N; i++ {
EqualFold("αβδ"+s1, "ΑΒΔ"+s2)
}
})
b.Run("UnicodeSuffix", func(b *testing.B) {
for i := 0; i < b.N; i++ {
EqualFold(s1+"αβδ", s2+"ΑΒΔ")
}
})
}
var CountTests = []struct {
s, sep string
num int
}{
{"", "", 1},
{"", "notempty", 0},
{"notempty", "", 9},
{"smaller", "not smaller", 0},
{"12345678987654321", "6", 2},
{"611161116", "6", 3},
{"notequal", "NotEqual", 0},
{"equal", "equal", 1},
{"abc1231231123q", "123", 3},
{"11111", "11", 2},
}
func TestCount(t *testing.T) {
for _, tt := range CountTests {
if num := Count(tt.s, tt.sep); num != tt.num {
t.Errorf("Count(%q, %q) = %d, want %d", tt.s, tt.sep, num, tt.num)
}
}
}
var cutTests = []struct {
s, sep string
before, after string
found bool
}{
{"abc", "b", "a", "c", true},
{"abc", "a", "", "bc", true},
{"abc", "c", "ab", "", true},
{"abc", "abc", "", "", true},
{"abc", "", "", "abc", true},
{"abc", "d", "abc", "", false},
{"", "d", "", "", false},
{"", "", "", "", true},
}
func TestCut(t *testing.T) {
for _, tt := range cutTests {
if before, after, found := Cut(tt.s, tt.sep); before != tt.before || after != tt.after || found != tt.found {
t.Errorf("Cut(%q, %q) = %q, %q, %v, want %q, %q, %v", tt.s, tt.sep, before, after, found, tt.before, tt.after, tt.found)
}
}
}
var cutPrefixTests = []struct {
s, sep string
after string
found bool
}{
{"abc", "a", "bc", true},
{"abc", "abc", "", true},
{"abc", "", "abc", true},
{"abc", "d", "abc", false},
{"", "d", "", false},
{"", "", "", true},
}
func TestCutPrefix(t *testing.T) {
for _, tt := range cutPrefixTests {
if after, found := CutPrefix(tt.s, tt.sep); after != tt.after || found != tt.found {
t.Errorf("CutPrefix(%q, %q) = %q, %v, want %q, %v", tt.s, tt.sep, after, found, tt.after, tt.found)
}
}
}
var cutSuffixTests = []struct {
s, sep string
before string
found bool
}{
{"abc", "bc", "a", true},
{"abc", "abc", "", true},
{"abc", "", "abc", true},
{"abc", "d", "abc", false},
{"", "d", "", false},
{"", "", "", true},
}
func TestCutSuffix(t *testing.T) {
for _, tt := range cutSuffixTests {
if before, found := CutSuffix(tt.s, tt.sep); before != tt.before || found != tt.found {
t.Errorf("CutSuffix(%q, %q) = %q, %v, want %q, %v", tt.s, tt.sep, before, found, tt.before, tt.found)
}
}
}
func makeBenchInputHard() string {
tokens := [...]string{
"<a>", "<p>", "<b>", "<strong>",
"</a>", "</p>", "</b>", "</strong>",
"hello", "world",
}
x := make([]byte, 0, 1<<20)
for {
i := rand.Intn(len(tokens))
if len(x)+len(tokens[i]) >= 1<<20 {
break
}
x = append(x, tokens[i]...)
}
return string(x)
}
var benchInputHard = makeBenchInputHard()
func benchmarkIndexHard(b *testing.B, sep string) {
for i := 0; i < b.N; i++ {
Index(benchInputHard, sep)
}
}
func benchmarkLastIndexHard(b *testing.B, sep string) {
for i := 0; i < b.N; i++ {
LastIndex(benchInputHard, sep)
}
}
func benchmarkCountHard(b *testing.B, sep string) {
for i := 0; i < b.N; i++ {
Count(benchInputHard, sep)
}
}
func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
func BenchmarkIndexHard4(b *testing.B) {
benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
}
func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") }
func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
var benchNeedleTorture = Repeat("ABC", 1<<10+1)
func BenchmarkIndexTorture(b *testing.B) {
for i := 0; i < b.N; i++ {
Index(benchInputTorture, benchNeedleTorture)
}
}
func BenchmarkCountTorture(b *testing.B) {
for i := 0; i < b.N; i++ {
Count(benchInputTorture, benchNeedleTorture)
}
}
func BenchmarkCountTortureOverlapping(b *testing.B) {
A := Repeat("ABC", 1<<20)
B := Repeat("ABC", 1<<10)
for i := 0; i < b.N; i++ {
Count(A, B)
}
}
func BenchmarkCountByte(b *testing.B) {
indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
benchStr := Repeat(benchmarkString,
(indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
benchFunc := func(b *testing.B, benchStr string) {
b.SetBytes(int64(len(benchStr)))
for i := 0; i < b.N; i++ {
Count(benchStr, "=")
}
}
for _, size := range indexSizes {
b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
benchFunc(b, benchStr[:size])
})
}
}
var makeFieldsInput = func() string {
x := make([]byte, 1<<20)
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
for i := range x {
switch rand.Intn(10) {
case 0:
x[i] = ' '
case 1:
if i > 0 && x[i-1] == 'x' {
copy(x[i-1:], "χ")
break
}
fallthrough
default:
x[i] = 'x'
}
}
return string(x)
}
var makeFieldsInputASCII = func() string {
x := make([]byte, 1<<20)
// Input is ~10% space, rest ASCII non-space.
for i := range x {
if rand.Intn(10) == 0 {
x[i] = ' '
} else {
x[i] = 'x'
}
}
return string(x)
}
var stringdata = []struct{ name, data string }{
{"ASCII", makeFieldsInputASCII()},
{"Mixed", makeFieldsInput()},
}
func BenchmarkFields(b *testing.B) {
for _, sd := range stringdata {
b.Run(sd.name, func(b *testing.B) {
for j := 1 << 4; j <= 1<<20; j <<= 4 {
b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(j))
data := sd.data[:j]
for i := 0; i < b.N; i++ {
Fields(data)
}
})
}
})
}
}
func BenchmarkFieldsFunc(b *testing.B) {
for _, sd := range stringdata {
b.Run(sd.name, func(b *testing.B) {
for j := 1 << 4; j <= 1<<20; j <<= 4 {
b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(j))
data := sd.data[:j]
for i := 0; i < b.N; i++ {
FieldsFunc(data, unicode.IsSpace)
}
})
}
})
}
}
func BenchmarkSplitEmptySeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "")
}
}
func BenchmarkSplitSingleByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "/")
}
}
func BenchmarkSplitMultiByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "hello")
}
}
func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, "/", 10)
}
}
func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, "hello", 10)
}
}
func BenchmarkRepeat(b *testing.B) {
s := "0123456789"
for _, n := range []int{5, 10} {
for _, c := range []int{0, 1, 2, 6} {
b.Run(fmt.Sprintf("%dx%d", n, c), func(b *testing.B) {
for i := 0; i < b.N; i++ {
Repeat(s[:n], c)
}
})
}
}
}
func BenchmarkRepeatLarge(b *testing.B) {
s := Repeat("@", 8*1024)
for j := 8; j <= 30; j++ {
for _, k := range []int{1, 16, 4097} {
s := s[:k]
n := (1 << j) / k
if n == 0 {
continue
}
b.Run(fmt.Sprintf("%d/%d", 1<<j, k), func(b *testing.B) {
for i := 0; i < b.N; i++ {
Repeat(s, n)
}
b.SetBytes(int64(n * len(s)))
})
}
}
}
func BenchmarkRepeatSpaces(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
Repeat(" ", 2)
}
}
func BenchmarkIndexAnyASCII(b *testing.B) {
x := Repeat("#", 2048) // Never matches set
cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
for k := 1; k <= 2048; k <<= 4 {
for j := 1; j <= 64; j <<= 1 {
b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
for i := 0; i < b.N; i++ {
IndexAny(x[:k], cs[:j])
}
})
}
}
}
func BenchmarkIndexAnyUTF8(b *testing.B) {
x := Repeat("#", 2048) // Never matches set
cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
for k := 1; k <= 2048; k <<= 4 {
for j := 1; j <= 64; j <<= 1 {
b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
for i := 0; i < b.N; i++ {
IndexAny(x[:k], cs[:j])
}
})
}
}
}
func BenchmarkLastIndexAnyASCII(b *testing.B) {
x := Repeat("#", 2048) // Never matches set
cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
for k := 1; k <= 2048; k <<= 4 {
for j := 1; j <= 64; j <<= 1 {
b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
for i := 0; i < b.N; i++ {
LastIndexAny(x[:k], cs[:j])
}
})
}
}
}
func BenchmarkLastIndexAnyUTF8(b *testing.B) {
x := Repeat("#", 2048) // Never matches set
cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
for k := 1; k <= 2048; k <<= 4 {
for j := 1; j <= 64; j <<= 1 {
b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
for i := 0; i < b.N; i++ {
LastIndexAny(x[:k], cs[:j])
}
})
}
}
}
func BenchmarkTrimASCII(b *testing.B) {
cs := "0123456789abcdef"
for k := 1; k <= 4096; k <<= 4 {
for j := 1; j <= 16; j <<= 1 {
b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
x := Repeat(cs[:j], k) // Always matches set
for i := 0; i < b.N; i++ {
Trim(x[:k], cs[:j])
}
})
}
}
}
func BenchmarkTrimByte(b *testing.B) {
x := " the quick brown fox "
for i := 0; i < b.N; i++ {
Trim(x, " ")
}
}
func BenchmarkIndexPeriodic(b *testing.B) {
key := "aa"
for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) {
s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip)
for i := 0; i < b.N; i++ {
Index(s, key)
}
})
}
}
func BenchmarkJoin(b *testing.B) {
vals := []string{"red", "yellow", "pink", "green", "purple", "orange", "blue"}
for l := 0; l <= len(vals); l++ {
b.Run(strconv.Itoa(l), func(b *testing.B) {
b.ReportAllocs()
vals := vals[:l]
for i := 0; i < b.N; i++ {
Join(vals, " and ")
}
})
}
}
func BenchmarkTrimSpace(b *testing.B) {
tests := []struct{ name, input string }{
{"NoTrim", "typical"},
{"ASCII", " foo bar "},
{"SomeNonASCII", " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
{"JustNonASCII", "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
}
for _, test := range tests {
b.Run(test.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
TrimSpace(test.input)
}
})
}
}
var stringSink string
func BenchmarkReplaceAll(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
stringSink = ReplaceAll("banana", "a", "<>")
}
}