mirror of https://go.googlesource.com/go
201 lines
4.3 KiB
Go
201 lines
4.3 KiB
Go
// Copyright 2023 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package syscall_test
|
|
|
|
import (
|
|
"fmt"
|
|
"slices"
|
|
"syscall"
|
|
"testing"
|
|
"unicode/utf16"
|
|
"unicode/utf8"
|
|
"unsafe"
|
|
)
|
|
|
|
// wtf8tests is the table of cases used by the tests and fuzz seeds in this
// file. Each entry pairs a byte string (str) with the UTF-16 code units
// (wstr) it is expected to encode to. Cases are grouped by the byte length
// of the encoded form and include unmatched surrogate halves.
var wtf8tests = []struct {
	str  string
	wstr []uint16
}{
	// 1-byte
	{str: "\x00", wstr: []uint16{0x00}},
	{str: "\x5C", wstr: []uint16{0x5C}},
	{str: "\x7F", wstr: []uint16{0x7F}},

	// 2-byte
	{str: "\xC2\x80", wstr: []uint16{0x80}},
	{str: "\xD7\x8A", wstr: []uint16{0x05CA}},
	{str: "\xDF\xBF", wstr: []uint16{0x07FF}},

	// 3-byte
	{str: "\xE0\xA0\x80", wstr: []uint16{0x0800}},
	{str: "\xE2\xB0\xBC", wstr: []uint16{0x2C3C}},
	{str: "\xEF\xBF\xBF", wstr: []uint16{0xFFFF}},

	// unmatched surrogate halves
	// high surrogates: 0xD800 to 0xDBFF
	{str: "\xED\xA0\x80", wstr: []uint16{0xD800}},
	// "High surrogate followed by another high surrogate"
	{str: "\xED\xA0\x80\xED\xA0\x80", wstr: []uint16{0xD800, 0xD800}},
	// "High surrogate followed by a symbol that is not a surrogate"
	{str: "\xED\xA0\x80\x0A", wstr: []uint16{0xD800, 0xA}},
	// "Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate"
	{str: "\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80", wstr: []uint16{0xD800, 0xD834, 0xDF06, 0xD800}},
	{str: "\xED\xA6\xAF", wstr: []uint16{0xD9AF}},
	{str: "\xED\xAF\xBF", wstr: []uint16{0xDBFF}},

	// low surrogates: 0xDC00 to 0xDFFF
	{str: "\xED\xB0\x80", wstr: []uint16{0xDC00}},
	// "Low surrogate followed by another low surrogate"
	{str: "\xED\xB0\x80\xED\xB0\x80", wstr: []uint16{0xDC00, 0xDC00}},
	// "Low surrogate followed by a symbol that is not a surrogate"
	{str: "\xED\xB0\x80\x0A", wstr: []uint16{0xDC00, 0xA}},
	// "Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate"
	{str: "\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80", wstr: []uint16{0xDC00, 0xD834, 0xDF06, 0xDC00}},
	{str: "\xED\xBB\xAE", wstr: []uint16{0xDEEE}},
	{str: "\xED\xBF\xBF", wstr: []uint16{0xDFFF}},

	// 4-byte
	{str: "\xF0\x90\x80\x80", wstr: []uint16{0xD800, 0xDC00}},
	{str: "\xF0\x9D\x8C\x86", wstr: []uint16{0xD834, 0xDF06}},
	{str: "\xF4\x8F\xBF\xBF", wstr: []uint16{0xDBFF, 0xDFFF}},
}
|
|
|
|
func TestWTF16Rountrip(t *testing.T) {
|
|
for _, tt := range wtf8tests {
|
|
t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) {
|
|
got := syscall.EncodeWTF16(tt.str, nil)
|
|
got2 := string(syscall.DecodeWTF16(got, nil))
|
|
if got2 != tt.str {
|
|
t.Errorf("got:\n%s\nwant:\n%s", got2, tt.str)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestWTF16Golden(t *testing.T) {
|
|
for _, tt := range wtf8tests {
|
|
t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) {
|
|
got := syscall.EncodeWTF16(tt.str, nil)
|
|
if !slices.Equal(got, tt.wstr) {
|
|
t.Errorf("got:\n%v\nwant:\n%v", got, tt.wstr)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func FuzzEncodeWTF16(f *testing.F) {
|
|
for _, tt := range wtf8tests {
|
|
f.Add(tt.str)
|
|
}
|
|
f.Fuzz(func(t *testing.T, b string) {
|
|
// test that there are no panics
|
|
got := syscall.EncodeWTF16(b, nil)
|
|
syscall.DecodeWTF16(got, nil)
|
|
if utf8.ValidString(b) {
|
|
// if the input is a valid UTF-8 string, then
|
|
// test that syscall.EncodeWTF16 behaves as
|
|
// utf16.Encode
|
|
want := utf16.Encode([]rune(b))
|
|
if !slices.Equal(got, want) {
|
|
t.Errorf("got:\n%v\nwant:\n%v", got, want)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
func FuzzDecodeWTF16(f *testing.F) {
|
|
for _, tt := range wtf8tests {
|
|
b := unsafe.Slice((*uint8)(unsafe.Pointer(unsafe.SliceData(tt.wstr))), len(tt.wstr)*2)
|
|
f.Add(b)
|
|
}
|
|
f.Fuzz(func(t *testing.T, b []byte) {
|
|
u16 := unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(b))), len(b)/2)
|
|
got := syscall.DecodeWTF16(u16, nil)
|
|
if utf8.Valid(got) {
|
|
// if the input is a valid UTF-8 string, then
|
|
// test that syscall.DecodeWTF16 behaves as
|
|
// utf16.Decode
|
|
want := utf16.Decode(u16)
|
|
if string(got) != string(want) {
|
|
t.Errorf("got:\n%s\nwant:\n%s", string(got), string(want))
|
|
}
|
|
}
|
|
// WTF-8 should always roundtrip
|
|
got2 := syscall.EncodeWTF16(string(got), nil)
|
|
if !slices.Equal(got2, u16) {
|
|
t.Errorf("got:\n%v\nwant:\n%v", got2, u16)
|
|
}
|
|
})
|
|
}
|