rust/library/core/tests/ascii.rs

492 lines
15 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

use core::char::from_u32;
#[test]
fn test_is_ascii() {
assert!(b"".is_ascii());
assert!(b"banana\0\x7F".is_ascii());
assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));
assert!("".is_ascii());
assert!("banana\0\u{7F}".is_ascii());
assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii()));
}
#[test]
fn test_to_ascii_uppercase() {
assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
assert_eq!("hıß".to_ascii_uppercase(), "Hıß");
for i in 0..501 {
let upper =
if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 } else { i };
assert_eq!(
(from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
(from_u32(upper).unwrap()).to_string()
);
}
}
#[test]
fn test_to_ascii_lowercase() {
assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
// Dotted capital I, Kelvin sign, Sharp S.
assert_eq!("ß".to_ascii_lowercase(), "ß");
for i in 0..501 {
let lower =
if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } else { i };
assert_eq!(
(from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
(from_u32(lower).unwrap()).to_string()
);
}
}
#[test]
fn test_make_ascii_lower_case() {
macro_rules! test {
($from: expr, $to: expr) => {{
let mut x = $from;
x.make_ascii_lowercase();
assert_eq!(x, $to);
}};
}
test!(b'A', b'a');
test!(b'a', b'a');
test!(b'!', b'!');
test!('A', 'a');
test!('À', 'À');
test!('a', 'a');
test!('!', '!');
test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
test!("ß".to_string(), "ß");
}
#[test]
fn test_make_ascii_upper_case() {
macro_rules! test {
($from: expr, $to: expr) => {{
let mut x = $from;
x.make_ascii_uppercase();
assert_eq!(x, $to);
}};
}
test!(b'a', b'A');
test!(b'A', b'A');
test!(b'!', b'!');
test!('a', 'A');
test!('à', 'à');
test!('A', 'A');
test!('!', '!');
test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");
test!("hıß".to_string(), "Hıß");
let mut x = "Hello".to_string();
x[..3].make_ascii_uppercase(); // Test IndexMut on String.
assert_eq!(x, "HELlo")
}
#[test]
fn test_eq_ignore_ascii_case() {
assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
// Dotted capital I, Kelvin sign, Sharp S.
assert!("ß".eq_ignore_ascii_case("ß"));
assert!(!"İ".eq_ignore_ascii_case("i"));
assert!(!"".eq_ignore_ascii_case("k"));
assert!(!"ß".eq_ignore_ascii_case("s"));
for i in 0..501 {
let lower =
if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } else { i };
assert!(
(from_u32(i).unwrap())
.to_string()
.eq_ignore_ascii_case(&from_u32(lower).unwrap().to_string())
);
}
}
#[test]
fn inference_works() {
let x = "a".to_string();
let _ = x.eq_ignore_ascii_case("A");
}
// Shorthands used by the is_ascii_* tests.
macro_rules! assert_all {
($what:ident, $($str:tt),+) => {{
$(
for b in $str.chars() {
if !b.$what() {
panic!("expected {}({}) but it isn't",
stringify!($what), b);
}
}
for b in $str.as_bytes().iter() {
if !b.$what() {
panic!("expected {}(0x{:02x})) but it isn't",
stringify!($what), b);
}
}
)+
}};
($what:ident, $($str:tt),+,) => (assert_all!($what,$($str),+))
}
macro_rules! assert_none {
($what:ident, $($str:tt),+) => {{
$(
for b in $str.chars() {
if b.$what() {
panic!("expected not-{}({}) but it is",
stringify!($what), b);
}
}
for b in $str.as_bytes().iter() {
if b.$what() {
panic!("expected not-{}(0x{:02x})) but it is",
stringify!($what), b);
}
}
)+
}};
($what:ident, $($str:tt),+,) => (assert_none!($what,$($str),+))
}
#[test]
fn test_is_ascii_alphabetic() {
assert_all!(
is_ascii_alphabetic,
"",
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
);
assert_none!(
is_ascii_alphabetic,
"0123456789",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_uppercase() {
assert_all!(is_ascii_uppercase, "", "ABCDEFGHIJKLMNOQPRSTUVWXYZ",);
assert_none!(
is_ascii_uppercase,
"abcdefghijklmnopqrstuvwxyz",
"0123456789",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_lowercase() {
assert_all!(is_ascii_lowercase, "abcdefghijklmnopqrstuvwxyz",);
assert_none!(
is_ascii_lowercase,
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
"0123456789",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_alphanumeric() {
assert_all!(
is_ascii_alphanumeric,
"",
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
"0123456789",
);
assert_none!(
is_ascii_alphanumeric,
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_digit() {
assert_all!(is_ascii_digit, "", "0123456789",);
assert_none!(
is_ascii_digit,
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_octdigit() {
assert_all!(is_ascii_octdigit, "", "01234567");
assert_none!(
is_ascii_octdigit,
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_hexdigit() {
assert_all!(is_ascii_hexdigit, "", "0123456789", "abcdefABCDEF",);
assert_none!(
is_ascii_hexdigit,
"ghijklmnopqrstuvwxyz",
"GHIJKLMNOQPRSTUVWXYZ",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_punctuation() {
assert_all!(is_ascii_punctuation, "", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",);
assert_none!(
is_ascii_punctuation,
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
"0123456789",
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_graphic() {
assert_all!(
is_ascii_graphic,
"",
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
"0123456789",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
);
assert_none!(
is_ascii_graphic,
" \t\n\x0c\r",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_whitespace() {
assert_all!(is_ascii_whitespace, "", " \t\n\x0c\r",);
assert_none!(
is_ascii_whitespace,
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
"0123456789",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x0b\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
}
#[test]
fn test_is_ascii_control() {
assert_all!(
is_ascii_control,
"",
"\x00\x01\x02\x03\x04\x05\x06\x07",
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17",
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x7f",
);
assert_none!(
is_ascii_control,
"abcdefghijklmnopqrstuvwxyz",
"ABCDEFGHIJKLMNOQPRSTUVWXYZ",
"0123456789",
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
" ",
);
}
// `is_ascii` does a good amount of pointer manipulation and has
// alignment-dependent computation. This is all sanity-checked via
// `debug_assert!`s, so we test various sizes/alignments thoroughly versus an
// "obviously correct" baseline function.
#[test]
fn test_is_ascii_align_size_thoroughly() {
// The "obviously-correct" baseline mentioned above.
fn is_ascii_baseline(s: &[u8]) -> bool {
s.iter().all(|b| b.is_ascii())
}
// Helper to repeat `l` copies of `b0` followed by `l` copies of `b1`.
fn repeat_concat(b0: u8, b1: u8, l: usize) -> Vec<u8> {
use core::iter::repeat;
repeat(b0).take(l).chain(repeat(b1).take(l)).collect()
}
// Miri is too slow
let iter = if cfg!(miri) { 0..20 } else { 0..100 };
for i in iter {
#[cfg(not(miri))]
let cases = &[
b"a".repeat(i),
b"\0".repeat(i),
b"\x7f".repeat(i),
b"\x80".repeat(i),
b"\xff".repeat(i),
repeat_concat(b'a', 0x80u8, i),
repeat_concat(0x80u8, b'a', i),
];
#[cfg(miri)]
let cases = &[b"a".repeat(i), b"\x80".repeat(i), repeat_concat(b'a', 0x80u8, i)];
for case in cases {
for pos in 0..=case.len() {
// Potentially misaligned head
let prefix = &case[pos..];
assert_eq!(is_ascii_baseline(prefix), prefix.is_ascii(),);
// Potentially misaligned tail
let suffix = &case[..case.len() - pos];
assert_eq!(is_ascii_baseline(suffix), suffix.is_ascii(),);
// Both head and tail are potentially misaligned
let mid = &case[(pos / 2)..(case.len() - (pos / 2))];
assert_eq!(is_ascii_baseline(mid), mid.is_ascii(),);
}
}
}
}
#[test]
fn ascii_const() {
// test that the `is_ascii` methods of `char` and `u8` are usable in a const context
const CHAR_IS_ASCII: bool = 'a'.is_ascii();
assert!(CHAR_IS_ASCII);
const BYTE_IS_ASCII: bool = 97u8.is_ascii();
assert!(BYTE_IS_ASCII);
}
#[test]
fn ascii_ctype_const() {
macro_rules! suite {
( $( $fn:ident => [$a:ident, $A:ident, $nine:ident, $dot:ident, $space:ident]; )* ) => {
$(
mod $fn {
const CHAR_A_LOWER: bool = 'a'.$fn();
const CHAR_A_UPPER: bool = 'A'.$fn();
const CHAR_NINE: bool = '9'.$fn();
const CHAR_DOT: bool = '.'.$fn();
const CHAR_SPACE: bool = ' '.$fn();
const U8_A_LOWER: bool = b'a'.$fn();
const U8_A_UPPER: bool = b'A'.$fn();
const U8_NINE: bool = b'9'.$fn();
const U8_DOT: bool = b'.'.$fn();
const U8_SPACE: bool = b' '.$fn();
pub fn run() {
assert_eq!(CHAR_A_LOWER, $a);
assert_eq!(CHAR_A_UPPER, $A);
assert_eq!(CHAR_NINE, $nine);
assert_eq!(CHAR_DOT, $dot);
assert_eq!(CHAR_SPACE, $space);
assert_eq!(U8_A_LOWER, $a);
assert_eq!(U8_A_UPPER, $A);
assert_eq!(U8_NINE, $nine);
assert_eq!(U8_DOT, $dot);
assert_eq!(U8_SPACE, $space);
}
}
)*
$( $fn::run(); )*
}
}
suite! {
// 'a' 'A' '9' '.' ' '
is_ascii_alphabetic => [true, true, false, false, false];
is_ascii_uppercase => [false, true, false, false, false];
is_ascii_lowercase => [true, false, false, false, false];
is_ascii_alphanumeric => [true, true, true, false, false];
is_ascii_digit => [false, false, true, false, false];
is_ascii_octdigit => [false, false, false, false, false];
is_ascii_hexdigit => [true, true, true, false, false];
is_ascii_punctuation => [false, false, false, true, false];
is_ascii_graphic => [true, true, true, true, false];
is_ascii_whitespace => [false, false, false, false, true];
is_ascii_control => [false, false, false, false, false];
}
}
#[test]
fn test_ascii_display() {
assert_eq!(b"foo'bar".escape_ascii().to_string(), r#"foo\'bar"#);
assert_eq!(b"\0\xff".escape_ascii().to_string(), r#"\x00\xff"#);
let mut it = b"\0fastpath\xffremainder\xff".escape_ascii();
let _ = it.advance_by(4);
let _ = it.advance_back_by(4);
assert_eq!(it.to_string(), r#"fastpath\xffremainder"#);
}