Arrange for zero to be canonical

We find that it is common for a property to be false across large ranges of
chars, which means that lookups plausibly often land on a word that is entirely
empty (all bits zero). We should therefore make sure that no bit rotation or
other mapping operation is needed to reach the zero word; canonicalize it first
if possible.
This commit is contained in:
Mark Rousskov
2020-03-21 17:20:57 -04:00
parent 233ab2f168
commit a7ec6f8fe0
2 changed files with 243 additions and 254 deletions

View File

@@ -56,64 +56,65 @@ pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0);
pub mod alphabetic {
const BITSET_LAST_CHUNK_MAP: u16 = 393;
static BITSET_CHUNKS_MAP: [u8; 394] = [
61, 18, 2, 35, 46, 39, 38, 74, 37, 25, 70, 34, 36, 73, 66, 5, 52, 58, 54, 58, 58, 58, 69,
64, 43, 58, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 6, 6, 23,
47, 49, 65, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 11, 58, 58, 58,
58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 57, 33, 17, 51, 40, 53, 4, 16, 41, 45,
30, 55, 28, 42, 27, 0, 67, 71, 1, 56, 6, 12, 31, 58, 58, 58, 58, 58, 6, 6, 63, 58, 58, 58,
58, 58, 58, 58, 6, 29, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 6,
68, 58, 50, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 6, 20, 58, 58, 58, 58, 58, 58, 58, 58,
58, 58, 58, 58, 58, 58, 58, 58, 58, 14, 22, 58, 58, 58, 58, 26, 58, 58, 58, 58, 58, 58, 58,
58, 58, 58, 58, 32, 24, 58, 58, 58, 58, 48, 60, 58, 58, 19, 58, 58, 44, 59, 58, 58, 58, 58,
58, 58, 58, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 6, 6, 6, 6,
6, 6, 6, 15, 72, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
58, 58, 58, 58, 58, 58, 6, 62, 58, 58, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13,
11, 35, 32, 14, 25, 18, 17, 74, 16, 29, 12, 61, 15, 73, 66, 36, 9, 0, 6, 0, 0, 0, 70, 64,
22, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 38, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 42, 39, 39, 53, 26, 28, 65, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 60, 48, 8, 19, 5, 34, 47, 20, 24, 57, 7, 55, 21, 31, 69, 67, 71, 13, 3,
39, 43, 58, 0, 0, 0, 0, 0, 39, 39, 63, 0, 0, 0, 0, 0, 0, 0, 39, 56, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 39, 68, 0, 10, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 41, 39,
39, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 52, 0, 0, 0, 0, 30, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 59, 54, 0, 0, 0, 0, 27, 4, 0, 0, 49, 0, 0, 23, 2, 0, 0, 0, 0, 0, 0,
0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 51, 39, 39, 39, 39, 39, 39, 39,
46, 72, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 33, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 40, 0, 0, 0, 0, 0, 0, 39, 62, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 44,
];
static BITSET_INDEX_CHUNKS: [[u8; 8]; 75] = [
[0, 252, 121, 172, 14, 172, 172, 172], [13, 51, 125, 172, 79, 35, 166, 172],
[15, 15, 7, 15, 221, 27, 76, 138], [15, 15, 10, 15, 15, 15, 15, 15],
[15, 15, 11, 108, 247, 194, 172, 172], [15, 15, 15, 15, 8, 96, 91, 104],
[15, 15, 15, 15, 15, 15, 15, 15], [15, 15, 15, 15, 15, 15, 15, 172],
[15, 15, 15, 15, 15, 15, 15, 193], [15, 15, 15, 15, 15, 15, 15, 210],
[15, 15, 15, 15, 15, 15, 15, 214], [15, 15, 15, 15, 15, 15, 47, 238],
[15, 15, 15, 15, 15, 15, 188, 172], [15, 15, 15, 15, 15, 181, 172, 172],
[15, 15, 15, 15, 192, 45, 15, 15], [15, 15, 15, 15, 207, 15, 15, 15],
[15, 15, 15, 15, 209, 153, 172, 172], [15, 15, 15, 15, 215, 5, 232, 110],
[15, 15, 15, 145, 172, 77, 33, 218], [15, 15, 15, 176, 15, 170, 172, 172],
[15, 15, 15, 187, 179, 172, 172, 172], [15, 15, 15, 191, 15, 15, 15, 15],
[15, 15, 15, 213, 172, 172, 172, 172], [15, 15, 182, 251, 15, 15, 15, 15],
[15, 15, 230, 61, 235, 236, 237, 234], [15, 22, 88, 19, 20, 189, 244, 248],
[15, 103, 161, 172, 172, 172, 172, 172], [15, 158, 15, 171, 172, 172, 87, 245],
[15, 177, 118, 151, 205, 126, 15, 164], [15, 178, 172, 172, 172, 172, 172, 172],
[15, 179, 205, 205, 195, 172, 172, 172], [15, 200, 15, 15, 15, 175, 172, 172],
[15, 224, 63, 225, 90, 17, 15, 15], [15, 228, 15, 188, 92, 16, 204, 18],
[15, 229, 25, 119, 133, 134, 1, 165], [26, 37, 15, 80, 5, 4, 204, 115],
[30, 211, 40, 208, 120, 132, 239, 180], [59, 5, 23, 60, 15, 15, 15, 15],
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 172, 172, 172, 172], [0, 0, 0, 0, 243, 10, 180, 0],
[0, 0, 0, 124, 0, 0, 203, 0], [0, 0, 0, 199, 0, 0, 0, 0],
[0, 0, 24, 185, 242, 112, 231, 168], [0, 0, 55, 197, 0, 0, 0, 0],
[0, 0, 141, 0, 46, 177, 243, 123], [0, 54, 172, 214, 113, 34, 216, 163],
[0, 83, 241, 0, 62, 29, 179, 0], [0, 172, 0, 0, 172, 4, 159, 142],
[0, 249, 116, 3, 172, 172, 172, 172], [1, 172, 172, 172, 172, 172, 172, 172],
[14, 51, 125, 0, 79, 35, 166, 0], [26, 37, 172, 80, 6, 5, 204, 115],
[30, 211, 40, 208, 120, 132, 239, 180], [59, 6, 23, 60, 172, 172, 172, 172],
[67, 157, 68, 139, 66, 58, 99, 136], [75, 128, 69, 106, 71, 143, 74, 167],
[78, 254, 15, 212, 172, 207, 172, 172], [82, 122, 192, 130, 117, 172, 6, 172],
[94, 172, 44, 196, 70, 156, 172, 172], [105, 226, 31, 217, 48, 15, 28, 243],
[111, 93, 109, 172, 172, 172, 172, 172], [127, 102, 190, 154, 208, 137, 186, 172],
[147, 149, 53, 43, 216, 50, 72, 107], [148, 12, 15, 202, 32, 15, 233, 52],
[150, 172, 172, 172, 97, 183, 172, 172], [152, 206, 15, 64, 41, 101, 220, 89],
[172, 15, 172, 172, 15, 3, 159, 142], [172, 54, 15, 214, 113, 34, 0, 163],
[172, 83, 241, 172, 62, 29, 179, 172], [172, 172, 24, 185, 242, 112, 231, 168],
[172, 172, 55, 197, 172, 172, 172, 172], [172, 172, 141, 172, 46, 177, 243, 123],
[172, 172, 172, 124, 172, 172, 203, 172], [172, 172, 172, 172, 15, 15, 15, 15],
[172, 172, 172, 172, 172, 172, 172, 172], [172, 172, 172, 172, 243, 9, 180, 172],
[172, 172, 172, 199, 172, 172, 172, 172], [172, 249, 116, 2, 15, 15, 15, 15],
[191, 172, 172, 172, 172, 172, 172, 172], [200, 172, 172, 172, 172, 172, 172, 172],
[201, 172, 172, 172, 172, 172, 172, 172], [209, 56, 0, 129, 38, 42, 15, 198],
[209, 95, 65, 114, 15, 15, 15, 250], [211, 172, 30, 85, 81, 174, 36, 155],
[211, 192, 172, 146, 202, 73, 184, 172], [222, 223, 15, 135, 39, 144, 86, 21],
[226, 15, 15, 15, 15, 15, 15, 15], [227, 5, 162, 211, 172, 172, 172, 172],
[231, 15, 15, 15, 15, 15, 15, 15], [240, 131, 84, 173, 219, 253, 57, 140],
[246, 169, 98, 160, 173, 49, 100, 172],
[78, 254, 172, 212, 0, 207, 0, 0], [82, 122, 192, 130, 117, 0, 7, 0],
[94, 0, 44, 196, 70, 156, 0, 0], [105, 1, 31, 218, 48, 172, 28, 243],
[111, 93, 109, 0, 0, 0, 0, 0], [127, 102, 190, 154, 208, 137, 186, 0],
[147, 149, 53, 43, 217, 50, 72, 107], [148, 13, 172, 202, 32, 172, 233, 52],
[150, 0, 0, 0, 97, 183, 0, 0], [152, 206, 172, 64, 41, 101, 221, 89],
[172, 22, 88, 19, 20, 189, 244, 248], [172, 103, 161, 0, 0, 0, 0, 0],
[172, 158, 172, 171, 0, 0, 87, 245], [172, 172, 8, 172, 222, 27, 76, 138],
[172, 172, 11, 172, 172, 172, 172, 172], [172, 172, 12, 108, 247, 194, 0, 0],
[172, 172, 172, 145, 0, 77, 33, 219], [172, 172, 172, 172, 9, 96, 91, 104],
[172, 172, 172, 172, 172, 172, 47, 238], [172, 172, 172, 172, 172, 172, 172, 0],
[172, 172, 172, 172, 172, 172, 172, 172], [172, 172, 172, 172, 172, 172, 172, 193],
[172, 172, 172, 172, 172, 172, 172, 210], [172, 172, 172, 172, 172, 172, 172, 214],
[172, 172, 172, 172, 172, 172, 188, 0], [172, 172, 172, 172, 172, 181, 0, 0],
[172, 172, 172, 172, 192, 45, 172, 172], [172, 172, 172, 172, 207, 172, 172, 172],
[172, 172, 172, 172, 209, 153, 0, 0], [172, 172, 172, 172, 215, 6, 232, 110],
[172, 172, 172, 176, 172, 170, 0, 0], [172, 172, 172, 187, 179, 0, 0, 0],
[172, 172, 172, 191, 172, 172, 172, 172], [172, 172, 172, 213, 0, 0, 0, 0],
[172, 172, 182, 251, 172, 172, 172, 172], [172, 172, 230, 61, 235, 236, 237, 234],
[172, 177, 118, 151, 205, 126, 172, 164], [172, 178, 0, 0, 0, 0, 0, 0],
[172, 179, 205, 205, 195, 0, 0, 0], [172, 200, 172, 172, 172, 175, 0, 0],
[172, 225, 63, 226, 90, 17, 172, 172], [172, 228, 172, 188, 92, 16, 204, 18],
[172, 229, 25, 119, 133, 134, 2, 165], [191, 0, 0, 0, 0, 0, 0, 0],
[200, 0, 0, 0, 0, 0, 0, 0], [201, 0, 0, 0, 0, 0, 0, 0],
[209, 56, 216, 129, 38, 42, 172, 198], [209, 95, 65, 114, 172, 172, 172, 250],
[211, 0, 30, 85, 81, 174, 36, 155], [211, 192, 0, 146, 202, 73, 184, 0],
[216, 252, 121, 0, 15, 0, 0, 0], [223, 224, 172, 135, 39, 144, 86, 21],
[227, 6, 162, 211, 0, 0, 0, 0], [231, 172, 172, 172, 172, 172, 172, 172],
[240, 131, 84, 173, 220, 253, 57, 140], [246, 169, 98, 160, 173, 49, 100, 0],
];
static BITSET_CANONICAL: [u64; 172] = [
0b0111111111111111111111111111111111111111111111111111111111111111,
0b0000000000000000000000000000000000000000000000000000000000000000,
0b1111111111111111111111111111111111111111111111111111111111111110,
0b1111111111001111111111111111111111111111111111111111111111111111,
0b1111111101111111111111111111111111111111011111111111111111111111,
0b1111111111111111111111111111111111111111111111111000011111111111,
@@ -128,7 +129,6 @@ pub mod alphabetic {
0b1000111111110000011111111111111111111111111111111111111111111111,
0b0111111101111111111111111111111111111111111111111111110111111111,
0b0000000000000000000001111111111111100111111111111111111111111111,
0b1111111111111111111111111111111111111111111111111111111111111111,
0b1111111111111111111111111111111111111111111111111111111111011011,
0b1111111111111111111111111111111111111111111111011111110001011111,
0b1111111111111111111111111111111111111111111110000000000000000000,
@@ -287,15 +287,15 @@ pub mod alphabetic {
0b0000000000000000000000000000000000000000000000000000000010110011,
];
static BITSET_MAPPING: [(u8, u8); 83] = [
(0, 191), (0, 65), (0, 188), (0, 187), (0, 186), (0, 185), (0, 184), (0, 182), (0, 181),
(0, 180), (0, 178), (0, 79), (0, 175), (0, 174), (0, 173), (0, 169), (0, 165), (0, 164),
(0, 162), (0, 161), (0, 160), (0, 158), (0, 155), (0, 151), (0, 150), (0, 149), (0, 148),
(0, 147), (0, 144), (0, 112), (0, 143), (0, 113), (0, 141), (0, 140), (0, 139), (0, 138),
(0, 137), (0, 136), (0, 135), (0, 134), (0, 132), (0, 131), (0, 130), (0, 129), (0, 61),
(0, 60), (0, 55), (0, 53), (0, 52), (0, 49), (0, 48), (0, 32), (0, 22), (0, 5), (0, 1),
(1, 129), (1, 58), (1, 57), (1, 50), (1, 42), (1, 28), (1, 21), (2, 180), (2, 30), (2, 24),
(2, 18), (3, 132), (3, 33), (3, 17), (4, 80), (4, 32), (5, 112), (5, 16), (6, 96), (6, 3),
(7, 38), (8, 32), (9, 17), (10, 69), (11, 32), (12, 187), (13, 179), (14, 141),
(0, 64), (1, 64), (1, 189), (1, 188), (1, 187), (1, 186), (1, 185), (1, 183), (1, 182),
(1, 181), (1, 179), (1, 78), (1, 176), (1, 175), (1, 174), (1, 170), (1, 166), (1, 165),
(1, 163), (1, 162), (1, 161), (1, 159), (1, 156), (1, 152), (1, 151), (1, 150), (1, 149),
(1, 148), (1, 145), (1, 111), (1, 144), (1, 112), (1, 142), (1, 141), (1, 140), (1, 139),
(1, 138), (1, 137), (1, 136), (1, 135), (1, 133), (1, 132), (1, 131), (1, 130), (1, 63),
(1, 60), (1, 59), (1, 54), (1, 52), (1, 51), (1, 48), (1, 47), (1, 31), (1, 21), (1, 4),
(2, 129), (2, 58), (2, 57), (2, 50), (2, 42), (2, 28), (2, 21), (3, 180), (3, 30), (3, 24),
(3, 18), (4, 132), (4, 33), (4, 17), (5, 80), (5, 32), (6, 112), (6, 16), (7, 96), (7, 3),
(8, 38), (9, 32), (10, 17), (11, 69), (12, 32), (13, 187), (14, 179), (15, 141),
];
pub fn lookup(c: char) -> bool {
@@ -314,58 +314,53 @@ pub mod alphabetic {
pub mod case_ignorable {
const BITSET_LAST_CHUNK_MAP: u16 = 1792;
static BITSET_CHUNKS_MAP: [u8; 251] = [
14, 28, 47, 22, 19, 11, 4, 13, 9, 40, 39, 32, 49, 23, 15, 36, 18, 39, 39, 39, 39, 39, 27,
26, 12, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 25, 39, 30, 24, 20, 16, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 33, 39, 17, 38, 31, 39, 39, 39, 7, 41, 46, 3, 10, 1,
6, 51, 8, 5, 42, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 50, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 44, 39, 35, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 29, 39, 39, 39, 39, 39, 39, 39, 39, 39,
34, 48, 39, 39, 39, 0, 39, 39, 21, 43, 39, 39, 45, 39, 39, 39, 39, 37, 2,
36, 19, 18, 44, 41, 33, 22, 35, 31, 6, 0, 7, 49, 45, 37, 3, 40, 0, 0, 0, 0, 0, 20, 48, 34,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 0, 10, 46, 42,
38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 39, 2, 11, 0, 0, 0, 29, 9, 17, 26, 32, 24, 28, 51, 30,
27, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4, 21, 0, 0, 0, 23, 0, 0, 43, 13, 0, 0, 15, 0, 0, 0, 0, 1, 25,
];
static BITSET_INDEX_CHUNKS: [[u8; 8]; 52] = [
[3, 75, 88, 142, 142, 142, 142, 142], [5, 110, 38, 181, 142, 142, 12, 182],
[21, 4, 142, 142, 4, 4, 4, 160], [28, 163, 50, 131, 76, 138, 6, 120],
[31, 103, 14, 105, 54, 106, 124, 119], [33, 142, 17, 142, 34, 175, 118, 142],
[35, 123, 71, 142, 96, 142, 142, 142], [37, 142, 142, 144, 142, 142, 142, 142],
[41, 115, 117, 142, 142, 142, 142, 142], [42, 78, 112, 139, 142, 142, 142, 142],
[45, 142, 142, 98, 54, 77, 142, 142], [58, 74, 58, 29, 14, 104, 126, 122],
[62, 142, 180, 2, 142, 142, 142, 142], [63, 164, 53, 121, 67, 168, 52, 129],
[65, 177, 68, 142, 142, 142, 142, 142], [70, 16, 142, 66, 23, 69, 20, 0],
[72, 57, 30, 73, 142, 97, 142, 94], [87, 178, 142, 141, 47, 179, 143, 61],
[89, 40, 113, 85, 142, 142, 142, 142], [90, 151, 142, 19, 56, 84, 59, 46],
[95, 142, 142, 39, 162, 174, 49, 100], [99, 142, 142, 142, 167, 142, 142, 142],
[114, 86, 142, 91, 25, 158, 10, 51], [116, 36, 24, 123, 55, 81, 93, 83],
[130, 32, 155, 146, 159, 137, 150, 148], [133, 142, 142, 142, 142, 142, 142, 142],
[136, 142, 142, 142, 142, 142, 142, 142], [142, 1, 142, 153, 142, 15, 142, 22],
[142, 142, 26, 4, 4, 64, 176, 142], [142, 142, 102, 142, 142, 142, 142, 142],
[142, 142, 142, 16, 142, 142, 142, 142], [142, 142, 142, 135, 142, 170, 142, 142],
[142, 142, 142, 142, 79, 82, 48, 111], [142, 142, 142, 142, 134, 142, 7, 125],
[142, 142, 142, 142, 142, 27, 92, 142], [142, 142, 142, 142, 142, 132, 108, 101],
[142, 142, 142, 142, 142, 142, 13, 43], [142, 142, 142, 142, 142, 142, 142, 8],
[142, 142, 142, 142, 142, 142, 142, 140], [142, 142, 142, 142, 142, 142, 142, 142],
[142, 142, 142, 142, 142, 152, 142, 142], [142, 142, 142, 142, 156, 142, 142, 142],
[142, 142, 142, 147, 142, 142, 142, 142], [142, 142, 142, 157, 142, 142, 142, 142],
[142, 142, 142, 169, 9, 128, 142, 142], [142, 142, 142, 172, 142, 161, 142, 142],
[142, 142, 145, 142, 142, 173, 142, 142], [142, 142, 171, 142, 142, 109, 11, 80],
[142, 149, 142, 142, 142, 142, 142, 142], [154, 127, 18, 142, 60, 142, 142, 142],
[165, 142, 142, 142, 142, 142, 142, 142], [166, 142, 142, 142, 44, 127, 142, 107],
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 8], [0, 0, 0, 0, 0, 0, 0, 141],
[0, 0, 0, 0, 0, 0, 14, 42], [0, 0, 0, 0, 0, 27, 92, 0], [0, 0, 0, 0, 0, 133, 108, 101],
[0, 0, 0, 0, 0, 152, 0, 0], [0, 0, 0, 0, 79, 82, 47, 111], [0, 0, 0, 0, 135, 0, 5, 126],
[0, 0, 0, 0, 156, 0, 0, 0], [0, 0, 0, 17, 0, 0, 0, 0], [0, 0, 0, 136, 0, 168, 0, 0],
[0, 0, 0, 147, 0, 0, 0, 0], [0, 0, 0, 157, 0, 0, 0, 0], [0, 0, 0, 167, 9, 129, 0, 0],
[0, 0, 0, 170, 0, 161, 0, 0], [0, 0, 102, 0, 0, 0, 0, 0], [0, 0, 145, 0, 0, 171, 0, 0],
[0, 0, 169, 0, 0, 109, 12, 80], [0, 0, 174, 123, 123, 64, 176, 0],
[0, 49, 0, 153, 0, 16, 0, 23], [0, 149, 0, 0, 0, 0, 0, 0],
[2, 103, 15, 105, 54, 106, 125, 119], [4, 75, 88, 0, 0, 0, 0, 0],
[6, 110, 37, 181, 0, 0, 13, 182], [22, 123, 0, 0, 123, 123, 123, 11],
[28, 163, 50, 132, 76, 139, 7, 120], [32, 0, 18, 0, 33, 175, 118, 0],
[34, 124, 71, 0, 96, 0, 0, 0], [36, 0, 0, 144, 0, 0, 0, 0], [40, 115, 117, 0, 0, 0, 0, 0],
[41, 78, 112, 140, 0, 0, 0, 0], [44, 0, 0, 98, 54, 77, 0, 0],
[58, 74, 58, 29, 15, 104, 127, 122], [62, 0, 180, 3, 0, 0, 0, 0],
[63, 164, 53, 121, 67, 160, 52, 130], [65, 177, 68, 0, 0, 0, 0, 0],
[70, 17, 0, 66, 24, 69, 21, 1], [72, 57, 30, 73, 0, 97, 0, 94],
[87, 178, 0, 142, 46, 179, 143, 61], [89, 39, 113, 85, 0, 0, 0, 0],
[90, 151, 0, 20, 56, 84, 59, 45], [95, 0, 0, 38, 162, 172, 48, 100],
[99, 0, 0, 0, 159, 0, 0, 0], [114, 86, 0, 91, 26, 158, 10, 51],
[116, 35, 25, 124, 55, 81, 93, 83], [131, 31, 155, 146, 173, 138, 150, 148],
[134, 0, 0, 0, 0, 0, 0, 0], [137, 0, 0, 0, 0, 0, 0, 0], [154, 128, 19, 0, 60, 0, 0, 0],
[165, 0, 0, 0, 0, 0, 0, 0], [166, 0, 0, 0, 43, 128, 0, 107],
];
static BITSET_CANONICAL: [u64; 123] = [
0b0000000000000000000000000000000000000000000000000000000000000000,
0b1111101111111111111111111111111111111111111111111111111111111111,
0b0011000000000000000000000000000000000000000000000000000000000000,
0b1100000000000000000000000000000000000000000000000000000000010001,
0b0111000000000000000000000000000000000000000000000000000000000000,
0b1111100001111111111111111111111111111111111111111111111111111111,
0b1111111111111111111111111111111111111111111111111111111111111111,
0b1111111111111100000000000000000000000000000000000000000000000000,
0b1111111100000000000000000000000000000000000000000000000000000000,
0b0111111111000000000000000000000000000000000000000000000000000011,
0b1111111111111100000000000000000000000000000000000000000000000000,
0b1111100000000000000000000000000000000000000000000000000000000000,
0b0000000001111111000000000000000000000000000000000000000000000000,
0b0000000000000001111111111100000000000000000000000000000000000000,
0b0000000000000000111111111111111111111111111111111111111111111111,
0b1011111111111111111111111111111111111111111111100000000000000000,
0b1011000000111100000000000000000000000000000000000000000000000000,
0b1010000000000000000000000000000000000000000000000000000000000000,
@@ -381,12 +376,10 @@ pub mod case_ignorable {
0b1111111111111111111100000000000000000000000000000000000000000000,
0b1111111111111111000000001000000000000000000000000000000000000000,
0b1111111111111111000000000000000000000000000000101000000000000000,
0b1111111111111111000000000000000000000000000000000000000000000000,
0b1111111111111000000000111000000000000000000000000000000000000000,
0b1111111100000000000000000000000000000000000000000000000000000010,
0b1111110000000000000000000000110000000000000000000010000110111110,
0b1100000110011101000000000000000000000000000000000000000000000000,
0b1100000000000000000000000000000000000000000000000000000000010001,
0b1011111111110111100000000000000000000000000000000000000000000000,
0b1011111101111111000000000000000000000000000000000000000000000000,
0b1011010001111110000000000000000000000000000000000000000000000000,
@@ -405,6 +398,7 @@ pub mod case_ignorable {
0b0100000000000000000000000000000000000100000000000100000010000000,
0b0011111110110000000000000000000000000000000000000000000000000000,
0b0011001111001000000000000000000000000000000000000000000000000111,
0b0011000000000000000000000000000000000000000000000000000000000000,
0b0010011001111000000000000000000000000000000000000000000000000011,
0b0010010000111111111110000000000000000000000000000000000000000000,
0b0001111111111111111111111111111111111110111111111110000011011111,
@@ -480,13 +474,13 @@ pub mod case_ignorable {
0b0000000000000000000000000000000000000000000000000010000000000001,
];
static BITSET_MAPPING: [(u8, u8); 60] = [
(0, 70), (0, 71), (0, 190), (0, 72), (0, 73), (0, 188), (0, 76), (0, 82), (0, 83), (0, 85),
(0, 91), (0, 100), (0, 102), (0, 117), (0, 118), (0, 121), (0, 66), (0, 67), (0, 69),
(1, 190), (1, 34), (1, 41), (1, 47), (1, 52), (1, 55), (1, 60), (2, 6), (2, 12), (2, 29),
(2, 33), (2, 51), (3, 84), (3, 101), (3, 109), (3, 117), (4, 181), (4, 158), (4, 144),
(5, 12), (5, 46), (5, 7), (6, 176), (6, 134), (6, 57), (7, 62), (7, 63), (8, 53), (8, 59),
(9, 19), (9, 32), (10, 32), (10, 33), (11, 184), (12, 184), (13, 33), (14, 170), (15, 1),
(16, 33), (17, 179), (18, 23),
(0, 64), (1, 70), (1, 71), (1, 190), (1, 72), (1, 73), (1, 188), (1, 76), (1, 82), (1, 83),
(1, 85), (1, 91), (1, 100), (1, 102), (1, 117), (1, 118), (1, 121), (1, 66), (1, 67),
(1, 69), (2, 160), (2, 153), (2, 147), (2, 142), (2, 139), (2, 134), (3, 6), (3, 12),
(3, 29), (3, 33), (3, 51), (4, 84), (4, 101), (4, 109), (4, 117), (5, 181), (5, 62),
(5, 63), (6, 12), (6, 46), (6, 7), (7, 176), (7, 134), (7, 57), (8, 53), (8, 59), (9, 19),
(9, 32), (10, 32), (10, 33), (11, 142), (11, 64), (12, 184), (13, 184), (14, 33), (15, 170),
(16, 1), (17, 33), (18, 179), (19, 23),
];
pub fn lookup(c: char) -> bool {
@@ -505,41 +499,40 @@ pub mod case_ignorable {
pub mod cased {
const BITSET_LAST_CHUNK_MAP: u16 = 124;
static BITSET_CHUNKS_MAP: [u8; 124] = [
4, 0, 18, 18, 6, 18, 18, 9, 5, 8, 18, 3, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 14, 15, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 16, 18, 1, 18, 10, 18, 18,
7, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 13, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 2, 18, 18, 18, 18, 11, 12,
13, 15, 0, 0, 8, 0, 0, 11, 14, 10, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 2, 0, 16, 0, 12, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0,
0, 0, 0, 7, 6,
];
static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
[5, 5, 7, 5, 50, 10, 40, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[5, 42, 16, 24, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[5, 53, 38, 0, 20, 9, 5, 5, 5, 5, 4, 18, 55, 56, 57, 54],
[51, 52, 5, 29, 30, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[58, 2, 27, 1, 5, 5, 48, 6, 5, 5, 28, 31, 58, 35, 14, 49],
[58, 34, 32, 58, 19, 11, 62, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[58, 58, 12, 37, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 12, 61],
[58, 58, 12, 44, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[58, 58, 17, 45, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[58, 58, 36, 58, 5, 5, 5, 58, 5, 5, 5, 5, 3, 22, 21, 23],
[58, 58, 47, 47, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[58, 58, 58, 58, 5, 39, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[58, 58, 58, 58, 16, 60, 41, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[58, 58, 58, 58, 58, 58, 58, 58, 58, 5, 58, 58, 58, 58, 58, 58],
[58, 58, 58, 58, 58, 58, 58, 58, 58, 46, 43, 58, 13, 5, 8, 26],
[58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 16, 15, 5, 58],
[58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 25, 59, 58, 58],
[58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 33, 58, 58, 58],
[58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 15, 39, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 59, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 44, 0, 13, 39, 8, 26],
[0, 0, 0, 0, 16, 60, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 39, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 12, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 61],
[0, 0, 12, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 17, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 36, 0, 39, 39, 39, 0, 39, 39, 39, 39, 4, 22, 21, 23],
[0, 0, 48, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 3, 27, 2, 39, 39, 49, 6, 39, 39, 28, 31, 0, 35, 14, 50],
[0, 34, 32, 0, 19, 11, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[39, 39, 7, 39, 51, 10, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[39, 43, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[39, 54, 38, 1, 20, 9, 39, 39, 39, 39, 5, 18, 56, 57, 58, 55],
[52, 53, 39, 29, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
];
static BITSET_CANONICAL: [u64; 39] = [
0b0000000000000000000000000000000000000000000000000000000000000000,
0b1111111111111111111111111111111111111111111111111111111111101111,
0b1111111101111111111111111111111111111111011111111111111111111111,
0b0000011111111111111111111111111000000111111111111111111111111110,
0b1111111111111111111111111111111100111111001111111111111111111111,
0b1111111111111111111111110011111111111111111111111111111111111111,
0b1111111111111111111111111111111111111111111111111111111111111111,
0b1111111111111111111111111111111111111111111111111111111111110000,
0b1111111111111111111111111111111111111111111111111111110000000011,
0b1111111111111111111111111111111111111111111111110111100011111111,
@@ -575,9 +568,9 @@ pub mod cased {
0b1110101111111111110111100110010011011111111111111111111111111111,
];
static BITSET_MAPPING: [(u8, u8); 24] = [
(0, 188), (0, 183), (0, 182), (0, 176), (0, 162), (0, 160), (0, 150), (0, 146), (0, 141),
(0, 55), (0, 50), (0, 44), (0, 43), (0, 27), (0, 17), (1, 180), (1, 30), (1, 24), (1, 18),
(2, 187), (2, 160), (2, 15), (3, 32), (4, 93),
(0, 64), (1, 188), (1, 183), (1, 182), (1, 176), (1, 162), (1, 160), (1, 150), (1, 146),
(1, 141), (1, 55), (1, 50), (1, 44), (1, 43), (1, 27), (1, 17), (2, 180), (2, 30), (2, 24),
(2, 18), (3, 160), (3, 15), (4, 32), (5, 93),
];
pub fn lookup(c: char) -> bool {
@@ -625,44 +618,37 @@ pub mod cc {
pub mod grapheme_extend {
const BITSET_LAST_CHUNK_MAP: u16 = 1792;
static BITSET_CHUNKS_MAP: [u8; 246] = [
34, 30, 41, 44, 17, 11, 0, 12, 9, 36, 34, 29, 43, 20, 13, 34, 21, 34, 34, 34, 34, 34, 26,
34, 16, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 23, 18, 14, 34, 34, 34, 34, 34, 34,
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 31, 34, 15, 35, 27, 34, 34, 34, 7, 37, 25, 4, 10,
22, 6, 2, 8, 5, 28, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 40, 34, 33, 34,
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 24, 34, 34, 34, 34, 34, 34, 34, 34,
34, 32, 42, 34, 34, 34, 1, 34, 34, 19, 38, 34, 34, 39, 3,
0, 6, 17, 44, 37, 31, 20, 32, 29, 4, 0, 5, 43, 40, 33, 0, 41, 0, 0, 0, 0, 0, 9, 0, 36, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 38, 34, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 7, 0, 35, 1, 10, 0, 0, 0, 27, 8, 16, 24, 30, 42, 26, 22, 28, 25, 11, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 14, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 19, 0, 0,
0, 21, 0, 0, 39, 12, 0, 0, 13, 23,
];
static BITSET_INDEX_CHUNKS: [[u8; 8]; 45] = [
[1, 85, 27, 86, 34, 84, 100, 88], [4, 60, 71, 120, 120, 120, 120, 120],
[7, 120, 120, 120, 33, 101, 120, 87], [13, 8, 120, 120, 8, 8, 8, 139],
[17, 0, 51, 120, 61, 111, 138, 97], [21, 120, 56, 120, 22, 141, 95, 120],
[24, 99, 55, 120, 79, 120, 120, 120], [28, 120, 120, 116, 120, 120, 120, 120],
[31, 92, 94, 120, 120, 120, 120, 120], [32, 63, 91, 120, 120, 120, 120, 120],
[36, 120, 120, 80, 34, 62, 120, 120], [48, 59, 48, 9, 19, 83, 38, 96],
[50, 104, 44, 140, 53, 30, 43, 102], [54, 120, 120, 52, 120, 120, 120, 6],
[57, 47, 20, 58, 120, 120, 120, 77], [70, 120, 120, 120, 120, 120, 115, 120],
[72, 120, 113, 120, 120, 120, 120, 120], [73, 123, 120, 14, 46, 82, 35, 37],
[78, 120, 120, 29, 110, 127, 41, 109], [81, 120, 120, 120, 5, 120, 120, 120],
[93, 26, 16, 99, 45, 64, 76, 66], [103, 120, 120, 68, 120, 120, 120, 120],
[112, 89, 25, 137, 120, 120, 23, 143], [120, 39, 115, 118, 120, 120, 120, 120],
[120, 120, 114, 120, 120, 120, 120, 120], [120, 120, 117, 120, 120, 126, 120, 120],
[120, 120, 120, 67, 120, 136, 120, 13], [120, 120, 120, 107, 120, 11, 120, 120],
[120, 120, 120, 119, 120, 120, 120, 120], [120, 120, 120, 120, 2, 65, 40, 90],
[120, 120, 120, 120, 8, 139, 120, 120], [120, 120, 120, 120, 106, 120, 120, 120],
[120, 120, 120, 120, 120, 18, 75, 120], [120, 120, 120, 120, 120, 105, 129, 108],
[120, 120, 120, 120, 120, 120, 120, 120], [120, 120, 120, 120, 120, 120, 120, 135],
[120, 120, 120, 120, 120, 124, 120, 120], [120, 120, 120, 120, 130, 120, 120, 120],
[120, 120, 120, 131, 120, 120, 120, 120], [120, 120, 120, 134, 120, 133, 120, 120],
[120, 120, 120, 142, 5, 120, 120, 120], [120, 120, 132, 120, 120, 120, 10, 98],
[120, 121, 120, 120, 120, 120, 120, 120], [122, 120, 12, 120, 49, 120, 120, 120],
[128, 69, 120, 74, 15, 125, 3, 42],
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 135], [0, 0, 0, 0, 0, 18, 75, 0],
[0, 0, 0, 0, 0, 106, 129, 109], [0, 0, 0, 0, 0, 124, 0, 0], [0, 0, 0, 0, 3, 65, 40, 90],
[0, 0, 0, 0, 99, 139, 0, 0], [0, 0, 0, 0, 107, 0, 0, 0], [0, 0, 0, 0, 130, 0, 0, 0],
[0, 0, 0, 67, 0, 136, 0, 14], [0, 0, 0, 108, 0, 12, 0, 0], [0, 0, 0, 120, 0, 0, 0, 0],
[0, 0, 0, 131, 0, 0, 0, 0], [0, 0, 0, 134, 0, 133, 0, 0], [0, 0, 0, 142, 6, 0, 0, 0],
[0, 0, 115, 0, 0, 0, 0, 0], [0, 0, 118, 0, 0, 126, 0, 0], [0, 0, 132, 0, 0, 0, 11, 98],
[0, 39, 116, 119, 0, 0, 0, 0], [0, 121, 0, 0, 0, 0, 0, 0], [2, 85, 27, 86, 34, 84, 101, 88],
[5, 60, 71, 0, 0, 0, 0, 0], [8, 0, 0, 0, 33, 102, 0, 87], [14, 99, 0, 0, 99, 99, 99, 139],
[17, 1, 51, 0, 61, 112, 138, 97], [21, 0, 56, 0, 22, 141, 95, 0],
[24, 100, 55, 0, 79, 0, 0, 0], [28, 0, 0, 117, 0, 0, 0, 0], [31, 92, 94, 0, 0, 0, 0, 0],
[32, 63, 91, 0, 0, 0, 0, 0], [36, 0, 0, 80, 34, 62, 0, 0], [48, 59, 48, 10, 19, 83, 38, 96],
[50, 105, 44, 140, 53, 30, 43, 103], [54, 0, 0, 52, 0, 0, 0, 7],
[57, 47, 20, 58, 0, 0, 0, 77], [70, 0, 0, 0, 0, 0, 116, 0], [72, 0, 114, 0, 0, 0, 0, 0],
[73, 123, 0, 15, 46, 82, 35, 37], [78, 0, 0, 29, 111, 127, 41, 110],
[81, 0, 0, 0, 6, 0, 0, 0], [93, 26, 9, 100, 45, 64, 76, 66], [104, 0, 0, 68, 0, 0, 0, 0],
[113, 89, 25, 137, 0, 0, 23, 143], [122, 0, 13, 0, 49, 0, 0, 0],
[128, 69, 0, 74, 16, 125, 4, 42],
];
static BITSET_CANONICAL: [u64; 99] = [
0b0000000000000000000000000000000000000000000000000000000000000000,
0b1000000000000000000000000000000000000000000000000000000001111111,
0b1100000000000000000000000000000000000000000000000000000000010001,
0b0000000000011100000000000000000000000000000111000000000000000000,
@@ -671,7 +657,7 @@ pub mod grapheme_extend {
0b0000000001111111000000000000000000000000000000000000000000000000,
0b1111101111111111111111111111111111111111111111111111111111111111,
0b0000011011111111100000000000000000000000000000000000000000000000,
0b1111111111111111111111111111111111111111111111111111111111111111,
0b1111111111111111000000000000000000000000000000000000000000000000,
0b1111110000000000000000000000110000000000000000000010000110111110,
0b1011111111111111111111111111111111111111111111100000000000000000,
0b0000011111000000000000000000000000000000000000000000000000000000,
@@ -679,7 +665,6 @@ pub mod grapheme_extend {
0b1111111111111111111111111111111100000000000000000000000000000000,
0b1111111111111111111111111111101111111111111110000000000000000000,
0b1111111111111111000000000000000000000000000000100000000000000000,
0b1111111111111111000000000000000000000000000000000000000000000000,
0b1111111100000000000000000000000000000000000000000000000000000010,
0b1111100000000111110000111010000000000000000000000000000000000000,
0b1101000000000000000000000000000000000000000000000000000000000010,
@@ -764,11 +749,11 @@ pub mod grapheme_extend {
0b0000000000000000000000000000000000000000000000000000000010110110,
];
static BITSET_MAPPING: [(u8, u8); 45] = [
(0, 191), (0, 190), (0, 188), (0, 185), (0, 179), (0, 8), (0, 176), (0, 161), (0, 159),
(0, 155), (0, 154), (0, 39), (0, 140), (0, 57), (1, 165), (1, 161), (1, 160), (1, 153),
(1, 147), (1, 142), (1, 139), (2, 181), (2, 176), (2, 167), (2, 153), (2, 149), (3, 26),
(3, 32), (3, 33), (3, 42), (4, 88), (4, 109), (4, 117), (5, 19), (5, 20), (5, 32), (6, 67),
(6, 69), (7, 183), (7, 7), (8, 144), (9, 178), (10, 184), (11, 58), (12, 23),
(0, 64), (1, 191), (1, 190), (1, 188), (1, 185), (1, 179), (1, 8), (1, 176), (1, 161),
(1, 159), (1, 155), (1, 154), (1, 39), (1, 140), (1, 57), (2, 165), (2, 161), (2, 160),
(2, 153), (2, 147), (2, 142), (2, 139), (3, 176), (3, 167), (3, 153), (3, 149), (4, 26),
(4, 32), (4, 33), (4, 42), (5, 88), (5, 109), (5, 117), (6, 19), (6, 20), (6, 32), (7, 67),
(7, 69), (8, 183), (8, 7), (9, 64), (10, 178), (11, 184), (12, 58), (13, 23),
];
pub fn lookup(c: char) -> bool {
@@ -787,39 +772,40 @@ pub mod grapheme_extend {
pub mod lowercase {
const BITSET_LAST_CHUNK_MAP: u16 = 122;
static BITSET_CHUNKS_MAP: [u8; 119] = [
16, 2, 9, 9, 4, 9, 9, 15, 3, 12, 9, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 8, 10, 9, 0, 9, 14, 9, 9, 13, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 5,
13, 16, 0, 0, 8, 0, 0, 11, 12, 9, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 3, 1, 0, 14, 0, 7, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 6,
];
static BITSET_INDEX_CHUNKS: [[u8; 16]; 18] = [
[10, 55, 52, 6, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[15, 24, 20, 34, 35, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[15, 46, 1, 19, 63, 8, 54, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[52, 5, 39, 52, 27, 14, 70, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[52, 52, 52, 50, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 25],
[52, 52, 52, 52, 9, 53, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[52, 52, 52, 52, 52, 52, 52, 52, 52, 8, 52, 52, 52, 52, 52, 52],
[52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 15, 13, 2, 52],
[52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 40, 52, 52, 52],
[52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 56, 52, 52],
[52, 52, 52, 52, 52, 52, 52, 52, 52, 62, 38, 52, 47, 43, 45, 29],
[52, 52, 52, 57, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[52, 52, 52, 65, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[52, 52, 52, 66, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52],
[52, 52, 54, 52, 2, 2, 2, 52, 20, 20, 64, 20, 32, 23, 22, 33],
[52, 69, 30, 16, 21, 48, 49, 44, 42, 7, 31, 37, 52, 26, 12, 28],
[60, 36, 51, 11, 61, 58, 17, 4, 0, 59, 71, 18, 67, 68, 3, 41],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 14, 52, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 39, 0, 47, 43, 45, 30],
[0, 0, 0, 0, 10, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26],
[0, 0, 0, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 54, 0, 52, 52, 52, 0, 21, 21, 64, 21, 33, 24, 23, 34],
[0, 5, 71, 0, 28, 15, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 61, 31, 17, 22, 48, 49, 44, 42, 8, 32, 38, 0, 27, 13, 29],
[11, 55, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[16, 25, 21, 35, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[16, 46, 2, 20, 63, 9, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[60, 37, 51, 12, 70, 58, 18, 1, 6, 59, 68, 19, 65, 66, 3, 41],
];
static BITSET_CANONICAL: [u64; 52] = [
0b0000111111111111111111111111110000000000000000000000000011111111,
0b1010101010101010101010101010101010101010101010101010100000000010,
0b1111111111111111111111111111111111111111111111111111111111111111,
0b1111111111111111111111000000000000000000000000001111110111111111,
0b0000000000000000000000000000000000000000000000000000000000000000,
0b1111111111111111110000000000000000000000000011111111111111111111,
0b1000000000000010000000000000000000000000000000000000000000000000,
0b1010101010101010101010101010101010101010101010101010100000000010,
0b1111111111111111111111000000000000000000000000001111110111111111,
0b0000111111111111111111111111111111111111000000000000000000000000,
0b1000000000000010000000000000000000000000000000000000000000000000,
0b0000111111111111111111111111110000000000000000000000000011111111,
0b0000000000000111111111111111111111111111111111111111111111111111,
0b1111111111111111111111111111111111111111111111111010101010000101,
0b1111111111111111111111111111111100000000000000000000000000000000,
0b1111111111111111111111111111110000000000000000000000000000000000,
@@ -852,7 +838,6 @@ pub mod lowercase {
0b0000000000000000000000001111111111111111110111111100000000000000,
0b0000000000000000000000000001111100000000000000000000000000000011,
0b0000000000000000000000000000000000111010101010101010101010101010,
0b0000000000000000000000000000000000011111111111110000000000000000,
0b0000000000000000000000000000000000000000111110000000000001111111,
0b0000000000000000000000000000000000000000000000000000101111110111,
0b1001001111111010101010101010101010101010101010101010101010101010,
@@ -867,9 +852,9 @@ pub mod lowercase {
0b1110101111000000000000000000000000001111111111111111111111111100,
];
static BITSET_MAPPING: [(u8, u8); 20] = [
(0, 188), (0, 184), (0, 179), (0, 172), (0, 161), (0, 146), (0, 144), (0, 140), (0, 136),
(0, 132), (1, 146), (1, 144), (1, 83), (2, 160), (2, 141), (3, 12), (3, 6), (4, 77),
(5, 187), (6, 78),
(0, 64), (1, 188), (1, 183), (1, 176), (1, 109), (1, 124), (1, 126), (1, 66), (1, 70),
(1, 77), (2, 146), (2, 144), (2, 83), (3, 12), (3, 6), (4, 156), (4, 78), (5, 187),
(6, 132), (7, 93),
];
pub fn lookup(c: char) -> bool {
@@ -888,45 +873,36 @@ pub mod lowercase {
pub mod n {
const BITSET_LAST_CHUNK_MAP: u16 = 253;
static BITSET_CHUNKS_MAP: [u8; 250] = [
45, 19, 19, 39, 23, 40, 6, 37, 33, 17, 19, 12, 42, 32, 41, 19, 8, 19, 2, 16, 19, 19, 13,
19, 1, 43, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 44, 46, 34, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 25, 15, 14, 31, 19, 4, 7, 11, 5, 9, 26, 36,
35, 28, 19, 10, 20, 19, 19, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 38, 19, 30, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 27, 19, 18, 19, 19, 19, 19, 22, 26, 19, 19, 29, 19, 3, 19, 24, 21,
45, 0, 0, 37, 7, 38, 26, 35, 31, 5, 0, 12, 42, 21, 39, 0, 28, 0, 22, 4, 0, 0, 13, 0, 40,
44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 46,
32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 8, 14, 20, 0, 24, 27, 11, 25, 29, 15, 34, 33, 17, 0,
30, 2, 0, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16, 0, 1, 0, 0, 0, 0, 6, 15, 0, 0, 18, 0, 23, 0, 9, 3,
];
static BITSET_INDEX_CHUNKS: [[u8; 8]; 47] = [
[12, 52, 44, 44, 44, 44, 44, 44], [27, 44, 44, 44, 44, 44, 67, 44],
[44, 15, 51, 16, 44, 44, 44, 44], [44, 17, 34, 44, 23, 44, 44, 44],
[44, 18, 11, 4, 62, 44, 61, 2], [44, 19, 44, 44, 56, 66, 44, 46],
[44, 20, 44, 58, 44, 31, 44, 58], [44, 22, 72, 65, 44, 43, 53, 44],
[44, 29, 45, 44, 44, 14, 42, 44], [44, 36, 44, 59, 1, 44, 44, 33],
[44, 37, 44, 44, 44, 55, 57, 44], [44, 44, 44, 3, 59, 44, 44, 44],
[44, 44, 44, 10, 44, 44, 44, 8], [44, 44, 44, 24, 44, 44, 44, 44],
[44, 44, 44, 25, 5, 41, 44, 64], [44, 44, 44, 44, 9, 0, 60, 44],
[44, 44, 44, 44, 44, 1, 48, 44], [44, 44, 44, 44, 44, 7, 44, 44],
[44, 44, 44, 44, 44, 44, 44, 13], [44, 44, 44, 44, 44, 44, 44, 44],
[44, 44, 44, 44, 44, 44, 44, 49], [44, 44, 44, 44, 44, 44, 44, 59],
[44, 44, 44, 44, 44, 45, 44, 44], [44, 44, 44, 44, 44, 58, 44, 30],
[44, 44, 44, 44, 47, 44, 44, 44], [44, 44, 44, 44, 55, 44, 44, 44],
[44, 44, 44, 59, 44, 44, 44, 44], [44, 44, 44, 69, 44, 68, 44, 44],
[44, 44, 44, 71, 44, 55, 44, 44], [44, 44, 44, 73, 44, 55, 44, 44],
[44, 44, 50, 44, 44, 44, 44, 44], [44, 44, 57, 44, 44, 44, 44, 44],
[44, 44, 70, 44, 44, 55, 59, 44], [44, 45, 55, 44, 44, 44, 44, 44],
[44, 55, 44, 44, 44, 44, 44, 59], [44, 55, 44, 45, 26, 44, 44, 44],
[44, 55, 44, 55, 44, 44, 44, 44], [44, 55, 44, 55, 69, 44, 44, 44],
[44, 57, 44, 44, 44, 38, 44, 44], [44, 57, 44, 59, 44, 44, 44, 45],
[44, 58, 44, 58, 44, 32, 44, 35], [44, 70, 44, 44, 44, 44, 44, 44],
[55, 44, 44, 44, 44, 54, 44, 40], [57, 39, 6, 44, 44, 44, 44, 44],
[57, 44, 44, 58, 44, 44, 44, 44], [59, 44, 21, 44, 44, 44, 44, 44],
[63, 44, 44, 55, 45, 44, 44, 28],
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 13], [0, 0, 0, 0, 0, 0, 0, 49],
[0, 0, 0, 0, 0, 0, 0, 59], [0, 0, 0, 0, 0, 2, 48, 0], [0, 0, 0, 0, 0, 8, 0, 0],
[0, 0, 0, 0, 0, 45, 0, 0], [0, 0, 0, 0, 0, 58, 0, 30], [0, 0, 0, 0, 10, 1, 60, 0],
[0, 0, 0, 0, 47, 0, 0, 0], [0, 0, 0, 0, 55, 0, 0, 0], [0, 0, 0, 4, 59, 0, 0, 0],
[0, 0, 0, 11, 0, 0, 0, 9], [0, 0, 0, 24, 0, 0, 0, 0], [0, 0, 0, 25, 6, 41, 0, 64],
[0, 0, 0, 59, 0, 0, 0, 0], [0, 0, 0, 69, 0, 68, 0, 0], [0, 0, 0, 71, 0, 55, 0, 0],
[0, 0, 0, 73, 0, 55, 0, 0], [0, 0, 50, 0, 0, 0, 0, 0], [0, 0, 57, 0, 0, 0, 0, 0],
[0, 0, 70, 0, 0, 55, 59, 0], [0, 15, 51, 16, 0, 0, 0, 0], [0, 17, 34, 0, 23, 0, 0, 0],
[0, 18, 12, 5, 62, 0, 61, 3], [0, 19, 0, 0, 56, 66, 0, 46], [0, 20, 0, 58, 0, 31, 0, 58],
[0, 22, 72, 65, 0, 43, 53, 0], [0, 29, 45, 0, 0, 14, 42, 0], [0, 36, 0, 59, 2, 0, 0, 33],
[0, 37, 0, 0, 0, 55, 57, 0], [0, 45, 55, 0, 0, 0, 0, 0], [0, 55, 0, 0, 0, 0, 0, 59],
[0, 55, 0, 45, 26, 0, 0, 0], [0, 55, 0, 55, 0, 0, 0, 0], [0, 55, 0, 55, 69, 0, 0, 0],
[0, 57, 0, 0, 0, 38, 0, 0], [0, 57, 0, 59, 0, 0, 0, 45], [0, 58, 0, 58, 0, 32, 0, 35],
[0, 70, 0, 0, 0, 0, 0, 0], [27, 0, 0, 0, 0, 0, 67, 0], [44, 52, 0, 0, 0, 0, 0, 0],
[55, 0, 0, 0, 0, 54, 0, 40], [57, 0, 0, 58, 0, 0, 0, 0], [57, 39, 7, 0, 0, 0, 0, 0],
[59, 0, 21, 0, 0, 0, 0, 0], [63, 0, 0, 55, 45, 0, 0, 28],
];
static BITSET_CANONICAL: [u64; 44] = [
0b0000000000000000000000000000000000000000000000000000000000000000,
0b0000000111111111111111111111111111111111111111111111111111111111,
0b1111111111000000000000000000000000000000000000000000000000000000,
0b1111111111111111111111111111111111111111111111001111111111111111,
@@ -939,7 +915,6 @@ pub mod n {
0b0000000000001111111111111111111111111111111111111111111110000000,
0b0000000000000001110000000000000000000000000000000000000000000000,
0b0000000000000000111111111000000000000000000000000000000000000000,
0b1111111111111111111111111111111111111111111111111111111111111111,
0b1111111111111111111111111111111111111111111111111100000000000000,
0b1111111111111111111111111111111111111111111111110000000000000000,
0b1111111111111111111111111111111100000000000000000000000000000000,
@@ -973,10 +948,10 @@ pub mod n {
0b1111111100000000000000000000000011111111000000000000000000000000,
];
static BITSET_MAPPING: [(u8, u8); 30] = [
(0, 185), (0, 175), (0, 76), (0, 172), (0, 165), (0, 164), (0, 162), (0, 157), (0, 138),
(0, 112), (1, 16), (1, 26), (1, 39), (1, 42), (1, 48), (1, 58), (2, 122), (2, 108), (3, 28),
(3, 54), (4, 22), (4, 48), (5, 49), (5, 50), (6, 47), (7, 55), (8, 32), (9, 108), (10, 47),
(11, 32),
(0, 64), (1, 175), (1, 76), (1, 172), (1, 165), (1, 164), (1, 162), (1, 157), (1, 138),
(1, 112), (2, 16), (2, 26), (2, 39), (2, 42), (2, 48), (2, 58), (3, 122), (3, 108), (4, 28),
(4, 54), (5, 22), (5, 48), (6, 49), (6, 50), (7, 47), (8, 55), (9, 32), (10, 108), (11, 47),
(12, 32),
];
pub fn lookup(c: char) -> bool {

View File

@@ -301,7 +301,21 @@ impl Canonicalized {
Canonicalized(usize),
}
while let Some((&to, _)) = mappings.iter().max_by_key(|m| m.1.len()) {
// Map 0 first, so that it is the first canonical word.
// This is realistically not inefficient because 0 is not mapped to by
// anything else (a shift pattern could do it, but would be wasteful).
//
// However, 0s are quite common in the overall dataset, and it is quite
// wasteful to have to go through a mapping function to determine that
// we have a zero.
//
// FIXME: Experiment with choosing most common words in overall data set
// for canonical when possible.
while let Some((&to, _)) = mappings
.iter()
.find(|(&to, _)| to == 0)
.or_else(|| mappings.iter().max_by_key(|m| m.1.len()))
{
// Get the mapping with the most entries. Currently, no mapping can
// only exist transitively (i.e., there is no A, B, C such that A
// does not map to C but A maps to B maps to C), so this is