Pre-pop zero chunks before mapping LAST_CHUNK_MAP

This avoids wasting a small amount of space for some of the data sets.

The chunk resizing is caused by, but not directly related to, the changes in
this commit.

Alphabetic     : 3036 bytes
Case_Ignorable : 2133 bytes    (- 3 bytes)
Cased          : 934 bytes
Cc             : 32 bytes
Grapheme_Extend: 1760 bytes    (-14 bytes)
Lowercase      : 985 bytes
N              : 1220 bytes    (- 5 bytes)
Uppercase      : 934 bytes
White_Space    : 97 bytes
Total table sizes: 11131 bytes (-22 bytes)
This commit is contained in:
Mark Rousskov
2020-03-20 18:38:08 -04:00
parent 580a6342ef
commit 6c7691a37b
2 changed files with 88 additions and 96 deletions

View File

@@ -134,49 +134,41 @@ pub mod alphabetic {
#[rustfmt::skip]
pub mod case_ignorable {
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (896, 33);
static BITSET_CHUNKS_MAP: [u8; 125] = [
25, 14, 21, 30, 28, 4, 17, 23, 22, 0, 0, 16, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 13, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 3, 6, 9, 0, 7, 11, 32, 31, 26, 29, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0,
10, 0, 8, 0, 19, 0, 12, 0, 1,
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 51);
static BITSET_CHUNKS_MAP: [u8; 250] = [
36, 19, 16, 26, 29, 40, 47, 38, 42, 5, 0, 9, 23, 25, 34, 3, 30, 0, 0, 0, 0, 0, 21, 31, 39,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 15, 22, 28,
33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 32, 1, 11, 0, 0, 0, 44, 8, 18, 50, 41, 49, 45, 37, 43,
46, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6, 20, 0, 0, 0, 48, 0, 0, 27, 12, 0, 0, 10, 0, 0, 0, 0, 2,
];
static BITSET_INDEX_CHUNKS: [[u8; 16]; 34] = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 166],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 47, 57],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 173, 3],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 90, 136, 38],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 104, 7, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 78, 27, 0, 148, 138, 81, 44, 119],
[0, 0, 0, 0, 0, 0, 0, 0, 154, 0, 0, 58, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 167, 99, 77, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 48, 0, 116, 0, 0],
[0, 0, 0, 0, 0, 172, 70, 0, 0, 8, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 60, 0, 0, 0, 0, 0, 67, 0, 0, 24, 0, 0],
[0, 0, 0, 29, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 135, 0, 0, 0, 0, 16, 162, 46, 86, 51, 80, 13, 111],
[0, 0, 12, 0, 0, 43, 163, 92, 35, 82, 0, 71, 175, 14, 83, 131],
[0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 133, 0, 87, 0, 150, 0, 178, 75, 0, 0, 0, 0, 0, 0, 0],
[20, 5, 61, 0, 120, 0, 0, 0, 32, 156, 176, 1, 126, 91, 69, 88],
[26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[62, 0, 0, 0, 137, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0],
[66, 0, 0, 152, 72, 25, 134, 59, 102, 124, 165, 101, 0, 64, 0, 68],
[73, 33, 0, 181, 125, 85, 122, 139, 123, 100, 123, 169, 155, 54, 4, 18],
[74, 151, 36, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[106, 135, 0, 112, 177, 107, 180, 168, 0, 0, 0, 0, 0, 0, 157, 142],
[109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[113, 50, 108, 0, 0, 0, 0, 0, 0, 0, 174, 182, 182, 114, 10, 0],
[115, 0, 0, 0, 141, 5, 0, 49, 145, 34, 31, 0, 0, 0, 0, 0],
[118, 0, 42, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[143, 95, 37, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0],
[161, 0, 103, 0, 160, 11, 30, 0, 0, 0, 0, 93, 0, 0, 0, 0],
[164, 55, 155, 53, 127, 52, 2, 28, 117, 21, 128, 19, 110, 147, 129, 9],
[170, 41, 153, 6, 0, 0, 159, 39, 158, 1, 105, 0, 65, 0, 0, 0],
[171, 149, 132, 17, 98, 89, 146, 23, 140, 0, 0, 63, 127, 97, 0, 0],
[179, 182, 0, 0, 182, 182, 182, 79, 0, 0, 0, 0, 0, 0, 0, 0],
static BITSET_INDEX_CHUNKS: [[u8; 8]; 52] = [
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 130], [0, 0, 0, 0, 0, 0, 0, 166],
[0, 0, 0, 0, 0, 0, 157, 142], [0, 0, 0, 0, 0, 22, 47, 57], [0, 0, 0, 0, 0, 45, 0, 0],
[0, 0, 0, 0, 0, 172, 70, 0], [0, 0, 0, 0, 40, 0, 173, 3], [0, 0, 0, 0, 60, 0, 0, 0],
[0, 0, 0, 0, 94, 90, 136, 38], [0, 0, 0, 29, 0, 15, 0, 0], [0, 0, 0, 48, 0, 116, 0, 0],
[0, 0, 0, 76, 0, 0, 0, 0], [0, 0, 0, 93, 0, 0, 0, 0], [0, 0, 0, 96, 104, 7, 0, 0],
[0, 0, 0, 135, 0, 0, 0, 0], [0, 0, 12, 0, 0, 43, 163, 92], [0, 0, 56, 0, 0, 0, 0, 0],
[0, 0, 67, 0, 0, 24, 0, 0], [0, 0, 174, 182, 182, 114, 10, 0], [0, 8, 0, 0, 0, 0, 0, 0],
[0, 133, 0, 87, 0, 150, 0, 178], [16, 162, 46, 86, 51, 80, 13, 111],
[20, 5, 61, 0, 120, 0, 0, 0], [26, 0, 0, 0, 0, 0, 0, 0], [32, 156, 176, 1, 126, 91, 69, 88],
[35, 82, 0, 71, 175, 14, 83, 131], [62, 0, 0, 0, 137, 0, 0, 0],
[66, 0, 0, 152, 72, 25, 134, 59], [73, 33, 0, 181, 125, 85, 122, 139],
[74, 151, 36, 84, 0, 0, 0, 0], [75, 0, 0, 0, 0, 0, 0, 0],
[78, 27, 0, 148, 138, 81, 44, 119], [102, 124, 165, 101, 0, 64, 0, 68],
[106, 135, 0, 112, 177, 107, 180, 168], [109, 0, 0, 0, 0, 0, 0, 0],
[113, 50, 108, 0, 0, 0, 0, 0], [115, 0, 0, 0, 141, 5, 0, 49],
[117, 21, 128, 19, 110, 147, 129, 9], [118, 0, 42, 144, 0, 0, 0, 0],
[123, 100, 123, 169, 155, 54, 4, 18], [140, 0, 0, 63, 127, 97, 0, 0],
[143, 95, 37, 121, 0, 0, 0, 0], [145, 34, 31, 0, 0, 0, 0, 0], [154, 0, 0, 58, 0, 0, 0, 0],
[158, 1, 105, 0, 65, 0, 0, 0], [161, 0, 103, 0, 160, 11, 30, 0],
[164, 55, 155, 53, 127, 52, 2, 28], [167, 99, 77, 0, 0, 0, 0, 0],
[170, 41, 153, 6, 0, 0, 159, 39], [171, 149, 132, 17, 98, 89, 146, 23],
[179, 182, 0, 0, 182, 182, 182, 79],
];
static BITSET: [u64; 183] = [
0, 1, 2, 3, 4, 8, 13, 15, 28, 64, 176, 191, 1016, 1792, 2047, 4080, 4096, 8192, 8193,
@@ -288,11 +280,12 @@ pub mod cased {
#[rustfmt::skip]
pub mod cc {
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (0, 0);
static BITSET_CHUNKS_MAP: [u8; 0] = [
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (2, 1);
static BITSET_CHUNKS_MAP: [u8; 2] = [
1, 2,
];
static BITSET_INDEX_CHUNKS: [[u8; 5]; 1] = [
[1, 2, 1, 0, 0],
static BITSET_INDEX_CHUNKS: [[u8; 1]; 3] = [
[0], [1], [2],
];
static BITSET: [u64; 3] = [
0, 4294967295, 9223372036854775808,
@@ -311,46 +304,37 @@ pub mod cc {
#[rustfmt::skip]
pub mod grapheme_extend {
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (896, 30);
static BITSET_CHUNKS_MAP: [u8; 123] = [
4, 15, 21, 27, 25, 3, 18, 23, 17, 0, 0, 14, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 2, 7, 10, 0, 8, 12, 29, 28, 24, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0,
11, 0, 9, 0, 19, 0, 13,
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 44);
static BITSET_CHUNKS_MAP: [u8; 245] = [
0, 8, 15, 22, 26, 33, 40, 32, 35, 3, 0, 7, 21, 23, 30, 0, 20, 0, 0, 0, 0, 0, 12, 0, 27, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 25, 29, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 5, 0, 28, 1, 10, 0, 0, 0, 37, 6, 17, 43, 34, 42, 38, 31, 36, 39, 13, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 14, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 18, 0, 0,
0, 41, 0, 0, 24, 11, 0, 0, 9,
];
static BITSET_INDEX_CHUNKS: [[u8; 16]; 31] = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 20, 46],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 74, 106, 31],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 143, 66, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, 87, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 107, 37, 70, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 0, 0, 37, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 0, 48, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 134, 82, 64, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 39, 0, 94, 0, 0],
[0, 0, 0, 0, 0, 133, 58, 0, 0, 5, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 55, 0, 0, 18, 0, 0],
[0, 0, 0, 21, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 71, 0, 118, 0, 142, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 9, 0, 0, 0, 129, 7, 26, 67, 0, 59, 140, 11, 68, 104],
[0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[12, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[13, 0, 50, 0, 96, 0, 0, 0, 27, 123, 139, 1, 100, 75, 57, 72],
[51, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0],
[54, 0, 0, 120, 61, 19, 105, 47, 85, 98, 131, 84, 0, 0, 0, 56],
[60, 28, 0, 141, 99, 45, 111, 109, 97, 83, 97, 136, 132, 44, 108, 22],
[63, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[89, 0, 0, 91, 0, 0, 0, 135, 0, 0, 0, 0, 0, 0, 0, 0],
[93, 0, 0, 0, 113, 3, 0, 40, 115, 29, 24, 0, 0, 0, 0, 0],
[114, 78, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 0],
[128, 0, 86, 0, 127, 8, 23, 0, 0, 0, 0, 76, 0, 0, 0, 0],
[130, 42, 122, 41, 112, 43, 2, 36, 95, 15, 101, 14, 90, 117, 102, 6],
[137, 34, 124, 4, 0, 0, 126, 32, 125, 1, 88, 0, 53, 0, 0, 0],
[138, 119, 92, 0, 81, 73, 116, 17, 110, 0, 0, 52, 112, 80, 0, 0],
[142, 143, 0, 0, 143, 143, 143, 66, 0, 0, 0, 0, 0, 0, 0, 0],
static BITSET_INDEX_CHUNKS: [[u8; 8]; 45] = [
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 103], [0, 0, 0, 0, 0, 16, 20, 46],
[0, 0, 0, 0, 0, 38, 0, 0], [0, 0, 0, 0, 0, 133, 58, 0], [0, 0, 0, 0, 33, 0, 0, 0],
[0, 0, 0, 0, 49, 0, 0, 0], [0, 0, 0, 0, 77, 74, 106, 31], [0, 0, 0, 0, 143, 66, 0, 0],
[0, 0, 0, 21, 0, 10, 0, 0], [0, 0, 0, 39, 0, 94, 0, 0], [0, 0, 0, 62, 0, 0, 0, 0],
[0, 0, 0, 71, 0, 118, 0, 142], [0, 0, 0, 76, 0, 0, 0, 0], [0, 0, 0, 79, 87, 0, 0, 0],
[0, 0, 9, 0, 0, 0, 129, 7], [0, 0, 35, 0, 0, 0, 0, 0], [0, 0, 55, 0, 0, 18, 0, 0],
[0, 5, 0, 0, 0, 0, 0, 0], [0, 107, 37, 70, 0, 0, 0, 0], [12, 0, 0, 69, 0, 0, 0, 0],
[13, 0, 50, 0, 96, 0, 0, 0], [26, 67, 0, 59, 140, 11, 68, 104],
[27, 123, 139, 1, 100, 75, 57, 72], [51, 0, 0, 0, 87, 0, 0, 0],
[54, 0, 0, 120, 61, 19, 105, 47], [60, 28, 0, 141, 99, 45, 111, 109],
[63, 0, 25, 0, 0, 0, 0, 0], [65, 0, 0, 0, 0, 0, 37, 0], [85, 98, 131, 84, 0, 0, 0, 56],
[89, 0, 0, 91, 0, 0, 0, 135], [93, 0, 0, 0, 113, 3, 0, 40],
[95, 15, 101, 14, 90, 117, 102, 6], [97, 83, 97, 136, 132, 44, 108, 22],
[110, 0, 0, 52, 112, 80, 0, 0], [114, 78, 30, 0, 0, 0, 0, 0], [115, 29, 24, 0, 0, 0, 0, 0],
[121, 0, 0, 48, 0, 0, 0, 0], [125, 1, 88, 0, 53, 0, 0, 0], [128, 0, 86, 0, 127, 8, 23, 0],
[130, 42, 122, 41, 112, 43, 2, 36], [134, 82, 64, 0, 0, 0, 0, 0],
[137, 34, 124, 4, 0, 0, 126, 32], [138, 119, 92, 0, 81, 73, 116, 17],
[142, 143, 0, 0, 143, 143, 143, 66],
];
static BITSET: [u64; 144] = [
0, 1, 2, 8, 13, 28, 64, 182, 191, 1016, 2032, 2047, 4096, 14336, 16128, 32640, 32768,
@@ -454,8 +438,8 @@ pub mod lowercase {
#[rustfmt::skip]
pub mod n {
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (254, 0);
static BITSET_CHUNKS_MAP: [u8; 254] = [
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (253, 2);
static BITSET_CHUNKS_MAP: [u8; 249] = [
44, 0, 0, 29, 5, 31, 35, 26, 22, 6, 0, 12, 40, 20, 27, 0, 33, 0, 39, 7, 0, 0, 17, 0, 45,
42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 43,
@@ -464,7 +448,7 @@ pub mod n {
30, 1, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14, 0, 3, 0, 0, 0, 0, 4, 15, 0, 0, 11, 0, 38, 0, 8, 0, 0, 0, 0, 2,
14, 0, 3, 0, 0, 0, 0, 4, 15, 0, 0, 11, 0, 38, 0, 8,
];
static BITSET_INDEX_CHUNKS: [[u8; 8]; 47] = [
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 11], [0, 0, 0, 0, 0, 0, 0, 47],

View File

@@ -67,7 +67,7 @@ impl RawEmitter {
panic!("cannot pack {} into 8 bits", unique_words.len());
}
// needed for the chunk mapping to work
assert_eq!(unique_words[0], 0, "first word is all zeros");
assert_eq!(unique_words[0], 0, "has a zero word");
let word_indices = unique_words
.iter()
@@ -80,7 +80,7 @@ impl RawEmitter {
let mut best = None;
for length in 1..=64 {
let mut temp = self.clone();
temp.emit_chunk_map(&compressed_words, length);
temp.emit_chunk_map(word_indices[&0], &compressed_words, length);
if let Some((_, size)) = best {
if temp.bytes_used < size {
best = Some((length, temp.bytes_used));
@@ -89,7 +89,7 @@ impl RawEmitter {
best = Some((length, temp.bytes_used));
}
}
self.emit_chunk_map(&compressed_words, best.unwrap().0);
self.emit_chunk_map(word_indices[&0], &compressed_words, best.unwrap().0);
writeln!(
&mut self.file,
@@ -101,12 +101,12 @@ impl RawEmitter {
self.bytes_used += 8 * unique_words.len();
}
fn emit_chunk_map(&mut self, compressed_words: &[u8], chunk_length: usize) {
fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
let mut compressed_words = compressed_words.to_vec();
for _ in 0..(chunk_length - (compressed_words.len() % chunk_length)) {
// pad out bitset index with zero words so we have all chunks of
// chunk_length
compressed_words.push(0);
compressed_words.push(zero_at);
}
let mut chunks = BTreeSet::new();
@@ -123,6 +123,14 @@ impl RawEmitter {
for chunk in compressed_words.chunks(chunk_length) {
chunk_indices.push(chunk_map[chunk]);
}
// If one of the chunks has all of its entries pointing to the bitset
// word filled with zeros, then pop trailing references to that chunk
// off the end -- we know they are useless.
let zero_chunk_idx = chunks.iter().position(|chunk| chunk.iter().all(|e| *e == zero_at));
while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
chunk_indices.pop();
}
writeln!(
&mut self.file,
"static BITSET_LAST_CHUNK_MAP: (u16, u8) = ({}, {});",
@@ -131,9 +139,9 @@ impl RawEmitter {
)
.unwrap();
self.bytes_used += 3;
// Strip out the empty pieces, presuming our above pop() made us now
// have some trailing zeros.
while let Some(0) = chunk_indices.last() {
// Try to pop again, now that we've recorded a non-zero pointing index
// into the LAST_CHUNK_MAP.
while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
chunk_indices.pop();
}
writeln!(