Remove separate encoding for a single nonzero-mapping byte

In practice, for the two data sets that still use the bitset encoding (uppercase
and lowercase), this is not a significant win, so just drop it entirely. Dropping
it costs us about 5 bytes, but the complexity it removes is nontrivial.
This commit is contained in:
Mark Rousskov
2020-03-27 18:01:14 -04:00
parent 9c1ceece20
commit b6bc906004
3 changed files with 9 additions and 46 deletions

View File

@@ -8,7 +8,6 @@ fn bitset_search<
>(
needle: u32,
chunk_idx_map: &[u8; N],
last_chunk_idx: u16,
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
bitset_canonical: &[u64; CANONICAL],
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
@@ -16,12 +15,8 @@ fn bitset_search<
let bucket_idx = (needle / 64) as usize;
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
let chunk_piece = bucket_idx % CHUNK_SIZE;
// The last entry of `chunk_idx_map` actually should be at `last_chunk_idx`,
// so we need to remap it
let chunk_idx = if chunk_map_idx < (chunk_idx_map.len() - 1) {
chunk_idx_map[chunk_map_idx]
} else if chunk_map_idx == last_chunk_idx as usize {
chunk_idx_map[chunk_idx_map.len() - 1]
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
v
} else {
return false;
};

View File

@@ -139,7 +139,6 @@ impl RawEmitter {
writeln!(&mut self.file, " super::bitset_search(",).unwrap();
writeln!(&mut self.file, " c as u32,").unwrap();
writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap();
writeln!(&mut self.file, " BITSET_LAST_CHUNK_MAP,").unwrap();
writeln!(&mut self.file, " &BITSET_INDEX_CHUNKS,").unwrap();
writeln!(&mut self.file, " &BITSET_CANONICAL,").unwrap();
writeln!(&mut self.file, " &BITSET_MAPPING,").unwrap();
@@ -170,29 +169,6 @@ impl RawEmitter {
chunk_indices.push(chunk_map[chunk]);
}
// If one of the chunks has all of the entries point to the bitset
// word filled with zeros, then pop those off the end -- we know they
// are useless.
let zero_chunk_idx = chunks.iter().position(|chunk| chunk.iter().all(|e| *e == zero_at));
while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
chunk_indices.pop();
}
// We do not count the LAST_CHUNK_MAP as adding bytes because it's a
// small constant whose values are inlined directly into the instruction
// stream.
writeln!(
&mut self.file,
"const BITSET_LAST_CHUNK_MAP: u16 = {};",
chunk_indices.len() - 1,
)
.unwrap();
let nonzero = chunk_indices.pop().unwrap();
// Try to pop again, now that we've recorded a non-zero pointing index
// into the LAST_CHUNK_MAP.
while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
chunk_indices.pop();
}
chunk_indices.push(nonzero);
writeln!(
&mut self.file,
"static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",