Deduplicate test and primary range_search definitions

This ensures that what we test is what we get for final results as well.
This commit is contained in:
Mark Rousskov
2020-03-21 12:20:18 -04:00
parent 7b29b70d6e
commit 5f71d98f90
4 changed files with 103 additions and 101 deletions

View File

@@ -181,7 +181,10 @@ fn main() {
"///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n",
);
table_file.push_str("use super::range_search;\n\n");
// Include the range search function
table_file.push('\n');
table_file.push_str(include_str!("range_search.rs"));
table_file.push('\n');
table_file.push_str(&version());
@@ -251,60 +254,6 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
s.push_str(&format!("#[path = \"{}\"]\n", data_path));
s.push_str("mod unicode_data;\n\n");
s.push_str(
"
#[inline(always)]
fn range_search<
const N: usize,
const CHUNK_SIZE: usize,
const N1: usize,
const CANONICAL: usize,
const CANONICALIZED: usize,
>(
needle: u32,
chunk_idx_map: &[u8; N],
(last_chunk_idx, last_chunk_mapping): (u16, u8),
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
bitset_canonical: &[u64; CANONICAL],
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
let bucket_idx = (needle / 64) as usize;
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
let chunk_piece = bucket_idx % CHUNK_SIZE;
let chunk_idx = if chunk_map_idx >= N {
if chunk_map_idx == last_chunk_idx as usize {
last_chunk_mapping
} else {
return false;
}
} else {
chunk_idx_map[chunk_map_idx]
};
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
let word = if idx < CANONICAL {
bitset_canonical[idx]
} else {
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
let mut word = bitset_canonical[real_idx as usize];
let should_invert = mapping & (1 << 6) != 0;
if should_invert {
word = !word;
}
// Lower 6 bits
let quantity = mapping & ((1 << 6) - 1);
if mapping & (1 << 7) != 0 {
// shift
word >>= quantity as u64;
} else {
word = word.rotate_left(quantity as u32);
}
word
};
(word & (1 << (needle % 64) as u64)) != 0
}
",
);
s.push_str("\nfn main() {\n");
for (property, ranges) in ranges {

View File

@@ -0,0 +1,49 @@
#[inline(always)]
fn range_search<
const N: usize,
const CHUNK_SIZE: usize,
const N1: usize,
const CANONICAL: usize,
const CANONICALIZED: usize,
>(
needle: u32,
chunk_idx_map: &[u8; N],
(last_chunk_idx, last_chunk_mapping): (u16, u8),
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
bitset_canonical: &[u64; CANONICAL],
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
let bucket_idx = (needle / 64) as usize;
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
let chunk_piece = bucket_idx % CHUNK_SIZE;
let chunk_idx = if chunk_map_idx >= N {
if chunk_map_idx == last_chunk_idx as usize {
last_chunk_mapping
} else {
return false;
}
} else {
chunk_idx_map[chunk_map_idx]
};
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
let word = if idx < CANONICAL {
bitset_canonical[idx]
} else {
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
let mut word = bitset_canonical[real_idx as usize];
let should_invert = mapping & (1 << 6) != 0;
if should_invert {
word = !word;
}
// Lower 6 bits
let quantity = mapping & ((1 << 6) - 1);
if mapping & (1 << 7) != 0 {
// shift
word >>= quantity as u64;
} else {
word = word.rotate_left(quantity as u32);
}
word
};
(word & (1 << (needle % 64) as u64)) != 0
}