Add a right shift mapping

This saves far fewer bytes, and is likely not the best operator to choose.
But for now, it works -- a better choice may arise later.

Alphabetic     : 2538 bytes   (- 84 bytes)
Case_Ignorable : 1773 bytes   (- 30 bytes)
Cased          : 790 bytes    (- 18 bytes)
Cc             : 26 bytes     (-  6 bytes)
Grapheme_Extend: 1490 bytes   (- 18 bytes)
Lowercase      : 865 bytes    (- 36 bytes)
N              : 1040 bytes   (- 24 bytes)
Uppercase      : 778 bytes    (- 60 bytes)
White_Space    : 85 bytes     (-  6 bytes)
Total table sizes: 9385 bytes (-282 bytes)
This commit is contained in:
Mark Rousskov
2020-03-21 12:11:47 -04:00
parent b0e121d9d5
commit 7b29b70d6e
4 changed files with 828 additions and 847 deletions

View File

@@ -286,13 +286,18 @@ fn range_search<
} else {
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
let mut word = bitset_canonical[real_idx as usize];
let should_invert = mapping & (1 << 7) != 0;
let should_invert = mapping & (1 << 6) != 0;
if should_invert {
word = !word;
}
// Unset the inversion bit
let rotate_by = mapping & !(1 << 7);
word = word.rotate_left(rotate_by as u32);
// Lower 6 bits
let quantity = mapping & ((1 << 6) - 1);
if mapping & (1 << 7) != 0 {
// shift
word >>= quantity as u64;
} else {
word = word.rotate_left(quantity as u32);
}
word
};
(word & (1 << (needle % 64) as u64)) != 0

View File

@@ -233,6 +233,7 @@ impl Canonicalized {
Rotate(u32),
Invert,
RotateAndInvert(u32),
ShiftRight(u32),
}
// key is the word being mapped to
@@ -270,6 +271,18 @@ impl Canonicalized {
continue 'b;
}
}
// All possible shifts
for shift_by in 1..64 {
if a == (b >> shift_by) {
mappings
.entry(b)
.or_default()
.push((a, Mapping::ShiftRight(shift_by as u32)));
// We're not interested in further mappings between a and b
continue 'b;
}
}
}
}
// These are the bitset words which will be represented "raw" (as a u64)
@@ -384,6 +397,8 @@ impl Canonicalized {
assert!(distinct_indices.insert(idx));
}
const LOWER_6: u32 = (1 << 6) - 1;
let canonicalized_words = canonicalized_words
.into_iter()
.map(|v| {
@@ -391,14 +406,18 @@ impl Canonicalized {
u8::try_from(v.0).unwrap(),
match v.1 {
Mapping::RotateAndInvert(amount) => {
assert!(amount < (1 << 7));
1 << 7 | (amount as u8)
assert_eq!(amount, amount & LOWER_6);
1 << 6 | (amount as u8)
}
Mapping::Rotate(amount) => {
assert!(amount < (1 << 7));
assert_eq!(amount, amount & LOWER_6);
amount as u8
}
Mapping::Invert => 1 << 7,
Mapping::Invert => 1 << 6,
Mapping::ShiftRight(shift_by) => {
assert_eq!(shift_by, shift_by & LOWER_6);
1 << 7 | (shift_by as u8)
}
},
)
})