Add a right shift mapping
This saves fewer bytes -- by far -- and is likely not the best operator to choose. But for now, it works; a better choice may arise later.

Alphabetic      : 2538 bytes (- 84 bytes)
Case_Ignorable  : 1773 bytes (- 30 bytes)
Cased           :  790 bytes (- 18 bytes)
Cc              :   26 bytes (-  6 bytes)
Grapheme_Extend : 1490 bytes (- 18 bytes)
Lowercase       :  865 bytes (- 36 bytes)
N               : 1040 bytes (- 24 bytes)
Uppercase       :  778 bytes (- 60 bytes)
White_Space     :   85 bytes (-  6 bytes)
Total table sizes: 9385 bytes (-282 bytes)
This commit is contained in:
@@ -286,13 +286,18 @@ fn range_search<
|
||||
} else {
|
||||
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
|
||||
let mut word = bitset_canonical[real_idx as usize];
|
||||
let should_invert = mapping & (1 << 7) != 0;
|
||||
let should_invert = mapping & (1 << 6) != 0;
|
||||
if should_invert {
|
||||
word = !word;
|
||||
}
|
||||
// Unset the inversion bit
|
||||
let rotate_by = mapping & !(1 << 7);
|
||||
word = word.rotate_left(rotate_by as u32);
|
||||
// Lower 6 bits
|
||||
let quantity = mapping & ((1 << 6) - 1);
|
||||
if mapping & (1 << 7) != 0 {
|
||||
// shift
|
||||
word >>= quantity as u64;
|
||||
} else {
|
||||
word = word.rotate_left(quantity as u32);
|
||||
}
|
||||
word
|
||||
};
|
||||
(word & (1 << (needle % 64) as u64)) != 0
|
||||
|
||||
@@ -233,6 +233,7 @@ impl Canonicalized {
|
||||
Rotate(u32),
|
||||
Invert,
|
||||
RotateAndInvert(u32),
|
||||
ShiftRight(u32),
|
||||
}
|
||||
|
||||
// key is the word being mapped to
|
||||
@@ -270,6 +271,18 @@ impl Canonicalized {
|
||||
continue 'b;
|
||||
}
|
||||
}
|
||||
|
||||
// All possible shifts
|
||||
for shift_by in 1..64 {
|
||||
if a == (b >> shift_by) {
|
||||
mappings
|
||||
.entry(b)
|
||||
.or_default()
|
||||
.push((a, Mapping::ShiftRight(shift_by as u32)));
|
||||
// We're not interested in further mappings between a and b
|
||||
continue 'b;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// These are the bitset words which will be represented "raw" (as a u64)
|
||||
@@ -384,6 +397,8 @@ impl Canonicalized {
|
||||
assert!(distinct_indices.insert(idx));
|
||||
}
|
||||
|
||||
const LOWER_6: u32 = (1 << 6) - 1;
|
||||
|
||||
let canonicalized_words = canonicalized_words
|
||||
.into_iter()
|
||||
.map(|v| {
|
||||
@@ -391,14 +406,18 @@ impl Canonicalized {
|
||||
u8::try_from(v.0).unwrap(),
|
||||
match v.1 {
|
||||
Mapping::RotateAndInvert(amount) => {
|
||||
assert!(amount < (1 << 7));
|
||||
1 << 7 | (amount as u8)
|
||||
assert_eq!(amount, amount & LOWER_6);
|
||||
1 << 6 | (amount as u8)
|
||||
}
|
||||
Mapping::Rotate(amount) => {
|
||||
assert!(amount < (1 << 7));
|
||||
assert_eq!(amount, amount & LOWER_6);
|
||||
amount as u8
|
||||
}
|
||||
Mapping::Invert => 1 << 7,
|
||||
Mapping::Invert => 1 << 6,
|
||||
Mapping::ShiftRight(shift_by) => {
|
||||
assert_eq!(shift_by, shift_by & LOWER_6);
|
||||
1 << 7 | (shift_by as u8)
|
||||
}
|
||||
},
|
||||
)
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user