Add a right shift mapping
This saves fewer bytes - by far - and a right shift is likely not the best operator to choose. But for now, it works -- a better choice may arise later.

Alphabetic      : 2538 bytes (- 84 bytes)
Case_Ignorable  : 1773 bytes (- 30 bytes)
Cased           : 790 bytes (- 18 bytes)
Cc              : 26 bytes (- 6 bytes)
Grapheme_Extend : 1490 bytes (- 18 bytes)
Lowercase       : 865 bytes (- 36 bytes)
N               : 1040 bytes (- 24 bytes)
Uppercase       : 778 bytes (- 60 bytes)
White_Space     : 85 bytes (- 6 bytes)
Total table sizes: 9385 bytes (-282 bytes)
This commit is contained in:
@@ -66,12 +66,12 @@ fn range_search<
|
|||||||
} else {
|
} else {
|
||||||
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
|
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
|
||||||
let mut word = bitset_canonical[real_idx as usize];
|
let mut word = bitset_canonical[real_idx as usize];
|
||||||
let should_invert = mapping & (1 << 7) != 0;
|
let should_invert = mapping & (1 << 6) != 0;
|
||||||
if should_invert {
|
if should_invert {
|
||||||
word = !word;
|
word = !word;
|
||||||
}
|
}
|
||||||
// Unset the inversion bit
|
// Unset the inversion bit
|
||||||
let rotate_by = mapping & !(1 << 7);
|
let rotate_by = mapping & !(1 << 6);
|
||||||
word = word.rotate_left(rotate_by as u32);
|
word = word.rotate_left(rotate_by as u32);
|
||||||
word
|
word
|
||||||
};
|
};
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -286,13 +286,18 @@ fn range_search<
|
|||||||
} else {
|
} else {
|
||||||
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
|
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
|
||||||
let mut word = bitset_canonical[real_idx as usize];
|
let mut word = bitset_canonical[real_idx as usize];
|
||||||
let should_invert = mapping & (1 << 7) != 0;
|
let should_invert = mapping & (1 << 6) != 0;
|
||||||
if should_invert {
|
if should_invert {
|
||||||
word = !word;
|
word = !word;
|
||||||
}
|
}
|
||||||
// Unset the inversion bit
|
// Lower 6 bits
|
||||||
let rotate_by = mapping & !(1 << 7);
|
let quantity = mapping & ((1 << 6) - 1);
|
||||||
word = word.rotate_left(rotate_by as u32);
|
if mapping & (1 << 7) != 0 {
|
||||||
|
// shift
|
||||||
|
word >>= quantity as u64;
|
||||||
|
} else {
|
||||||
|
word = word.rotate_left(quantity as u32);
|
||||||
|
}
|
||||||
word
|
word
|
||||||
};
|
};
|
||||||
(word & (1 << (needle % 64) as u64)) != 0
|
(word & (1 << (needle % 64) as u64)) != 0
|
||||||
|
|||||||
@@ -233,6 +233,7 @@ impl Canonicalized {
|
|||||||
Rotate(u32),
|
Rotate(u32),
|
||||||
Invert,
|
Invert,
|
||||||
RotateAndInvert(u32),
|
RotateAndInvert(u32),
|
||||||
|
ShiftRight(u32),
|
||||||
}
|
}
|
||||||
|
|
||||||
// key is the word being mapped to
|
// key is the word being mapped to
|
||||||
@@ -270,6 +271,18 @@ impl Canonicalized {
|
|||||||
continue 'b;
|
continue 'b;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// All possible shifts
|
||||||
|
for shift_by in 1..64 {
|
||||||
|
if a == (b >> shift_by) {
|
||||||
|
mappings
|
||||||
|
.entry(b)
|
||||||
|
.or_default()
|
||||||
|
.push((a, Mapping::ShiftRight(shift_by as u32)));
|
||||||
|
// We're not interested in further mappings between a and b
|
||||||
|
continue 'b;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// These are the bitset words which will be represented "raw" (as a u64)
|
// These are the bitset words which will be represented "raw" (as a u64)
|
||||||
@@ -384,6 +397,8 @@ impl Canonicalized {
|
|||||||
assert!(distinct_indices.insert(idx));
|
assert!(distinct_indices.insert(idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const LOWER_6: u32 = (1 << 6) - 1;
|
||||||
|
|
||||||
let canonicalized_words = canonicalized_words
|
let canonicalized_words = canonicalized_words
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|v| {
|
.map(|v| {
|
||||||
@@ -391,14 +406,18 @@ impl Canonicalized {
|
|||||||
u8::try_from(v.0).unwrap(),
|
u8::try_from(v.0).unwrap(),
|
||||||
match v.1 {
|
match v.1 {
|
||||||
Mapping::RotateAndInvert(amount) => {
|
Mapping::RotateAndInvert(amount) => {
|
||||||
assert!(amount < (1 << 7));
|
assert_eq!(amount, amount & LOWER_6);
|
||||||
1 << 7 | (amount as u8)
|
1 << 6 | (amount as u8)
|
||||||
}
|
}
|
||||||
Mapping::Rotate(amount) => {
|
Mapping::Rotate(amount) => {
|
||||||
assert!(amount < (1 << 7));
|
assert_eq!(amount, amount & LOWER_6);
|
||||||
amount as u8
|
amount as u8
|
||||||
}
|
}
|
||||||
Mapping::Invert => 1 << 7,
|
Mapping::Invert => 1 << 6,
|
||||||
|
Mapping::ShiftRight(shift_by) => {
|
||||||
|
assert_eq!(shift_by, shift_by & LOWER_6);
|
||||||
|
1 << 7 | (shift_by as u8)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user