syntax: calculate positions of multibyte characters more correctly.
They are still not completely correct, since this does not handle graphemes at all, just codepoints, but at least it handles the common case correctly. The calculation was previously very wrong (rather than just a little bit wrong): it wasn't accounting for the fact that every character is at least 1 byte, and so multibyte characters were pretending to be zero width. cc #8706
This commit is contained in:
committed by
Alex Crichton
parent
ff79a4471c
commit
8812e8ad49
@@ -460,11 +460,12 @@ impl CodeMap {
|
||||
for mbc in multibyte_chars.get().iter() {
|
||||
debug!("codemap: {:?}-byte char at {:?}", mbc.bytes, mbc.pos);
|
||||
if mbc.pos < bpos {
|
||||
total_extra_bytes += mbc.bytes;
|
||||
// every character is at least one byte, so we only
|
||||
// count the actual extra bytes.
|
||||
total_extra_bytes += mbc.bytes - 1;
|
||||
// We should never see a byte position in the middle of a
|
||||
// character
|
||||
assert!(bpos == mbc.pos ||
|
||||
bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
|
||||
assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user