Add canonical combining class to std::unicode
This commit is contained in:
@@ -26,11 +26,15 @@ def fetch(f):
|
||||
def load_unicode_data(f):
|
||||
fetch(f)
|
||||
gencats = {}
|
||||
combines = []
|
||||
canon_decomp = {}
|
||||
compat_decomp = {}
|
||||
curr_cat = ""
|
||||
curr_combine = ""
|
||||
c_lo = 0
|
||||
c_hi = 0
|
||||
com_lo = 0
|
||||
com_hi = 0
|
||||
for line in fileinput.input(f):
|
||||
fields = line.split(";")
|
||||
if len(fields) != 15:
|
||||
@@ -69,7 +73,21 @@ def load_unicode_data(f):
|
||||
c_lo = code
|
||||
c_hi = code
|
||||
|
||||
return (canon_decomp, compat_decomp, gencats)
|
||||
if curr_combine == "":
|
||||
curr_combine = combine
|
||||
com_lo = code
|
||||
com_hi = code
|
||||
|
||||
if curr_combine == combine:
|
||||
com_hi = code
|
||||
else:
|
||||
if curr_combine != "0":
|
||||
combines.append((com_lo, com_hi, curr_combine))
|
||||
curr_combine = combine
|
||||
com_lo = code
|
||||
com_hi = code
|
||||
|
||||
return (canon_decomp, compat_decomp, gencats, combines)
|
||||
|
||||
|
||||
def load_derived_core_properties(f):
|
||||
@@ -193,7 +211,7 @@ def format_table_content(f, content, indent):
|
||||
line = " "*indent + chunk
|
||||
f.write(line)
|
||||
|
||||
def emit_decomp_module(f, canon, compat):
|
||||
def emit_decomp_module(f, canon, compat, combine):
|
||||
canon_keys = canon.keys()
|
||||
canon_keys.sort()
|
||||
|
||||
@@ -217,8 +235,26 @@ def emit_decomp_module(f, canon, compat):
|
||||
}
|
||||
None => None
|
||||
}
|
||||
}\n
|
||||
""")
|
||||
|
||||
f.write("""
|
||||
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
|
||||
use cmp::{Equal, Less, Greater};
|
||||
match r.bsearch(|&(lo, hi, _)| {
|
||||
if lo <= c && c <= hi { Equal }
|
||||
else if hi < c { Less }
|
||||
else { Greater }
|
||||
}) {
|
||||
Some(idx) => {
|
||||
let (_, _, result) = r[idx];
|
||||
result
|
||||
}
|
||||
None => 0
|
||||
}
|
||||
}\n\n
|
||||
""")
|
||||
|
||||
f.write(" // Canonical decompositions\n")
|
||||
f.write(" static canonical_table : &'static [(char, &'static [char])] = &[\n")
|
||||
data = ""
|
||||
@@ -237,6 +273,7 @@ def emit_decomp_module(f, canon, compat):
|
||||
data += "])"
|
||||
format_table_content(f, data, 8)
|
||||
f.write("\n ];\n\n")
|
||||
|
||||
f.write(" // Compatibility decompositions\n")
|
||||
f.write(" static compatibility_table : &'static [(char, &'static [char])] = &[\n")
|
||||
data = ""
|
||||
@@ -255,10 +292,22 @@ def emit_decomp_module(f, canon, compat):
|
||||
data += "])"
|
||||
format_table_content(f, data, 8)
|
||||
f.write("\n ];\n\n")
|
||||
|
||||
f.write(" static combining_class_table : &'static [(char, char, u8)] = &[\n")
|
||||
ix = 0
|
||||
for pair in combine:
|
||||
f.write(ch_prefix(ix))
|
||||
f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2]))
|
||||
ix += 1
|
||||
f.write("\n ];\n")
|
||||
|
||||
f.write(" pub fn canonical(c: char, i: &fn(char)) "
|
||||
+ "{ d(c, i, false); }\n\n")
|
||||
f.write(" pub fn compatibility(c: char, i: &fn(char)) "
|
||||
+"{ d(c, i, true); }\n\n")
|
||||
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
|
||||
+ " bsearch_range_value_table(c, combining_class_table)\n"
|
||||
+ " }\n\n")
|
||||
f.write(" fn d(c: char, i: &fn(char), k: bool) {\n")
|
||||
f.write(" use iterator::Iterator;\n");
|
||||
|
||||
@@ -302,7 +351,7 @@ for i in [r]:
|
||||
os.remove(i);
|
||||
rf = open(r, "w")
|
||||
|
||||
(canon_decomp, compat_decomp, gencats) = load_unicode_data("UnicodeData.txt")
|
||||
(canon_decomp, compat_decomp, gencats, combines) = load_unicode_data("UnicodeData.txt")
|
||||
|
||||
# Preamble
|
||||
rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
|
||||
@@ -324,7 +373,7 @@ rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGH
|
||||
|
||||
emit_property_module(rf, "general_category", gencats)
|
||||
|
||||
emit_decomp_module(rf, canon_decomp, compat_decomp)
|
||||
emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
|
||||
|
||||
derived = load_derived_core_properties("DerivedCoreProperties.txt")
|
||||
emit_property_module(rf, "derived_property", derived)
|
||||
|
||||
Reference in New Issue
Block a user