git.lizzy.rs Git - rust.git/commitdiff
unicode: Make statics legal
author Alex Crichton <alex@alexcrichton.com>
Mon, 6 Oct 2014 23:14:38 +0000 (16:14 -0700)
committer Alex Crichton <alex@alexcrichton.com>
Thu, 9 Oct 2014 16:44:51 +0000 (09:44 -0700)
The tables in libunicode are far too large to be worth inlining into any other
program, so all of these tables will remain `static`. For that to be legal, they
can no longer refer to one another by value; they now hold references instead.

This commit also modifies the src/etc/unicode.py script to generate the right
tables.

src/etc/unicode.py
src/libunicode/normalize.rs
src/libunicode/tables.rs

index a74bb748971356a24de6de0122245d7712b09242..0b128686690f095f1f5c4974372281c8566da561 100755 (executable)
@@ -333,14 +333,14 @@ def emit_property_module(f, mod, tbl, emit_fn):
 def emit_regex_module(f, cats, w_data):
     f.write("pub mod regex {\n")
     regex_class = "&'static [(char, char)]"
-    class_table = "&'static [(&'static str, %s)]" % regex_class
+    class_table = "&'static [(&'static str, &'static %s)]" % regex_class
 
     emit_table(f, "UNICODE_CLASSES", cats, class_table,
-        pfun=lambda x: "(\"%s\",super::%s::%s_table)" % (x[0], x[1], x[0]))
+        pfun=lambda x: "(\"%s\",&super::%s::%s_table)" % (x[0], x[1], x[0]))
 
-    f.write("    pub static PERLD: %s = super::general_category::Nd_table;\n\n"
+    f.write("    pub static PERLD: &'static %s = &super::general_category::Nd_table;\n\n"
             % regex_class)
-    f.write("    pub static PERLS: %s = super::property::White_Space_table;\n\n"
+    f.write("    pub static PERLS: &'static %s = &super::property::White_Space_table;\n\n"
             % regex_class)
 
     emit_table(f, "PERLW", w_data, regex_class)
index a60e95c38272bf4f08c071587cc23a7a99a991d9..76a9476d1fc8dfc452e76e64d43822998431e2d7 100644 (file)
@@ -100,15 +100,15 @@ pub fn compose(a: char, b: char) -> Option<char> {
 }
 
 // Constants from Unicode 6.3.0 Section 3.12 Conjoining Jamo Behavior
-static S_BASE: u32 = 0xAC00;
-static L_BASE: u32 = 0x1100;
-static V_BASE: u32 = 0x1161;
-static T_BASE: u32 = 0x11A7;
-static L_COUNT: u32 = 19;
-static V_COUNT: u32 = 21;
-static T_COUNT: u32 = 28;
-static N_COUNT: u32 = (V_COUNT * T_COUNT);
-static S_COUNT: u32 = (L_COUNT * N_COUNT);
+const S_BASE: u32 = 0xAC00;
+const L_BASE: u32 = 0x1100;
+const V_BASE: u32 = 0x1161;
+const T_BASE: u32 = 0x11A7;
+const L_COUNT: u32 = 19;
+const V_COUNT: u32 = 21;
+const T_COUNT: u32 = 28;
+const N_COUNT: u32 = (V_COUNT * T_COUNT);
+const S_COUNT: u32 = (L_COUNT * N_COUNT);
 
 // Decompose a precomposed Hangul syllable
 #[inline(always)]
index 135b267262cc670b9c9c686cec62c649a7f0e7f5..e359883295f421925f87588e0d207aacbaaed403 100644 (file)
@@ -3636,108 +3636,109 @@ pub fn White_Space(c: char) -> bool {
 }
 
 pub mod regex {
-    pub static UNICODE_CLASSES: &'static [(&'static str, &'static [(char, char)])] = &[
-        ("Alphabetic", super::derived_property::Alphabetic_table), ("Arabic",
-        super::script::Arabic_table), ("Armenian", super::script::Armenian_table), ("Avestan",
-        super::script::Avestan_table), ("Balinese", super::script::Balinese_table), ("Bamum",
-        super::script::Bamum_table), ("Bassa_Vah", super::script::Bassa_Vah_table), ("Batak",
-        super::script::Batak_table), ("Bengali", super::script::Bengali_table), ("Bopomofo",
-        super::script::Bopomofo_table), ("Brahmi", super::script::Brahmi_table), ("Braille",
-        super::script::Braille_table), ("Buginese", super::script::Buginese_table), ("Buhid",
-        super::script::Buhid_table), ("C", super::general_category::C_table),
-        ("Canadian_Aboriginal", super::script::Canadian_Aboriginal_table), ("Carian",
-        super::script::Carian_table), ("Caucasian_Albanian",
-        super::script::Caucasian_Albanian_table), ("Cc", super::general_category::Cc_table), ("Cf",
-        super::general_category::Cf_table), ("Chakma", super::script::Chakma_table), ("Cham",
-        super::script::Cham_table), ("Cherokee", super::script::Cherokee_table), ("Cn",
-        super::general_category::Cn_table), ("Co", super::general_category::Co_table), ("Common",
-        super::script::Common_table), ("Coptic", super::script::Coptic_table), ("Cuneiform",
-        super::script::Cuneiform_table), ("Cypriot", super::script::Cypriot_table), ("Cyrillic",
-        super::script::Cyrillic_table), ("Default_Ignorable_Code_Point",
-        super::derived_property::Default_Ignorable_Code_Point_table), ("Deseret",
-        super::script::Deseret_table), ("Devanagari", super::script::Devanagari_table), ("Duployan",
-        super::script::Duployan_table), ("Egyptian_Hieroglyphs",
-        super::script::Egyptian_Hieroglyphs_table), ("Elbasan", super::script::Elbasan_table),
-        ("Ethiopic", super::script::Ethiopic_table), ("Georgian", super::script::Georgian_table),
-        ("Glagolitic", super::script::Glagolitic_table), ("Gothic", super::script::Gothic_table),
-        ("Grantha", super::script::Grantha_table), ("Greek", super::script::Greek_table),
-        ("Gujarati", super::script::Gujarati_table), ("Gurmukhi", super::script::Gurmukhi_table),
-        ("Han", super::script::Han_table), ("Hangul", super::script::Hangul_table), ("Hanunoo",
-        super::script::Hanunoo_table), ("Hebrew", super::script::Hebrew_table), ("Hiragana",
-        super::script::Hiragana_table), ("Imperial_Aramaic", super::script::Imperial_Aramaic_table),
-        ("Inherited", super::script::Inherited_table), ("Inscriptional_Pahlavi",
-        super::script::Inscriptional_Pahlavi_table), ("Inscriptional_Parthian",
-        super::script::Inscriptional_Parthian_table), ("Javanese", super::script::Javanese_table),
-        ("Join_Control", super::property::Join_Control_table), ("Kaithi",
-        super::script::Kaithi_table), ("Kannada", super::script::Kannada_table), ("Katakana",
-        super::script::Katakana_table), ("Kayah_Li", super::script::Kayah_Li_table), ("Kharoshthi",
-        super::script::Kharoshthi_table), ("Khmer", super::script::Khmer_table), ("Khojki",
-        super::script::Khojki_table), ("Khudawadi", super::script::Khudawadi_table), ("L",
-        super::general_category::L_table), ("LC", super::general_category::LC_table), ("Lao",
-        super::script::Lao_table), ("Latin", super::script::Latin_table), ("Lepcha",
-        super::script::Lepcha_table), ("Limbu", super::script::Limbu_table), ("Linear_A",
-        super::script::Linear_A_table), ("Linear_B", super::script::Linear_B_table), ("Lisu",
-        super::script::Lisu_table), ("Ll", super::general_category::Ll_table), ("Lm",
-        super::general_category::Lm_table), ("Lo", super::general_category::Lo_table), ("Lowercase",
-        super::derived_property::Lowercase_table), ("Lt", super::general_category::Lt_table), ("Lu",
-        super::general_category::Lu_table), ("Lycian", super::script::Lycian_table), ("Lydian",
-        super::script::Lydian_table), ("M", super::general_category::M_table), ("Mahajani",
-        super::script::Mahajani_table), ("Malayalam", super::script::Malayalam_table), ("Mandaic",
-        super::script::Mandaic_table), ("Manichaean", super::script::Manichaean_table), ("Mc",
-        super::general_category::Mc_table), ("Me", super::general_category::Me_table),
-        ("Meetei_Mayek", super::script::Meetei_Mayek_table), ("Mende_Kikakui",
-        super::script::Mende_Kikakui_table), ("Meroitic_Cursive",
-        super::script::Meroitic_Cursive_table), ("Meroitic_Hieroglyphs",
-        super::script::Meroitic_Hieroglyphs_table), ("Miao", super::script::Miao_table), ("Mn",
-        super::general_category::Mn_table), ("Modi", super::script::Modi_table), ("Mongolian",
-        super::script::Mongolian_table), ("Mro", super::script::Mro_table), ("Myanmar",
-        super::script::Myanmar_table), ("N", super::general_category::N_table), ("Nabataean",
-        super::script::Nabataean_table), ("Nd", super::general_category::Nd_table), ("New_Tai_Lue",
-        super::script::New_Tai_Lue_table), ("Nko", super::script::Nko_table), ("Nl",
-        super::general_category::Nl_table), ("No", super::general_category::No_table),
-        ("Noncharacter_Code_Point", super::property::Noncharacter_Code_Point_table), ("Ogham",
-        super::script::Ogham_table), ("Ol_Chiki", super::script::Ol_Chiki_table), ("Old_Italic",
-        super::script::Old_Italic_table), ("Old_North_Arabian",
-        super::script::Old_North_Arabian_table), ("Old_Permic", super::script::Old_Permic_table),
-        ("Old_Persian", super::script::Old_Persian_table), ("Old_South_Arabian",
-        super::script::Old_South_Arabian_table), ("Old_Turkic", super::script::Old_Turkic_table),
-        ("Oriya", super::script::Oriya_table), ("Osmanya", super::script::Osmanya_table), ("P",
-        super::general_category::P_table), ("Pahawh_Hmong", super::script::Pahawh_Hmong_table),
-        ("Palmyrene", super::script::Palmyrene_table), ("Pau_Cin_Hau",
-        super::script::Pau_Cin_Hau_table), ("Pc", super::general_category::Pc_table), ("Pd",
-        super::general_category::Pd_table), ("Pe", super::general_category::Pe_table), ("Pf",
-        super::general_category::Pf_table), ("Phags_Pa", super::script::Phags_Pa_table),
-        ("Phoenician", super::script::Phoenician_table), ("Pi", super::general_category::Pi_table),
-        ("Po", super::general_category::Po_table), ("Ps", super::general_category::Ps_table),
-        ("Psalter_Pahlavi", super::script::Psalter_Pahlavi_table), ("Rejang",
-        super::script::Rejang_table), ("Runic", super::script::Runic_table), ("S",
-        super::general_category::S_table), ("Samaritan", super::script::Samaritan_table),
-        ("Saurashtra", super::script::Saurashtra_table), ("Sc", super::general_category::Sc_table),
-        ("Sharada", super::script::Sharada_table), ("Shavian", super::script::Shavian_table),
-        ("Siddham", super::script::Siddham_table), ("Sinhala", super::script::Sinhala_table), ("Sk",
-        super::general_category::Sk_table), ("Sm", super::general_category::Sm_table), ("So",
-        super::general_category::So_table), ("Sora_Sompeng", super::script::Sora_Sompeng_table),
-        ("Sundanese", super::script::Sundanese_table), ("Syloti_Nagri",
-        super::script::Syloti_Nagri_table), ("Syriac", super::script::Syriac_table), ("Tagalog",
-        super::script::Tagalog_table), ("Tagbanwa", super::script::Tagbanwa_table), ("Tai_Le",
-        super::script::Tai_Le_table), ("Tai_Tham", super::script::Tai_Tham_table), ("Tai_Viet",
-        super::script::Tai_Viet_table), ("Takri", super::script::Takri_table), ("Tamil",
-        super::script::Tamil_table), ("Telugu", super::script::Telugu_table), ("Thaana",
-        super::script::Thaana_table), ("Thai", super::script::Thai_table), ("Tibetan",
-        super::script::Tibetan_table), ("Tifinagh", super::script::Tifinagh_table), ("Tirhuta",
-        super::script::Tirhuta_table), ("Ugaritic", super::script::Ugaritic_table), ("Uppercase",
-        super::derived_property::Uppercase_table), ("Vai", super::script::Vai_table),
-        ("Warang_Citi", super::script::Warang_Citi_table), ("White_Space",
-        super::property::White_Space_table), ("XID_Continue",
-        super::derived_property::XID_Continue_table), ("XID_Start",
-        super::derived_property::XID_Start_table), ("Yi", super::script::Yi_table), ("Z",
-        super::general_category::Z_table), ("Zl", super::general_category::Zl_table), ("Zp",
-        super::general_category::Zp_table), ("Zs", super::general_category::Zs_table)
-    ];
-
-    pub static PERLD: &'static [(char, char)] = super::general_category::Nd_table;
-
-    pub static PERLS: &'static [(char, char)] = super::property::White_Space_table;
+    pub static UNICODE_CLASSES: &'static [(&'static str, &'static &'static [(char, char)])] = &[
+        ("Alphabetic", &super::derived_property::Alphabetic_table), ("Arabic",
+        &super::script::Arabic_table), ("Armenian", &super::script::Armenian_table), ("Avestan",
+        &super::script::Avestan_table), ("Balinese", &super::script::Balinese_table), ("Bamum",
+        &super::script::Bamum_table), ("Bassa_Vah", &super::script::Bassa_Vah_table), ("Batak",
+        &super::script::Batak_table), ("Bengali", &super::script::Bengali_table), ("Bopomofo",
+        &super::script::Bopomofo_table), ("Brahmi", &super::script::Brahmi_table), ("Braille",
+        &super::script::Braille_table), ("Buginese", &super::script::Buginese_table), ("Buhid",
+        &super::script::Buhid_table), ("C", &super::general_category::C_table),
+        ("Canadian_Aboriginal", &super::script::Canadian_Aboriginal_table), ("Carian",
+        &super::script::Carian_table), ("Caucasian_Albanian",
+        &super::script::Caucasian_Albanian_table), ("Cc", &super::general_category::Cc_table),
+        ("Cf", &super::general_category::Cf_table), ("Chakma", &super::script::Chakma_table),
+        ("Cham", &super::script::Cham_table), ("Cherokee", &super::script::Cherokee_table), ("Cn",
+        &super::general_category::Cn_table), ("Co", &super::general_category::Co_table), ("Common",
+        &super::script::Common_table), ("Coptic", &super::script::Coptic_table), ("Cuneiform",
+        &super::script::Cuneiform_table), ("Cypriot", &super::script::Cypriot_table), ("Cyrillic",
+        &super::script::Cyrillic_table), ("Default_Ignorable_Code_Point",
+        &super::derived_property::Default_Ignorable_Code_Point_table), ("Deseret",
+        &super::script::Deseret_table), ("Devanagari", &super::script::Devanagari_table),
+        ("Duployan", &super::script::Duployan_table), ("Egyptian_Hieroglyphs",
+        &super::script::Egyptian_Hieroglyphs_table), ("Elbasan", &super::script::Elbasan_table),
+        ("Ethiopic", &super::script::Ethiopic_table), ("Georgian", &super::script::Georgian_table),
+        ("Glagolitic", &super::script::Glagolitic_table), ("Gothic", &super::script::Gothic_table),
+        ("Grantha", &super::script::Grantha_table), ("Greek", &super::script::Greek_table),
+        ("Gujarati", &super::script::Gujarati_table), ("Gurmukhi", &super::script::Gurmukhi_table),
+        ("Han", &super::script::Han_table), ("Hangul", &super::script::Hangul_table), ("Hanunoo",
+        &super::script::Hanunoo_table), ("Hebrew", &super::script::Hebrew_table), ("Hiragana",
+        &super::script::Hiragana_table), ("Imperial_Aramaic",
+        &super::script::Imperial_Aramaic_table), ("Inherited", &super::script::Inherited_table),
+        ("Inscriptional_Pahlavi", &super::script::Inscriptional_Pahlavi_table),
+        ("Inscriptional_Parthian", &super::script::Inscriptional_Parthian_table), ("Javanese",
+        &super::script::Javanese_table), ("Join_Control", &super::property::Join_Control_table),
+        ("Kaithi", &super::script::Kaithi_table), ("Kannada", &super::script::Kannada_table),
+        ("Katakana", &super::script::Katakana_table), ("Kayah_Li", &super::script::Kayah_Li_table),
+        ("Kharoshthi", &super::script::Kharoshthi_table), ("Khmer", &super::script::Khmer_table),
+        ("Khojki", &super::script::Khojki_table), ("Khudawadi", &super::script::Khudawadi_table),
+        ("L", &super::general_category::L_table), ("LC", &super::general_category::LC_table),
+        ("Lao", &super::script::Lao_table), ("Latin", &super::script::Latin_table), ("Lepcha",
+        &super::script::Lepcha_table), ("Limbu", &super::script::Limbu_table), ("Linear_A",
+        &super::script::Linear_A_table), ("Linear_B", &super::script::Linear_B_table), ("Lisu",
+        &super::script::Lisu_table), ("Ll", &super::general_category::Ll_table), ("Lm",
+        &super::general_category::Lm_table), ("Lo", &super::general_category::Lo_table),
+        ("Lowercase", &super::derived_property::Lowercase_table), ("Lt",
+        &super::general_category::Lt_table), ("Lu", &super::general_category::Lu_table), ("Lycian",
+        &super::script::Lycian_table), ("Lydian", &super::script::Lydian_table), ("M",
+        &super::general_category::M_table), ("Mahajani", &super::script::Mahajani_table),
+        ("Malayalam", &super::script::Malayalam_table), ("Mandaic", &super::script::Mandaic_table),
+        ("Manichaean", &super::script::Manichaean_table), ("Mc",
+        &super::general_category::Mc_table), ("Me", &super::general_category::Me_table),
+        ("Meetei_Mayek", &super::script::Meetei_Mayek_table), ("Mende_Kikakui",
+        &super::script::Mende_Kikakui_table), ("Meroitic_Cursive",
+        &super::script::Meroitic_Cursive_table), ("Meroitic_Hieroglyphs",
+        &super::script::Meroitic_Hieroglyphs_table), ("Miao", &super::script::Miao_table), ("Mn",
+        &super::general_category::Mn_table), ("Modi", &super::script::Modi_table), ("Mongolian",
+        &super::script::Mongolian_table), ("Mro", &super::script::Mro_table), ("Myanmar",
+        &super::script::Myanmar_table), ("N", &super::general_category::N_table), ("Nabataean",
+        &super::script::Nabataean_table), ("Nd", &super::general_category::Nd_table),
+        ("New_Tai_Lue", &super::script::New_Tai_Lue_table), ("Nko", &super::script::Nko_table),
+        ("Nl", &super::general_category::Nl_table), ("No", &super::general_category::No_table),
+        ("Noncharacter_Code_Point", &super::property::Noncharacter_Code_Point_table), ("Ogham",
+        &super::script::Ogham_table), ("Ol_Chiki", &super::script::Ol_Chiki_table), ("Old_Italic",
+        &super::script::Old_Italic_table), ("Old_North_Arabian",
+        &super::script::Old_North_Arabian_table), ("Old_Permic", &super::script::Old_Permic_table),
+        ("Old_Persian", &super::script::Old_Persian_table), ("Old_South_Arabian",
+        &super::script::Old_South_Arabian_table), ("Old_Turkic", &super::script::Old_Turkic_table),
+        ("Oriya", &super::script::Oriya_table), ("Osmanya", &super::script::Osmanya_table), ("P",
+        &super::general_category::P_table), ("Pahawh_Hmong", &super::script::Pahawh_Hmong_table),
+        ("Palmyrene", &super::script::Palmyrene_table), ("Pau_Cin_Hau",
+        &super::script::Pau_Cin_Hau_table), ("Pc", &super::general_category::Pc_table), ("Pd",
+        &super::general_category::Pd_table), ("Pe", &super::general_category::Pe_table), ("Pf",
+        &super::general_category::Pf_table), ("Phags_Pa", &super::script::Phags_Pa_table),
+        ("Phoenician", &super::script::Phoenician_table), ("Pi",
+        &super::general_category::Pi_table), ("Po", &super::general_category::Po_table), ("Ps",
+        &super::general_category::Ps_table), ("Psalter_Pahlavi",
+        &super::script::Psalter_Pahlavi_table), ("Rejang", &super::script::Rejang_table), ("Runic",
+        &super::script::Runic_table), ("S", &super::general_category::S_table), ("Samaritan",
+        &super::script::Samaritan_table), ("Saurashtra", &super::script::Saurashtra_table), ("Sc",
+        &super::general_category::Sc_table), ("Sharada", &super::script::Sharada_table), ("Shavian",
+        &super::script::Shavian_table), ("Siddham", &super::script::Siddham_table), ("Sinhala",
+        &super::script::Sinhala_table), ("Sk", &super::general_category::Sk_table), ("Sm",
+        &super::general_category::Sm_table), ("So", &super::general_category::So_table),
+        ("Sora_Sompeng", &super::script::Sora_Sompeng_table), ("Sundanese",
+        &super::script::Sundanese_table), ("Syloti_Nagri", &super::script::Syloti_Nagri_table),
+        ("Syriac", &super::script::Syriac_table), ("Tagalog", &super::script::Tagalog_table),
+        ("Tagbanwa", &super::script::Tagbanwa_table), ("Tai_Le", &super::script::Tai_Le_table),
+        ("Tai_Tham", &super::script::Tai_Tham_table), ("Tai_Viet", &super::script::Tai_Viet_table),
+        ("Takri", &super::script::Takri_table), ("Tamil", &super::script::Tamil_table), ("Telugu",
+        &super::script::Telugu_table), ("Thaana", &super::script::Thaana_table), ("Thai",
+        &super::script::Thai_table), ("Tibetan", &super::script::Tibetan_table), ("Tifinagh",
+        &super::script::Tifinagh_table), ("Tirhuta", &super::script::Tirhuta_table), ("Ugaritic",
+        &super::script::Ugaritic_table), ("Uppercase", &super::derived_property::Uppercase_table),
+        ("Vai", &super::script::Vai_table), ("Warang_Citi", &super::script::Warang_Citi_table),
+        ("White_Space", &super::property::White_Space_table), ("XID_Continue",
+        &super::derived_property::XID_Continue_table), ("XID_Start",
+        &super::derived_property::XID_Start_table), ("Yi", &super::script::Yi_table), ("Z",
+        &super::general_category::Z_table), ("Zl", &super::general_category::Zl_table), ("Zp",
+        &super::general_category::Zp_table), ("Zs", &super::general_category::Zs_table)
+    ];
+
+    pub static PERLD: &'static &'static [(char, char)] = &super::general_category::Nd_table;
+
+    pub static PERLS: &'static &'static [(char, char)] = &super::property::White_Space_table;
 
     pub static PERLW: &'static [(char, char)] = &[
         ('\x30', '\x39'), ('\x41', '\x5a'), ('\x5f', '\x5f'), ('\x61', '\x7a'), ('\xaa', '\xaa'),