X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/711df2d9898cf331b49a32e5b1ae9979c9c9cdcb..47db112512e23853b60b6ecea208056818e10907:/src/src/pcre/pcre_tables.c diff --git a/src/src/pcre/pcre_tables.c b/src/src/pcre/pcre_tables.c index 1c2f1cd06..530e44038 100644 --- a/src/src/pcre/pcre_tables.c +++ b/src/src/pcre/pcre_tables.c @@ -1,4 +1,4 @@ -/* $Cambridge: exim/src/src/pcre/pcre_tables.c,v 1.5 2007/06/26 11:16:54 ph10 Exp $ */ +/* $Cambridge: exim/src/src/pcre/pcre_tables.c,v 1.6 2007/11/12 13:02:20 nm4 Exp $ */ /************************************************* * Perl-Compatible Regular Expressions * @@ -46,6 +46,10 @@ uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name clashes with the library. */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include "pcre_internal.h" @@ -85,115 +89,228 @@ const uschar _pcre_utf8_table4[] = { 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; -/* This table translates Unicode property names into type and code values. It -is searched by binary chop, so must be in collating sequence of name. */ +/* The pcre_utt[] table below translates Unicode property names into type and +code values. It is searched by binary chop, so must be in collating sequence of +name. Originally, the table contained pointers to the name strings in the first +field of each entry. However, that leads to a large number of relocations when +a shared library is dynamically loaded. A significant reduction is made by +putting all the names into a single, large string and then using offsets in the +table itself. Maintenance is more error-prone, but frequent changes to this +data are unlikely. 
*/ + +const char _pcre_utt_names[] = + "Any\0" + "Arabic\0" + "Armenian\0" + "Balinese\0" + "Bengali\0" + "Bopomofo\0" + "Braille\0" + "Buginese\0" + "Buhid\0" + "C\0" + "Canadian_Aboriginal\0" + "Cc\0" + "Cf\0" + "Cherokee\0" + "Cn\0" + "Co\0" + "Common\0" + "Coptic\0" + "Cs\0" + "Cuneiform\0" + "Cypriot\0" + "Cyrillic\0" + "Deseret\0" + "Devanagari\0" + "Ethiopic\0" + "Georgian\0" + "Glagolitic\0" + "Gothic\0" + "Greek\0" + "Gujarati\0" + "Gurmukhi\0" + "Han\0" + "Hangul\0" + "Hanunoo\0" + "Hebrew\0" + "Hiragana\0" + "Inherited\0" + "Kannada\0" + "Katakana\0" + "Kharoshthi\0" + "Khmer\0" + "L\0" + "L&\0" + "Lao\0" + "Latin\0" + "Limbu\0" + "Linear_B\0" + "Ll\0" + "Lm\0" + "Lo\0" + "Lt\0" + "Lu\0" + "M\0" + "Malayalam\0" + "Mc\0" + "Me\0" + "Mn\0" + "Mongolian\0" + "Myanmar\0" + "N\0" + "Nd\0" + "New_Tai_Lue\0" + "Nko\0" + "Nl\0" + "No\0" + "Ogham\0" + "Old_Italic\0" + "Old_Persian\0" + "Oriya\0" + "Osmanya\0" + "P\0" + "Pc\0" + "Pd\0" + "Pe\0" + "Pf\0" + "Phags_Pa\0" + "Phoenician\0" + "Pi\0" + "Po\0" + "Ps\0" + "Runic\0" + "S\0" + "Sc\0" + "Shavian\0" + "Sinhala\0" + "Sk\0" + "Sm\0" + "So\0" + "Syloti_Nagri\0" + "Syriac\0" + "Tagalog\0" + "Tagbanwa\0" + "Tai_Le\0" + "Tamil\0" + "Telugu\0" + "Thaana\0" + "Thai\0" + "Tibetan\0" + "Tifinagh\0" + "Ugaritic\0" + "Yi\0" + "Z\0" + "Zl\0" + "Zp\0" + "Zs\0"; const ucp_type_table _pcre_utt[] = { - { "Any", PT_ANY, 0 }, - { "Arabic", PT_SC, ucp_Arabic }, - { "Armenian", PT_SC, ucp_Armenian }, - { "Balinese", PT_SC, ucp_Balinese }, - { "Bengali", PT_SC, ucp_Bengali }, - { "Bopomofo", PT_SC, ucp_Bopomofo }, - { "Braille", PT_SC, ucp_Braille }, - { "Buginese", PT_SC, ucp_Buginese }, - { "Buhid", PT_SC, ucp_Buhid }, - { "C", PT_GC, ucp_C }, - { "Canadian_Aboriginal", PT_SC, ucp_Canadian_Aboriginal }, - { "Cc", PT_PC, ucp_Cc }, - { "Cf", PT_PC, ucp_Cf }, - { "Cherokee", PT_SC, ucp_Cherokee }, - { "Cn", PT_PC, ucp_Cn }, - { "Co", PT_PC, ucp_Co }, - { "Common", PT_SC, ucp_Common }, - { "Coptic", PT_SC, ucp_Coptic }, - { "Cs", 
PT_PC, ucp_Cs }, - { "Cuneiform", PT_SC, ucp_Cuneiform }, - { "Cypriot", PT_SC, ucp_Cypriot }, - { "Cyrillic", PT_SC, ucp_Cyrillic }, - { "Deseret", PT_SC, ucp_Deseret }, - { "Devanagari", PT_SC, ucp_Devanagari }, - { "Ethiopic", PT_SC, ucp_Ethiopic }, - { "Georgian", PT_SC, ucp_Georgian }, - { "Glagolitic", PT_SC, ucp_Glagolitic }, - { "Gothic", PT_SC, ucp_Gothic }, - { "Greek", PT_SC, ucp_Greek }, - { "Gujarati", PT_SC, ucp_Gujarati }, - { "Gurmukhi", PT_SC, ucp_Gurmukhi }, - { "Han", PT_SC, ucp_Han }, - { "Hangul", PT_SC, ucp_Hangul }, - { "Hanunoo", PT_SC, ucp_Hanunoo }, - { "Hebrew", PT_SC, ucp_Hebrew }, - { "Hiragana", PT_SC, ucp_Hiragana }, - { "Inherited", PT_SC, ucp_Inherited }, - { "Kannada", PT_SC, ucp_Kannada }, - { "Katakana", PT_SC, ucp_Katakana }, - { "Kharoshthi", PT_SC, ucp_Kharoshthi }, - { "Khmer", PT_SC, ucp_Khmer }, - { "L", PT_GC, ucp_L }, - { "L&", PT_LAMP, 0 }, - { "Lao", PT_SC, ucp_Lao }, - { "Latin", PT_SC, ucp_Latin }, - { "Limbu", PT_SC, ucp_Limbu }, - { "Linear_B", PT_SC, ucp_Linear_B }, - { "Ll", PT_PC, ucp_Ll }, - { "Lm", PT_PC, ucp_Lm }, - { "Lo", PT_PC, ucp_Lo }, - { "Lt", PT_PC, ucp_Lt }, - { "Lu", PT_PC, ucp_Lu }, - { "M", PT_GC, ucp_M }, - { "Malayalam", PT_SC, ucp_Malayalam }, - { "Mc", PT_PC, ucp_Mc }, - { "Me", PT_PC, ucp_Me }, - { "Mn", PT_PC, ucp_Mn }, - { "Mongolian", PT_SC, ucp_Mongolian }, - { "Myanmar", PT_SC, ucp_Myanmar }, - { "N", PT_GC, ucp_N }, - { "Nd", PT_PC, ucp_Nd }, - { "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue }, - { "Nko", PT_SC, ucp_Nko }, - { "Nl", PT_PC, ucp_Nl }, - { "No", PT_PC, ucp_No }, - { "Ogham", PT_SC, ucp_Ogham }, - { "Old_Italic", PT_SC, ucp_Old_Italic }, - { "Old_Persian", PT_SC, ucp_Old_Persian }, - { "Oriya", PT_SC, ucp_Oriya }, - { "Osmanya", PT_SC, ucp_Osmanya }, - { "P", PT_GC, ucp_P }, - { "Pc", PT_PC, ucp_Pc }, - { "Pd", PT_PC, ucp_Pd }, - { "Pe", PT_PC, ucp_Pe }, - { "Pf", PT_PC, ucp_Pf }, - { "Phags_Pa", PT_SC, ucp_Phags_Pa }, - { "Phoenician", PT_SC, ucp_Phoenician }, - { "Pi", PT_PC, 
ucp_Pi }, - { "Po", PT_PC, ucp_Po }, - { "Ps", PT_PC, ucp_Ps }, - { "Runic", PT_SC, ucp_Runic }, - { "S", PT_GC, ucp_S }, - { "Sc", PT_PC, ucp_Sc }, - { "Shavian", PT_SC, ucp_Shavian }, - { "Sinhala", PT_SC, ucp_Sinhala }, - { "Sk", PT_PC, ucp_Sk }, - { "Sm", PT_PC, ucp_Sm }, - { "So", PT_PC, ucp_So }, - { "Syloti_Nagri", PT_SC, ucp_Syloti_Nagri }, - { "Syriac", PT_SC, ucp_Syriac }, - { "Tagalog", PT_SC, ucp_Tagalog }, - { "Tagbanwa", PT_SC, ucp_Tagbanwa }, - { "Tai_Le", PT_SC, ucp_Tai_Le }, - { "Tamil", PT_SC, ucp_Tamil }, - { "Telugu", PT_SC, ucp_Telugu }, - { "Thaana", PT_SC, ucp_Thaana }, - { "Thai", PT_SC, ucp_Thai }, - { "Tibetan", PT_SC, ucp_Tibetan }, - { "Tifinagh", PT_SC, ucp_Tifinagh }, - { "Ugaritic", PT_SC, ucp_Ugaritic }, - { "Yi", PT_SC, ucp_Yi }, - { "Z", PT_GC, ucp_Z }, - { "Zl", PT_PC, ucp_Zl }, - { "Zp", PT_PC, ucp_Zp }, - { "Zs", PT_PC, ucp_Zs } + { 0, PT_ANY, 0 }, + { 4, PT_SC, ucp_Arabic }, + { 11, PT_SC, ucp_Armenian }, + { 20, PT_SC, ucp_Balinese }, + { 29, PT_SC, ucp_Bengali }, + { 37, PT_SC, ucp_Bopomofo }, + { 46, PT_SC, ucp_Braille }, + { 54, PT_SC, ucp_Buginese }, + { 63, PT_SC, ucp_Buhid }, + { 69, PT_GC, ucp_C }, + { 71, PT_SC, ucp_Canadian_Aboriginal }, + { 91, PT_PC, ucp_Cc }, + { 94, PT_PC, ucp_Cf }, + { 97, PT_SC, ucp_Cherokee }, + { 106, PT_PC, ucp_Cn }, + { 109, PT_PC, ucp_Co }, + { 112, PT_SC, ucp_Common }, + { 119, PT_SC, ucp_Coptic }, + { 126, PT_PC, ucp_Cs }, + { 129, PT_SC, ucp_Cuneiform }, + { 139, PT_SC, ucp_Cypriot }, + { 147, PT_SC, ucp_Cyrillic }, + { 156, PT_SC, ucp_Deseret }, + { 164, PT_SC, ucp_Devanagari }, + { 175, PT_SC, ucp_Ethiopic }, + { 184, PT_SC, ucp_Georgian }, + { 193, PT_SC, ucp_Glagolitic }, + { 204, PT_SC, ucp_Gothic }, + { 211, PT_SC, ucp_Greek }, + { 217, PT_SC, ucp_Gujarati }, + { 226, PT_SC, ucp_Gurmukhi }, + { 235, PT_SC, ucp_Han }, + { 239, PT_SC, ucp_Hangul }, + { 246, PT_SC, ucp_Hanunoo }, + { 254, PT_SC, ucp_Hebrew }, + { 261, PT_SC, ucp_Hiragana }, + { 270, PT_SC, ucp_Inherited }, + { 280, 
PT_SC, ucp_Kannada }, + { 288, PT_SC, ucp_Katakana }, + { 297, PT_SC, ucp_Kharoshthi }, + { 308, PT_SC, ucp_Khmer }, + { 314, PT_GC, ucp_L }, + { 316, PT_LAMP, 0 }, + { 319, PT_SC, ucp_Lao }, + { 323, PT_SC, ucp_Latin }, + { 329, PT_SC, ucp_Limbu }, + { 335, PT_SC, ucp_Linear_B }, + { 344, PT_PC, ucp_Ll }, + { 347, PT_PC, ucp_Lm }, + { 350, PT_PC, ucp_Lo }, + { 353, PT_PC, ucp_Lt }, + { 356, PT_PC, ucp_Lu }, + { 359, PT_GC, ucp_M }, + { 361, PT_SC, ucp_Malayalam }, + { 371, PT_PC, ucp_Mc }, + { 374, PT_PC, ucp_Me }, + { 377, PT_PC, ucp_Mn }, + { 380, PT_SC, ucp_Mongolian }, + { 390, PT_SC, ucp_Myanmar }, + { 398, PT_GC, ucp_N }, + { 400, PT_PC, ucp_Nd }, + { 403, PT_SC, ucp_New_Tai_Lue }, + { 415, PT_SC, ucp_Nko }, + { 419, PT_PC, ucp_Nl }, + { 422, PT_PC, ucp_No }, + { 425, PT_SC, ucp_Ogham }, + { 431, PT_SC, ucp_Old_Italic }, + { 442, PT_SC, ucp_Old_Persian }, + { 454, PT_SC, ucp_Oriya }, + { 460, PT_SC, ucp_Osmanya }, + { 468, PT_GC, ucp_P }, + { 470, PT_PC, ucp_Pc }, + { 473, PT_PC, ucp_Pd }, + { 476, PT_PC, ucp_Pe }, + { 479, PT_PC, ucp_Pf }, + { 482, PT_SC, ucp_Phags_Pa }, + { 491, PT_SC, ucp_Phoenician }, + { 502, PT_PC, ucp_Pi }, + { 505, PT_PC, ucp_Po }, + { 508, PT_PC, ucp_Ps }, + { 511, PT_SC, ucp_Runic }, + { 517, PT_GC, ucp_S }, + { 519, PT_PC, ucp_Sc }, + { 522, PT_SC, ucp_Shavian }, + { 530, PT_SC, ucp_Sinhala }, + { 538, PT_PC, ucp_Sk }, + { 541, PT_PC, ucp_Sm }, + { 544, PT_PC, ucp_So }, + { 547, PT_SC, ucp_Syloti_Nagri }, + { 560, PT_SC, ucp_Syriac }, + { 567, PT_SC, ucp_Tagalog }, + { 575, PT_SC, ucp_Tagbanwa }, + { 584, PT_SC, ucp_Tai_Le }, + { 591, PT_SC, ucp_Tamil }, + { 597, PT_SC, ucp_Telugu }, + { 604, PT_SC, ucp_Thaana }, + { 611, PT_SC, ucp_Thai }, + { 616, PT_SC, ucp_Tibetan }, + { 624, PT_SC, ucp_Tifinagh }, + { 633, PT_SC, ucp_Ugaritic }, + { 642, PT_SC, ucp_Yi }, + { 645, PT_GC, ucp_Z }, + { 647, PT_PC, ucp_Zl }, + { 650, PT_PC, ucp_Zp }, + { 653, PT_PC, ucp_Zs } }; const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);