src/src/pcre/ucp.h

   1 /* $Cambridge: exim/src/src/pcre/ucp.h,v 1.5 2007/06/26 11:16:54 ph10 Exp $ */
   2
   3 /*************************************************
   4 *          Unicode Property Table handler        *
   5 *************************************************/
   6
   7 #ifndef _UCP_H
   8 #define _UCP_H
   9
  10 /* This file contains definitions of the property values that are returned by
  11 the function _pcre_ucp_findprop(). New values that are added for new releases
  12 of Unicode should always be at the end of each enum, for backwards
  13 compatibility. */
  14
  15 /* These are the general character categories. */
  16
  17 enum {
  18   ucp_C,     /* Other */
  19   ucp_L,     /* Letter */
  20   ucp_M,     /* Mark */
  21   ucp_N,     /* Number */
  22   ucp_P,     /* Punctuation */
  23   ucp_S,     /* Symbol */
  24   ucp_Z      /* Separator */
  25 };
  26
  27 /* These are the particular character types. */
  28
  29 enum {
  30   ucp_Cc,    /* Control */
  31   ucp_Cf,    /* Format */
  32   ucp_Cn,    /* Unassigned */
  33   ucp_Co,    /* Private use */
  34   ucp_Cs,    /* Surrogate */
  35   ucp_Ll,    /* Lower case letter */
  36   ucp_Lm,    /* Modifier letter */
  37   ucp_Lo,    /* Other letter */
  38   ucp_Lt,    /* Title case letter */
  39   ucp_Lu,    /* Upper case letter */
  40   ucp_Mc,    /* Spacing mark */
  41   ucp_Me,    /* Enclosing mark */
  42   ucp_Mn,    /* Non-spacing mark */
  43   ucp_Nd,    /* Decimal number */
  44   ucp_Nl,    /* Letter number */
  45   ucp_No,    /* Other number */
  46   ucp_Pc,    /* Connector punctuation */
  47   ucp_Pd,    /* Dash punctuation */
  48   ucp_Pe,    /* Close punctuation */
  49   ucp_Pf,    /* Final punctuation */
  50   ucp_Pi,    /* Initial punctuation */
  51   ucp_Po,    /* Other punctuation */
  52   ucp_Ps,    /* Open punctuation */
  53   ucp_Sc,    /* Currency symbol */
  54   ucp_Sk,    /* Modifier symbol */
  55   ucp_Sm,    /* Mathematical symbol */
  56   ucp_So,    /* Other symbol */
  57   ucp_Zl,    /* Line separator */
  58   ucp_Zp,    /* Paragraph separator */
  59   ucp_Zs     /* Space separator */
  60 };
  61
  62 /* These are the script identifications. */
  63
  64 enum {
  65   ucp_Arabic,
  66   ucp_Armenian,
  67   ucp_Bengali,
  68   ucp_Bopomofo,
  69   ucp_Braille,
  70   ucp_Buginese,
  71   ucp_Buhid,
  72   ucp_Canadian_Aboriginal,
  73   ucp_Cherokee,
  74   ucp_Common,
  75   ucp_Coptic,
  76   ucp_Cypriot,
  77   ucp_Cyrillic,
  78   ucp_Deseret,
  79   ucp_Devanagari,
  80   ucp_Ethiopic,
  81   ucp_Georgian,
  82   ucp_Glagolitic,
  83   ucp_Gothic,
  84   ucp_Greek,
  85   ucp_Gujarati,
  86   ucp_Gurmukhi,
  87   ucp_Han,
  88   ucp_Hangul,
  89   ucp_Hanunoo,
  90   ucp_Hebrew,
  91   ucp_Hiragana,
  92   ucp_Inherited,
  93   ucp_Kannada,
  94   ucp_Katakana,
  95   ucp_Kharoshthi,
  96   ucp_Khmer,
  97   ucp_Lao,
  98   ucp_Latin,
  99   ucp_Limbu,
 100   ucp_Linear_B,
 101   ucp_Malayalam,
 102   ucp_Mongolian,
 103   ucp_Myanmar,
 104   ucp_New_Tai_Lue,
 105   ucp_Ogham,
 106   ucp_Old_Italic,
 107   ucp_Old_Persian,
 108   ucp_Oriya,
 109   ucp_Osmanya,
 110   ucp_Runic,
 111   ucp_Shavian,
 112   ucp_Sinhala,
 113   ucp_Syloti_Nagri,
 114   ucp_Syriac,
 115   ucp_Tagalog,
 116   ucp_Tagbanwa,
 117   ucp_Tai_Le,
 118   ucp_Tamil,
 119   ucp_Telugu,
 120   ucp_Thaana,
 121   ucp_Thai,
 122   ucp_Tibetan,
 123   ucp_Tifinagh,
 124   ucp_Ugaritic,
 125   ucp_Yi,
 126   ucp_Balinese,      /* New for Unicode 5.0.0 */
 127   ucp_Cuneiform,     /* New for Unicode 5.0.0 */
 128   ucp_Nko,           /* New for Unicode 5.0.0 */
 129   ucp_Phags_Pa,      /* New for Unicode 5.0.0 */
 130   ucp_Phoenician     /* New for Unicode 5.0.0 */
 131 };
 132
 133 #endif
 134
 135 /* End of ucp.h */