character.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief Representation of characters in source encoding.
00030 
00031   Defines helper class character containing constants and manipulators.
00032 */
00033 
00034 #include <lestes/common.hh>
00035 #include <lestes/std/character.hh>
00036 
00037 package(lestes);
00038 package(std);
00039 
00040 // Definitions of the character constants; no initializer is given at this point. TODO remove the doxygen comments.
00041 /*!
00042   ASCII bell character.
00043 */
00044 const ucn character::ascii_bell;
00045 /*!
00046   ASCII backspace character.
00047 */
00048 const ucn character::ascii_backspace;
00049 /*!
00050   ASCII tabulator character.
00051 */
00052 const ucn character::ascii_tab;
00053 /*!
00054   ASCII vertical tabulator character.
00055 */
00056 const ucn character::ascii_vtab;
00057 /*!
00058   ASCII form feed character.
00059 */
00060 const ucn character::ascii_form_feed;
00061 /*!
00062   ASCII new line character.
00063 */
00064 const ucn character::ascii_new_line;
00065 /*!
00066   ASCII carriage return character.
00067 */
00068 const ucn character::ascii_carriage_return;
00069 /*!
00070   ASCII space character.
00071 */
00072 const ucn character::ascii_space;
00073 /*!
00074   ASCII exclamation mark character.
00075 */
00076 const ucn character::ascii_emark;
00077 /*!
00078   ASCII double quote character.
00079 */
00080 const ucn character::ascii_dquote;
00081 /*!
00082   ASCII hash character.
00083 */
00084 const ucn character::ascii_hash;
00085 /*!
00086   ASCII dollar character.
00087 */
00088 const ucn character::ascii_dollar;
00089 /*!
00090   ASCII percent character.
00091 */
00092 const ucn character::ascii_percent;
00093 /*!
00094   ASCII ampersand character.
00095 */
00096 const ucn character::ascii_amp;
00097 /*!
00098   ASCII quote character.
00099 */
00100 const ucn character::ascii_quote;
00101 /*!
00102   ASCII left parenthesis character.
00103 */
00104 const ucn character::ascii_left_par;
00105 /*!
00106   ASCII right parenthesis character.
00107 */
00108 const ucn character::ascii_right_par;
00109 /*!
00110   ASCII star character.
00111 */
00112 const ucn character::ascii_star;
00113 /*!
00114   ASCII plus character.
00115 */
00116 const ucn character::ascii_plus;
00117 /*!
00118   ASCII comma character.
00119 */
00120 const ucn character::ascii_comma;
00121 /*!
00122   ASCII minus character.
00123 */
00124 const ucn character::ascii_minus;
00125 /*!
00126   ASCII dot character.
00127 */
00128 const ucn character::ascii_dot;
00129 /*!
00130   ASCII slash character.
00131 */
00132 const ucn character::ascii_slash;
00133 /*!
00134   ASCII digit 0 character.
00135 */
00136 const ucn character::ascii_digit_0;
00137 /*!
00138   ASCII digit 1 character.
00139 */
00140 const ucn character::ascii_digit_1;
00141 /*!
00142   ASCII digit 2 character.
00143 */
00144 const ucn character::ascii_digit_2;
00145 /*!
00146   ASCII digit 3 character.
00147 */
00148 const ucn character::ascii_digit_3;
00149 /*!
00150   ASCII digit 4 character.
00151 */
00152 const ucn character::ascii_digit_4;
00153 /*!
00154   ASCII digit 5 character.
00155 */
00156 const ucn character::ascii_digit_5;
00157 /*!
00158   ASCII digit 6 character.
00159 */
00160 const ucn character::ascii_digit_6;
00161 /*!
00162   ASCII digit 7 character.
00163 */
00164 const ucn character::ascii_digit_7;
00165 /*!
00166   ASCII digit 8 character.
00167 */
00168 const ucn character::ascii_digit_8;
00169 /*!
00170   ASCII digit 9 character.
00171 */
00172 const ucn character::ascii_digit_9;
00173 /*!
00174   ASCII colon character.
00175 */
00176 const ucn character::ascii_colon;
00177 /*!
00178   ASCII semicolon character.
00179 */
00180 const ucn character::ascii_semicolon;
00181 /*!
00182   ASCII less than character.
00183 */
00184 const ucn character::ascii_lt;
00185 /*!
00186   ASCII equals character.
00187 */
00188 const ucn character::ascii_eq;
00189 /*!
00190   ASCII greater than character.
00191 */
00192 const ucn character::ascii_gt;
00193 /*!
00194   ASCII question mark character.
00195 */
00196 const ucn character::ascii_qmark;
00197 /*!
00198   ASCII commercial at (at sign) character.
00199 */
00200 const ucn character::ascii_at;
00201 /*!
00202   ASCII upper a character.
00203 */
00204 const ucn character::ascii_upper_a;
00205 /*!
00206   ASCII upper b character.
00207 */
00208 const ucn character::ascii_upper_b;
00209 /*!
00210   ASCII upper c character.
00211 */
00212 const ucn character::ascii_upper_c;
00213 /*!
00214   ASCII upper d character.
00215 */
00216 const ucn character::ascii_upper_d;
00217 /*!
00218   ASCII upper e character.
00219 */
00220 const ucn character::ascii_upper_e;
00221 /*!
00222   ASCII upper f character.
00223 */
00224 const ucn character::ascii_upper_f;
00225 /*!
00226   ASCII upper g character.
00227 */
00228 const ucn character::ascii_upper_g;
00229 /*!
00230   ASCII upper h character.
00231 */
00232 const ucn character::ascii_upper_h;
00233 /*!
00234   ASCII upper i character.
00235 */
00236 const ucn character::ascii_upper_i;
00237 /*!
00238   ASCII upper j character.
00239 */
00240 const ucn character::ascii_upper_j;
00241 /*!
00242   ASCII upper k character.
00243 */
00244 const ucn character::ascii_upper_k;
00245 /*!
00246   ASCII upper l character.
00247 */
00248 const ucn character::ascii_upper_l;
00249 /*!
00250   ASCII upper m character.
00251 */
00252 const ucn character::ascii_upper_m;
00253 /*!
00254   ASCII upper n character.
00255 */
00256 const ucn character::ascii_upper_n;
00257 /*!
00258   ASCII upper o character.
00259 */
00260 const ucn character::ascii_upper_o;
00261 /*!
00262   ASCII upper p character.
00263 */
00264 const ucn character::ascii_upper_p;
00265 /*!
00266   ASCII upper q character.
00267 */
00268 const ucn character::ascii_upper_q;
00269 /*!
00270   ASCII upper r character.
00271 */
00272 const ucn character::ascii_upper_r;
00273 /*!
00274   ASCII upper s character.
00275 */
00276 const ucn character::ascii_upper_s;
00277 /*!
00278   ASCII upper t character.
00279 */
00280 const ucn character::ascii_upper_t;
00281 /*!
00282   ASCII upper u character.
00283 */
00284 const ucn character::ascii_upper_u;
00285 /*!
00286   ASCII upper v character.
00287 */
00288 const ucn character::ascii_upper_v;
00289 /*!
00290   ASCII upper w character.
00291 */
00292 const ucn character::ascii_upper_w;
00293 /*!
00294   ASCII upper x character.
00295 */
00296 const ucn character::ascii_upper_x;
00297 /*!
00298   ASCII upper y character.
00299 */
00300 const ucn character::ascii_upper_y;
00301 /*!
00302   ASCII upper z character.
00303 */
00304 const ucn character::ascii_upper_z;
00305 /*!
00306   ASCII left bracket character.
00307 */
00308 const ucn character::ascii_left_bracket;
00309 /*!
00310   ASCII backslash character.
00311 */
00312 const ucn character::ascii_backslash;
00313 /*!
00314   ASCII right bracket character.
00315 */
00316 const ucn character::ascii_right_bracket;
00317 /*!
00318   ASCII hat character.
00319 */
00320 const ucn character::ascii_hat;
00321 /*!
00322   ASCII underscore character.
00323 */
00324 const ucn character::ascii_underscore;
00325 /*!
00326   ASCII back quote character.
00327 */
00328 const ucn character::ascii_bquote;
00329 /*!
00330   ASCII lower a character.
00331 */
00332 const ucn character::ascii_lower_a;
00333 /*!
00334   ASCII lower b character.
00335 */
00336 const ucn character::ascii_lower_b;
00337 /*!
00338   ASCII lower c character.
00339 */
00340 const ucn character::ascii_lower_c;
00341 /*!
00342   ASCII lower d character.
00343 */
00344 const ucn character::ascii_lower_d;
00345 /*!
00346   ASCII lower e character.
00347 */
00348 const ucn character::ascii_lower_e;
00349 /*!
00350   ASCII lower f character.
00351 */
00352 const ucn character::ascii_lower_f;
00353 /*!
00354   ASCII lower g character.
00355 */
00356 const ucn character::ascii_lower_g;
00357 /*!
00358   ASCII lower h character.
00359 */
00360 const ucn character::ascii_lower_h;
00361 /*!
00362   ASCII lower i character.
00363 */
00364 const ucn character::ascii_lower_i;
00365 /*!
00366   ASCII lower j character.
00367 */
00368 const ucn character::ascii_lower_j;
00369 /*!
00370   ASCII lower k character.
00371 */
00372 const ucn character::ascii_lower_k;
00373 /*!
00374   ASCII lower l character.
00375 */
00376 const ucn character::ascii_lower_l;
00377 /*!
00378   ASCII lower m character.
00379 */
00380 const ucn character::ascii_lower_m;
00381 /*!
00382   ASCII lower n character.
00383 */
00384 const ucn character::ascii_lower_n;
00385 /*!
00386   ASCII lower o character.
00387 */
00388 const ucn character::ascii_lower_o;
00389 /*!
00390   ASCII lower p character.
00391 */
00392 const ucn character::ascii_lower_p;
00393 /*!
00394   ASCII lower q character.
00395 */
00396 const ucn character::ascii_lower_q;
00397 /*!
00398   ASCII lower r character.
00399 */
00400 const ucn character::ascii_lower_r;
00401 /*!
00402   ASCII lower s character.
00403 */
00404 const ucn character::ascii_lower_s;
00405 /*!
00406   ASCII lower t character.
00407 */
00408 const ucn character::ascii_lower_t;
00409 /*!
00410   ASCII lower u character.
00411 */
00412 const ucn character::ascii_lower_u;
00413 /*!
00414   ASCII lower v character.
00415 */
00416 const ucn character::ascii_lower_v;
00417 /*!
00418   ASCII lower w character.
00419 */
00420 const ucn character::ascii_lower_w;
00421 /*!
00422   ASCII lower x character.
00423 */
00424 const ucn character::ascii_lower_x;
00425 /*!
00426   ASCII lower y character.
00427 */
00428 const ucn character::ascii_lower_y;
00429 /*!
00430   ASCII lower z character.
00431 */
00432 const ucn character::ascii_lower_z;
00433 /*!
00434   ASCII left brace character.
00435 */
00436 const ucn character::ascii_left_brace;
00437 /*!
00438   ASCII vertical bar character.
00439 */
00440 const ucn character::ascii_vbar;
00441 /*!
00442   ASCII right brace character.
00443 */
00444 const ucn character::ascii_right_brace;
00445 /*!
00446   ASCII tilde character.
00447 */
00448 const ucn character::ascii_tilde;
00449 
00450 /*!
00451   Flag sets for the ASCII part of internal characters, one entry per code in increasing order (the code and glyph are noted beside each entry); used for predicates.
00452 */
00453 character::ascii_flags_type character::ascii_flags[character::ascii_length] = {
00454         /*   0 `\x00' */  FLG_NONE,
00455         /*   1 `\x01' */  FLG_NONE,
00456         /*   2 `\x02' */  FLG_NONE,
00457         /*   3 `\x03' */  FLG_NONE,
00458         /*   4 `\x04' */  FLG_NONE,
00459         /*   5 `\x05' */  FLG_NONE,
00460         /*   6 `\x06' */  FLG_NONE,
00461         /*   7 `\a'   */  FLG_BASIC,
00462         /*   8 `\b'   */  FLG_BASIC,
00463         /*   9 `\t'   */  FLG_BASIC | FLG_SPACE,
00464         /*  10 `\n'   */  FLG_BASIC | FLG_SPACE,
00465         /*  11 `\v'   */  FLG_BASIC | FLG_SPACE,
00466         /*  12 `\f'   */  FLG_BASIC | FLG_SPACE,
00467         /*  13 `\r'   */  FLG_BASIC | FLG_SPACE,
00468         /*  14 `\x0e' */  FLG_NONE,
00469         /*  15 `\x0f' */  FLG_NONE,
00470         /*  16 `\x10' */  FLG_NONE,
00471         /*  17 `\x11' */  FLG_NONE,
00472         /*  18 `\x12' */  FLG_NONE,
00473         /*  19 `\x13' */  FLG_NONE,
00474         /*  20 `\x14' */  FLG_NONE,
00475         /*  21 `\x15' */  FLG_NONE,
00476         /*  22 `\x16' */  FLG_NONE,
00477         /*  23 `\x17' */  FLG_NONE,
00478         /*  24 `\x18' */  FLG_NONE,
00479         /*  25 `\x19' */  FLG_NONE,
00480         /*  26 `\x1a' */  FLG_NONE,
00481         /*  27 `\x1b' */  FLG_NONE,
00482         /*  28 `\x1c' */  FLG_NONE,
00483         /*  29 `\x1d' */  FLG_NONE,
00484         /*  30 `\x1e' */  FLG_NONE,
00485         /*  31 `\x1f' */  FLG_NONE,
00486         /*  32 ` '    */  FLG_BASIC | FLG_SPACE,
00487         /*  33 `!'    */  FLG_BASIC,
00488         /*  34 `"'    */  FLG_BASIC,
00489         /*  35 `#'    */  FLG_BASIC,
00490         /*  36 `$'    */  FLG_NONE,
00491         /*  37 `%'    */  FLG_BASIC,
00492         /*  38 `&'    */  FLG_BASIC,
00493         /*  39 `''    */  FLG_BASIC,
00494         /*  40 `('    */  FLG_BASIC,
00495         /*  41 `)'    */  FLG_BASIC,
00496         /*  42 `*'    */  FLG_BASIC,
00497         /*  43 `+'    */  FLG_BASIC,
00498         /*  44 `,'    */  FLG_BASIC,
00499         /*  45 `-'    */  FLG_BASIC,
00500         /*  46 `.'    */  FLG_BASIC,
00501         /*  47 `/'    */  FLG_BASIC,
00502         /*  48 `0'    */  FLG_BASIC | FLG_DIGIT | FLG_ODIGIT | FLG_XDIGIT,
00503         /*  49 `1'    */  FLG_BASIC | FLG_DIGIT | FLG_ODIGIT | FLG_XDIGIT,
00504         /*  50 `2'    */  FLG_BASIC | FLG_DIGIT | FLG_ODIGIT | FLG_XDIGIT,
00505         /*  51 `3'    */  FLG_BASIC | FLG_DIGIT | FLG_ODIGIT | FLG_XDIGIT,
00506         /*  52 `4'    */  FLG_BASIC | FLG_DIGIT | FLG_ODIGIT | FLG_XDIGIT,
00507         /*  53 `5'    */  FLG_BASIC | FLG_DIGIT | FLG_ODIGIT | FLG_XDIGIT,
00508         /*  54 `6'    */  FLG_BASIC | FLG_DIGIT | FLG_ODIGIT | FLG_XDIGIT,
00509         /*  55 `7'    */  FLG_BASIC | FLG_DIGIT | FLG_ODIGIT | FLG_XDIGIT,
00510         /*  56 `8'    */  FLG_BASIC | FLG_DIGIT | FLG_XDIGIT,
00511         /*  57 `9'    */  FLG_BASIC | FLG_DIGIT | FLG_XDIGIT,
00512         /*  58 `:'    */  FLG_BASIC,
00513         /*  59 `;'    */  FLG_BASIC,
00514         /*  60 `<'    */  FLG_BASIC,
00515         /*  61 `='    */  FLG_BASIC,
00516         /*  62 `>'    */  FLG_BASIC,
00517         /*  63 `?'    */  FLG_BASIC,
00518         /*  64 `@'    */  FLG_NONE,
00519         /*  65 `A'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA | FLG_XDIGIT,
00520         /*  66 `B'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA | FLG_XDIGIT,
00521         /*  67 `C'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA | FLG_XDIGIT,
00522         /*  68 `D'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA | FLG_XDIGIT,
00523         /*  69 `E'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA | FLG_XDIGIT,
00524         /*  70 `F'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA | FLG_XDIGIT,
00525         /*  71 `G'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00526         /*  72 `H'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00527         /*  73 `I'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00528         /*  74 `J'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00529         /*  75 `K'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00530         /*  76 `L'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00531         /*  77 `M'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00532         /*  78 `N'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00533         /*  79 `O'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00534         /*  80 `P'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00535         /*  81 `Q'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00536         /*  82 `R'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00537         /*  83 `S'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00538         /*  84 `T'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00539         /*  85 `U'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00540         /*  86 `V'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00541         /*  87 `W'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00542         /*  88 `X'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00543         /*  89 `Y'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00544         /*  90 `Z'    */  FLG_BASIC | FLG_UPPER | FLG_ALPHA,
00545         /*  91 `['    */  FLG_BASIC,
00546         /*  92 `\\'   */  FLG_BASIC,
00547         /*  93 `]'    */  FLG_BASIC,
00548         /*  94 `^'    */  FLG_BASIC,
00549         /*  95 `_'    */  FLG_BASIC,
00550         /*  96 ``'    */  FLG_NONE,
00551         /*  97 `a'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA | FLG_XDIGIT,
00552         /*  98 `b'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA | FLG_XDIGIT,
00553         /*  99 `c'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA | FLG_XDIGIT,
00554         /* 100 `d'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA | FLG_XDIGIT,
00555         /* 101 `e'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA | FLG_XDIGIT,
00556         /* 102 `f'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA | FLG_XDIGIT,
00557         /* 103 `g'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00558         /* 104 `h'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00559         /* 105 `i'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00560         /* 106 `j'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00561         /* 107 `k'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00562         /* 108 `l'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00563         /* 109 `m'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00564         /* 110 `n'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00565         /* 111 `o'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00566         /* 112 `p'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00567         /* 113 `q'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00568         /* 114 `r'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00569         /* 115 `s'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00570         /* 116 `t'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00571         /* 117 `u'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00572         /* 118 `v'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00573         /* 119 `w'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00574         /* 120 `x'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00575         /* 121 `y'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00576         /* 122 `z'    */  FLG_BASIC | FLG_LOWER | FLG_ALPHA,
00577         /* 123 `{'    */  FLG_BASIC,
00578         /* 124 `|'    */  FLG_BASIC,
00579         /* 125 `}'    */  FLG_BASIC,
00580         /* 126 `~'    */  FLG_BASIC,
00581         /* 127 `\x7f' */  FLG_NONE
00582 };
00583 
00584 /*!
00585         List of unicode character ranges for C++ identifiers; the lookup treats both bounds of an entry as inclusive.
00586         Does not include basic source character ranges.
00587         Shall be sorted in increasing order (for the binary search in is_translated_identifier).
00588 */
00589 character::range_type character::identifier_ranges[] = {
00590         { 0x00C0, 0x00D6 },
00591         { 0x00D8, 0x00F6 },
00592         { 0x00F8, 0x01F5 },
00593         { 0x01FA, 0x0217 },
00594         { 0x0250, 0x02A8 },
00595         { 0x0384, 0x0384 },
00596         { 0x0388, 0x038A },
00597         { 0x038C, 0x038C },
00598         { 0x038E, 0x03A1 },
00599         { 0x03A3, 0x03CE },
00600         { 0x03D0, 0x03D6 },
00601         { 0x03DA, 0x03DA },
00602         { 0x03DC, 0x03DC },
00603         { 0x03DE, 0x03DE },
00604         { 0x03E0, 0x03E0 },
00605         { 0x03E2, 0x03F3 },
00606         { 0x0401, 0x040C },
00607         { 0x040D, 0x040D },
00608         { 0x040F, 0x044F },
00609         { 0x0451, 0x045C },
00610         { 0x045E, 0x0481 },
00611         { 0x0490, 0x04C4 },
00612         { 0x04C7, 0x04C8 },
00613         { 0x04CB, 0x04CC },
00614         { 0x04D0, 0x04EB },
00615         { 0x04EE, 0x04F5 },
00616         { 0x04F8, 0x04F9 },
00617         { 0x0531, 0x0556 },
00618         { 0x0561, 0x0587 },
00619         { 0x05D0, 0x05EA },
00620         { 0x05F0, 0x05F2 },
00621         { 0x05F3, 0x05F4 },
00622         { 0x0621, 0x063A },
00623         { 0x0640, 0x0652 },
00624         { 0x0670, 0x06B7 },
00625         { 0x06BA, 0x06BE },
00626         { 0x06C0, 0x06CE },
00627         { 0x06E5, 0x06E7 },
00628         { 0x0905, 0x0939 },
00629         { 0x0958, 0x0962 },
00630         { 0x0985, 0x098C },
00631         { 0x098F, 0x0990 },
00632         { 0x0993, 0x09A8 },
00633         { 0x09AA, 0x09B0 },
00634         { 0x09B2, 0x09B2 },
00635         { 0x09B6, 0x09B9 },
00636         { 0x09DC, 0x09DD },
00637         { 0x09DF, 0x09E1 },
00638         { 0x09F0, 0x09F1 },
00639         { 0x0A05, 0x0A0A },
00640         { 0x0A0F, 0x0A10 },
00641         { 0x0A13, 0x0A28 },
00642         { 0x0A2A, 0x0A30 },
00643         { 0x0A32, 0x0A33 },
00644         { 0x0A35, 0x0A36 },
00645         { 0x0A38, 0x0A39 },
00646         { 0x0A59, 0x0A5C },
00647         { 0x0A5E, 0x0A5E },
00648         { 0x0A85, 0x0A8B },
00649         { 0x0A8D, 0x0A8D },
00650         { 0x0A8F, 0x0A91 },
00651         { 0x0A93, 0x0AA8 },
00652         { 0x0AAA, 0x0AB0 },
00653         { 0x0AB2, 0x0AB3 },
00654         { 0x0AB5, 0x0AB9 },
00655         { 0x0AE0, 0x0AE0 },
00656         { 0x0B05, 0x0B0C },
00657         { 0x0B0F, 0x0B10 },
00658         { 0x0B13, 0x0B28 },
00659         { 0x0B2A, 0x0B30 },
00660         { 0x0B32, 0x0B33 },
00661         { 0x0B36, 0x0B39 },
00662         { 0x0B5C, 0x0B5D },
00663         { 0x0B5F, 0x0B61 },
00664         { 0x0B85, 0x0B8A },
00665         { 0x0B8E, 0x0B90 },
00666         { 0x0B92, 0x0B95 },
00667         { 0x0B99, 0x0B9A },
00668         { 0x0B9C, 0x0B9C },
00669         { 0x0B9E, 0x0B9F },
00670         { 0x0BA3, 0x0BA4 },
00671         { 0x0BA8, 0x0BAA },
00672         { 0x0BAE, 0x0BB5 },
00673         { 0x0BB7, 0x0BB9 },
00674         { 0x0C05, 0x0C0C },
00675         { 0x0C0E, 0x0C10 },
00676         { 0x0C12, 0x0C28 },
00677         { 0x0C2A, 0x0C33 },
00678         { 0x0C35, 0x0C39 },
00679         { 0x0C60, 0x0C61 },
00680         { 0x0C85, 0x0C8C },
00681         { 0x0C8E, 0x0C90 },
00682         { 0x0C92, 0x0CA8 },
00683         { 0x0CAA, 0x0CB3 },
00684         { 0x0CB5, 0x0CB9 },
00685         { 0x0CE0, 0x0CE1 },
00686         { 0x0D05, 0x0D0C },
00687         { 0x0D0E, 0x0D10 },
00688         { 0x0D12, 0x0D28 },
00689         { 0x0D2A, 0x0D39 },
00690         { 0x0D60, 0x0D61 },
00691         { 0x0E01, 0x0E30 },
00692         { 0x0E32, 0x0E33 },
00693         { 0x0E40, 0x0E46 },
00694         { 0x0E50, 0x0E59 },
00695         { 0x0E5A, 0x0E5B },
00696         { 0x0E81, 0x0E82 },
00697         { 0x0E84, 0x0E84 },
00698         { 0x0E87, 0x0E88 },
00699         { 0x0E8A, 0x0E8A },
00700         { 0x0E8D, 0x0E8D },
00701         { 0x0E94, 0x0E97 },
00702         { 0x0E99, 0x0E9F },
00703         { 0x0EA1, 0x0EA3 },
00704         { 0x0EA5, 0x0EA5 },
00705         { 0x0EA7, 0x0EA7 },
00706         { 0x0EAA, 0x0EAB },
00707         { 0x0EAD, 0x0EAE },
00708         { 0x0EAF, 0x0EAF },
00709         { 0x0EB0, 0x0EB0 },
00710         { 0x0EB2, 0x0EB3 },
00711         { 0x0EBD, 0x0EBD },
00712         { 0x0EC0, 0x0EC4 },
00713         { 0x0EC6, 0x0EC6 },
00714         { 0x10A0, 0x10C5 },
00715         { 0x10D0, 0x10F6 },
00716         { 0x1100, 0x1159 },
00717         { 0x1161, 0x11A2 },
00718         { 0x11A8, 0x11F9 },
00719         { 0x1E00, 0x1E9A },
00720         { 0x1EA0, 0x1EF9 },
00721         { 0x1F00, 0x1F15 },
00722         { 0x1F18, 0x1F1D },
00723         { 0x1F20, 0x1F45 },
00724         { 0x1F48, 0x1F4D },
00725         { 0x1F50, 0x1F57 },
00726         { 0x1F59, 0x1F59 },
00727         { 0x1F5B, 0x1F5B },
00728         { 0x1F5D, 0x1F5D },
00729         { 0x1F5F, 0x1F7D },
00730         { 0x1F80, 0x1FB4 },
00731         { 0x1FB6, 0x1FBC },
00732         { 0x1FC2, 0x1FC4 },
00733         { 0x1FC6, 0x1FCC },
00734         { 0x1FD0, 0x1FD3 },
00735         { 0x1FD6, 0x1FDB },
00736         { 0x1FE0, 0x1FEC },
00737         { 0x1FF2, 0x1FF4 },
00738         { 0x1FF6, 0x1FFC },
00739         { 0x3041, 0x3093 },
00740         { 0x3094, 0x3094 },
00741         { 0x309B, 0x309C },
00742         { 0x309D, 0x309E },
00743         { 0x30A1, 0x30F6 },
00744         { 0x30F7, 0x30FA },
00745         { 0x30FB, 0x30FC },
00746         { 0x30FD, 0x30FE },
00747         { 0x3105, 0x312C },
00748         { 0x4E00, 0x9FA5 },
00749         { 0xF900, 0xFA2D },
00750         { 0xFB1F, 0xFB36 },
00751         { 0xFB38, 0xFB3C },
00752         { 0xFB3E, 0xFB3E },
00753         { 0xFB40, 0xFB44 },
00754         { 0xFB46, 0xFBB1 },
00755         { 0xFBD3, 0xFD3F },
00756         { 0xFD50, 0xFD8F },
00757         { 0xFD92, 0xFDC7 },
00758         { 0xFDF0, 0xFDFB },
00759         { 0xFE70, 0xFE72 },
00760         { 0xFE74, 0xFE74 },
00761         { 0xFE76, 0xFEFC },
00762         { 0xFF21, 0xFF3A },
00763         { 0xFF41, 0xFF5A },
00764         { 0xFF66, 0xFFBE },
00765         { 0xFFC2, 0xFFC7 },
00766         { 0xFFCA, 0xFFCF },
00767         { 0xFFD2, 0xFFD7 },
00768         { 0xFFDA, 0xFFDC }
00769 };
00770 
00771 /*!
00772   Internal to host encoding of basic characters translation table.
00773   Entries holding hchar_unknown are unused; the constructor skips them when it fills host_to_internal.
00774   Assumes that no basic character has code hchar_unknown in host encoding.
00775 */
00776 hchar character::internal_to_host[character::ascii_length] = {
00777         character::hchar_unknown,
00778         character::hchar_unknown,
00779         character::hchar_unknown,
00780         character::hchar_unknown,
00781         character::hchar_unknown,
00782         character::hchar_unknown,
00783         character::hchar_unknown,
00784         '\a',
00785         '\b',
00786         '\t',
00787         '\n',
00788         '\v',
00789         '\f',
00790         '\r',
00791         character::hchar_unknown,
00792         character::hchar_unknown,
00793         character::hchar_unknown,
00794         character::hchar_unknown,
00795         character::hchar_unknown,
00796         character::hchar_unknown,
00797         character::hchar_unknown,
00798         character::hchar_unknown,
00799         character::hchar_unknown,
00800         character::hchar_unknown,
00801         character::hchar_unknown,
00802         character::hchar_unknown,
00803         character::hchar_unknown,
00804         character::hchar_unknown,
00805         character::hchar_unknown,
00806         character::hchar_unknown,
00807         character::hchar_unknown,
00808         character::hchar_unknown,
00809         ' ',
00810         '!',
00811         '"',
00812         '#',
00813         '$',
00814         '%',
00815         '&',
00816         '\'',
00817         '(',
00818         ')',
00819         '*',
00820         '+',
00821         ',',
00822         '-',
00823         '.',
00824         '/',
00825         '0',
00826         '1',
00827         '2',
00828         '3',
00829         '4',
00830         '5',
00831         '6',
00832         '7',
00833         '8',
00834         '9',
00835         ':',
00836         ';',
00837         '<',
00838         '=',
00839         '>',
00840         '?',
00841         '@',
00842         'A',
00843         'B',
00844         'C',
00845         'D',
00846         'E',
00847         'F',
00848         'G',
00849         'H',
00850         'I',
00851         'J',
00852         'K',
00853         'L',
00854         'M',
00855         'N',
00856         'O',
00857         'P',
00858         'Q',
00859         'R',
00860         'S',
00861         'T',
00862         'U',
00863         'V',
00864         'W',
00865         'X',
00866         'Y',
00867         'Z',
00868         '[',
00869         '\\',
00870         ']',
00871         '^',
00872         '_',
00873         '`',
00874         'a',
00875         'b',
00876         'c',
00877         'd',
00878         'e',
00879         'f',
00880         'g',
00881         'h',
00882         'i',
00883         'j',
00884         'k',
00885         'l',
00886         'm',
00887         'n',
00888         'o',
00889         'p',
00890         'q',
00891         'r',
00892         's',
00893         't',
00894         'u',
00895         'v',
00896         'w',
00897         'x',
00898         'y',
00899         'z',
00900         '{',
00901         '|',
00902         '}',
00903         '~',
00904         character::hchar_unknown
00905 };
00906 
00907 /*!
00908   Host to internal encoding translation table, indexed by the host character converted to ulint.
00909   This is reasonable only for narrow range host characters, could be substituted by map.
00910   Zeroed statically by the empty initializer; the constructor then stores the internal code of every mapped entry of internal_to_host.
00911 */
00912 ulint character::host_to_internal[host_length] = {
00913 };
00914 
00915 /*!
00916   Builds host_to_internal as the inverse of internal_to_host; only the first constructed instance does the work.
00917 */
00918 character::character(void)
00919 {
00920         if (!initialized) {
00921                 initialized = true;
00922                 // walk the forward table and record the reverse mapping of every mapped entry
00923                 for (ulint code = 0; code < ascii_length; ++code) {
00924                         const hchar host = internal_to_host[code];
00925                         if (host == hchar_unknown) continue;
00926                         host_to_internal[static_cast<ulint>(host)] = code;
00927                 }
00928         }
00929 }
00930 
00931 /*!
00932   Destroys the initializer instance; there is nothing to release.
00933 */
00934 character::~character(void)
00935 { /* intentionally empty */
00936 }
00937 
00938 /*!
00939   Decides whether a translated value is a unicode character allowed in C++ identifiers.
00940   Bisects the sorted identifier_ranges table, idea taken from gcc.
00941   \param u The value to test.
00942   \return true  If u is translated and its extracted value lies within one of the identifier ranges.
00943 */
00944 bool character::is_translated_identifier(ucn u)
00945 {
00946         if (!is_translated(u)) return false;
00947         const ulint value = extract_value(u);
00948 
00949         // half-open window [first, last) of table entries still in play
00950         ulint first = 0;
00951         ulint last = sizeof(identifier_ranges) / sizeof(identifier_ranges[0]);
00952         while (first < last) {
00953                 const ulint mid = first + (last - first) / 2;
00954                 if (value < identifier_ranges[mid].low) {
00955                         last = mid;
00956                 } else if (identifier_ranges[mid].high < value) {
00957                         first = mid + 1;
00958                 } else {
00959                         return true;
00960                 }
00961         }
00962         return false;
00963 }
00964 
00965 /*!
00966   Multiple initialization guard: starts false, the constructor sets it on its first run and returns early on every later run.
00967 */
00968 bool character::initialized = false;
00969 
00970 end_package(std);
00971 end_package(lestes);
00972 
00973 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:22:32 2007 for lestes by doxygen 1.5.1-20070107