character.hh

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 #ifndef lestes__std___character_hh___included
00029 #define lestes__std___character_hh___included
00030 
00031 /*! \file
00032   \brief Representation of characters in source encoding.
00033 
00034   Defines type ucn for storing characters in source encoding.
00035   Defines type hchar for storing host characters.
00036   Declares helper class character containing constants and manipulators.
00037 */
00038 #include <lestes/common.hh>
00039 #include <limits>
00040 
00041 package(lestes);
00042 package(std);
00043 
/*!
  \brief Type for storing characters in source encoding.

  A source character is represented by an unsigned integer of at least 32 bits.
  When bit 31 is set, the value stored in the lower bits belongs to an external character set:
  either the input file character set or the execution (target) character set, depending on the context.
  The only exception is the eof value, which is represented as 32 one bits.
  Consequently, an execution character with value 0x7FFFFFFF cannot be represented in this scheme.
  When bit 31 is not set, the lower bits hold a Unicode UTF-32 value.
  When the type is wider than 32 bits, the higher bits shall not be used. It is entirely
  the responsibility of the programmer to keep them zero, otherwise the whole
  mechanism breaks. To stay safe, use the static methods of class character.
*/
typedef lc_host_uint_least32 ucn;
00058 
/*!
  \brief Host character type.

  The native host character type, used in i/o operations.
  It is plain char, so its signedness is whatever the host compiler chooses.
*/
typedef char hchar;
00065 
00066 /*!
00067   \brief Character constants and manipulators.
00068   
00069   Contains symbolic constants for Unicode representation of basic source characters
00070   as well as creators, predicates, transformers and extracters.
00071 */
00072 class character {
00073 public:
00074         //! ASCII bell character.
00075         static const ucn ascii_bell = static_cast<ucn>(0x07);
00076         //! ASCII backspace character.
00077         static const ucn ascii_backspace = static_cast<ucn>(0x08);
00078         //! ASCII tabulator character.
00079         static const ucn ascii_tab = static_cast<ucn>(0x09);
00080         //! ASCII vertical tabulator character.
00081         static const ucn ascii_vtab = static_cast<ucn>(0x0B);
00082         //! ASCII form feed character.
00083         static const ucn ascii_form_feed = static_cast<ucn>(0x0C);
00084         //! ASCII new line character.
00085         static const ucn ascii_new_line = static_cast<ucn>(0x0A);
00086         //! ASCII carriage return character.
00087         static const ucn ascii_carriage_return = static_cast<ucn>(0x0D);
00088         //! ASCII space character.
00089         static const ucn ascii_space = static_cast<ucn>(0x20);
00090         //! ASCII exclamation mark character.
00091         static const ucn ascii_emark = static_cast<ucn>(0x21);
00092         //! ASCII double quote character.
00093         static const ucn ascii_dquote = static_cast<ucn>(0x22);
00094         //! ASCII hash character.
00095         static const ucn ascii_hash = static_cast<ucn>(0x23);
00096         //! ASCII dollar character.
00097         static const ucn ascii_dollar = static_cast<ucn>(0x24);
00098         //! ASCII percent character.
00099         static const ucn ascii_percent = static_cast<ucn>(0x25);
00100         //! ASCII ampersand character.
00101         static const ucn ascii_amp = static_cast<ucn>(0x26);
00102         //! ASCII quote character.
00103         static const ucn ascii_quote = static_cast<ucn>(0x27);
00104         //! ASCII left parenthesis character.
00105         static const ucn ascii_left_par = static_cast<ucn>(0x28);
00106         //! ASCII right parenthesis character.
00107         static const ucn ascii_right_par = static_cast<ucn>(0x29);
00108         //! ASCII star character.
00109         static const ucn ascii_star = static_cast<ucn>(0x2A);
00110         //! ASCII plus character.
00111         static const ucn ascii_plus = static_cast<ucn>(0x2B);
00112         //! ASCII comma character.
00113         static const ucn ascii_comma = static_cast<ucn>(0x2C);
00114         //! ASCII minus character.
00115         static const ucn ascii_minus = static_cast<ucn>(0x2D);
00116         //! ASCII dot character.
00117         static const ucn ascii_dot = static_cast<ucn>(0x2E);
00118         //! ASCII slash character.
00119         static const ucn ascii_slash = static_cast<ucn>(0x2F);
00120         //! ASCII digit 0 character.
00121         static const ucn ascii_digit_0 = static_cast<ucn>(0x30);
00122         //! ASCII digit 1 character.
00123         static const ucn ascii_digit_1 = static_cast<ucn>(0x31);
00124         //! ASCII digit 2 character.
00125         static const ucn ascii_digit_2 = static_cast<ucn>(0x32);
00126         //! ASCII digit 3 character.
00127         static const ucn ascii_digit_3 = static_cast<ucn>(0x33);
00128         //! ASCII digit 4 character.
00129         static const ucn ascii_digit_4 = static_cast<ucn>(0x34);
00130         //! ASCII digit 5 character.
00131         static const ucn ascii_digit_5 = static_cast<ucn>(0x35);
00132         //! ASCII digit 6 character.
00133         static const ucn ascii_digit_6 = static_cast<ucn>(0x36);
00134         //! ASCII digit 7 character.
00135         static const ucn ascii_digit_7 = static_cast<ucn>(0x37);
00136         //! ASCII digit 8 character.
00137         static const ucn ascii_digit_8 = static_cast<ucn>(0x38);
00138         //! ASCII digit 9 character.
00139         static const ucn ascii_digit_9 = static_cast<ucn>(0x39);
00140         //! ASCII colon character.
00141         static const ucn ascii_colon = static_cast<ucn>(0x3A);
00142         //! ASCII semicolon character.
00143         static const ucn ascii_semicolon = static_cast<ucn>(0x3B);
00144         //! ASCII less than character.
00145         static const ucn ascii_lt = static_cast<ucn>(0x3C);
00146         //! ASCII equals character.
00147         static const ucn ascii_eq = static_cast<ucn>(0x3D);
00148         //! ASCII greater than character.
00149         static const ucn ascii_gt = static_cast<ucn>(0x3E);
00150         //! ASCII question mark character.
00151         static const ucn ascii_qmark = static_cast<ucn>(0x3F);
00152         //! ASCII commercial at character (zavinac).
00153         static const ucn ascii_at = static_cast<ucn>(0x40);
00154         //! ASCII upper a character.
00155         static const ucn ascii_upper_a = static_cast<ucn>(0x41);
00156         //! ASCII upper b character.
00157         static const ucn ascii_upper_b = static_cast<ucn>(0x42);
00158         //! ASCII upper c character.
00159         static const ucn ascii_upper_c = static_cast<ucn>(0x43);
00160         //! ASCII upper d character.
00161         static const ucn ascii_upper_d = static_cast<ucn>(0x44);
00162         //! ASCII upper e character.
00163         static const ucn ascii_upper_e = static_cast<ucn>(0x45);
00164         //! ASCII upper f character.
00165         static const ucn ascii_upper_f = static_cast<ucn>(0x46);
00166         //! ASCII upper g character.
00167         static const ucn ascii_upper_g = static_cast<ucn>(0x47);
00168         //! ASCII upper h character.
00169         static const ucn ascii_upper_h = static_cast<ucn>(0x48);
00170         //! ASCII upper i character.
00171         static const ucn ascii_upper_i = static_cast<ucn>(0x49);
00172         //! ASCII upper j character.
00173         static const ucn ascii_upper_j = static_cast<ucn>(0x4A);
00174         //! ASCII upper k character.
00175         static const ucn ascii_upper_k = static_cast<ucn>(0x4B);
00176         //! ASCII upper l character.
00177         static const ucn ascii_upper_l = static_cast<ucn>(0x4C);
00178         //! ASCII upper m character.
00179         static const ucn ascii_upper_m = static_cast<ucn>(0x4D);
00180         //! ASCII upper n character.
00181         static const ucn ascii_upper_n = static_cast<ucn>(0x4E);
00182         //! ASCII upper o character.
00183         static const ucn ascii_upper_o = static_cast<ucn>(0x4F);
00184         //! ASCII upper p character.
00185         static const ucn ascii_upper_p = static_cast<ucn>(0x50);
00186         //! ASCII upper q character.
00187         static const ucn ascii_upper_q = static_cast<ucn>(0x51);
00188         //! ASCII upper r character.
00189         static const ucn ascii_upper_r = static_cast<ucn>(0x52);
00190         //! ASCII upper s character.
00191         static const ucn ascii_upper_s = static_cast<ucn>(0x53);
00192         //! ASCII upper t character.
00193         static const ucn ascii_upper_t = static_cast<ucn>(0x54);
00194         //! ASCII upper u character.
00195         static const ucn ascii_upper_u = static_cast<ucn>(0x55);
00196         //! ASCII upper v character.
00197         static const ucn ascii_upper_v = static_cast<ucn>(0x56);
00198         //! ASCII upper w character.
00199         static const ucn ascii_upper_w = static_cast<ucn>(0x57);
00200         //! ASCII upper x character.
00201         static const ucn ascii_upper_x = static_cast<ucn>(0x58);
00202         //! ASCII upper y character.
00203         static const ucn ascii_upper_y = static_cast<ucn>(0x59);
00204         //! ASCII upper z character.
00205         static const ucn ascii_upper_z = static_cast<ucn>(0x5A);
00206         //! ASCII left bracket character.
00207         static const ucn ascii_left_bracket = static_cast<ucn>(0x5B);
00208         //! ASCII backslash character.
00209         static const ucn ascii_backslash = static_cast<ucn>(0x5C);
00210         //! ASCII right bracket character.
00211         static const ucn ascii_right_bracket = static_cast<ucn>(0x5D);
00212         //! ASCII hat character.
00213         static const ucn ascii_hat = static_cast<ucn>(0x5E);
00214         //! ASCII underscore character.
00215         static const ucn ascii_underscore = static_cast<ucn>(0x5F);
00216         //! ASCII back quote character.
00217         static const ucn ascii_bquote = static_cast<ucn>(0x60);
00218         //! ASCII lower a character.
00219         static const ucn ascii_lower_a = static_cast<ucn>(0x61);
00220         //! ASCII lower b character.
00221         static const ucn ascii_lower_b = static_cast<ucn>(0x62);
00222         //! ASCII lower c character.
00223         static const ucn ascii_lower_c = static_cast<ucn>(0x63);
00224         //! ASCII lower d character.
00225         static const ucn ascii_lower_d = static_cast<ucn>(0x64);
00226         //! ASCII lower e character.
00227         static const ucn ascii_lower_e = static_cast<ucn>(0x65);
00228         //! ASCII lower f character.
00229         static const ucn ascii_lower_f = static_cast<ucn>(0x66);
00230         //! ASCII lower g character.
00231         static const ucn ascii_lower_g = static_cast<ucn>(0x67);
00232         //! ASCII lower h character.
00233         static const ucn ascii_lower_h = static_cast<ucn>(0x68);
00234         //! ASCII lower i character.
00235         static const ucn ascii_lower_i = static_cast<ucn>(0x69);
00236         //! ASCII lower j character.
00237         static const ucn ascii_lower_j = static_cast<ucn>(0x6A);
00238         //! ASCII lower k character.
00239         static const ucn ascii_lower_k = static_cast<ucn>(0x6B);
00240         //! ASCII lower l character.
00241         static const ucn ascii_lower_l = static_cast<ucn>(0x6C);
00242         //! ASCII lower m character.
00243         static const ucn ascii_lower_m = static_cast<ucn>(0x6D);
00244         //! ASCII lower n character.
00245         static const ucn ascii_lower_n = static_cast<ucn>(0x6E);
00246         //! ASCII lower o character.
00247         static const ucn ascii_lower_o = static_cast<ucn>(0x6F);
00248         //! ASCII lower p character.
00249         static const ucn ascii_lower_p = static_cast<ucn>(0x70);
00250         //! ASCII lower q character.
00251         static const ucn ascii_lower_q = static_cast<ucn>(0x71);
00252         //! ASCII lower r character.
00253         static const ucn ascii_lower_r = static_cast<ucn>(0x72);
00254         //! ASCII lower s character.
00255         static const ucn ascii_lower_s = static_cast<ucn>(0x73);
00256         //! ASCII lower t character.
00257         static const ucn ascii_lower_t = static_cast<ucn>(0x74);
00258         //! ASCII lower u character.
00259         static const ucn ascii_lower_u = static_cast<ucn>(0x75);
00260         //! ASCII lower v character.
00261         static const ucn ascii_lower_v = static_cast<ucn>(0x76);
00262         //! ASCII lower w character.
00263         static const ucn ascii_lower_w = static_cast<ucn>(0x77);
00264         //! ASCII lower x character.
00265         static const ucn ascii_lower_x = static_cast<ucn>(0x78);
00266         //! ASCII lower y character.
00267         static const ucn ascii_lower_y = static_cast<ucn>(0x79);
00268         //! ASCII lower z character.
00269         static const ucn ascii_lower_z = static_cast<ucn>(0x7A);
00270         //! ASCII left brace character.
00271         static const ucn ascii_left_brace = static_cast<ucn>(0x7B);
00272         //! ASCII vertical bar character.
00273         static const ucn ascii_vbar = static_cast<ucn>(0x7C);
00274         //! ASCII right brace character.
00275         static const ucn ascii_right_brace = static_cast<ucn>(0x7D);
00276         //! ASCII tilde character.
00277         static const ucn ascii_tilde = static_cast<ucn>(0x7E);
00278         //! Initializes internal tables.
00279         character(void);
00280         //! Destructor of the initializer.
00281         ~character(void);
00282         //! Returns internal character set ucn.
00283         static inline ucn create_internal(ulint code);
00284         //! Returns internal ucn from host character.
00285         static inline ucn create_from_host(hchar c);
00286         //! Returns external character set ucn.
00287         static inline ucn create_external(ulint code);
00288         //! Returns internal character set ucn from hexadecimal digit.
00289         static inline ucn create_xdigit(ulint digit);
00290         //! Tests if value is internal character.
00291         static inline bool is_internal(ucn u);
00292         //! Tests if value is external character.
00293         static inline bool is_external(ucn u);
00294         //! Tests if value is basic source character.
00295         static inline bool is_basic(ucn u);
00296         //! Tests if value is translated source character.
00297         static inline bool is_translated(ucn u);
00298         //! Tests if value is 7 bit ASCII.
00299         static inline bool is_ascii7(ucn u);
00300         //! Tests if value is ASCII letter.
00301         static inline bool is_alpha(ucn u);
00302         //! Tests if value is ASCII uppercase letter.
00303         static inline bool is_upper(ucn u);
00304         //! Tests if value is ASCII lowercase letter.
00305         static inline bool is_lower(ucn u);
00306         //! Tests if value is ASCII digit.
00307         static inline bool is_digit(ucn u);
00308         //! Tests if value is ASCII octal digit.
00309         static inline bool is_odigit(ucn u);
00310         //! Tests if value is ASCII hexadecimal digit .
00311         static inline bool is_xdigit(ucn u);
00312         //! Tests if value is ASCII space character .
00313         static inline bool is_space(ucn u);
00314         //! Tests if value is translated C++ identifier character.
00315         static bool is_translated_identifier(ucn u);
00316         //! Tests if value is host character encodable in ucn.
00317         static inline bool is_encodable_host(ucn u);
00318         //! Returns uppercase of ASCII letter.
00319         static inline ucn to_upper(ucn u);
00320         //! Returns lowercase of ASCII letter.
00321         static inline ucn to_lower(ucn u);
00322         //! Returns host character.
00323         static inline hchar to_host(ucn u);
00324         //! Returns digit value.
00325         static inline ulint extract_digit(ucn u);
00326         //! Returns hexadecimal digit value.
00327         static inline ulint extract_xdigit(ucn u);
00328         //! Returns hexadecimal digit value.
00329         static inline ulint extract_odigit(ucn u);
00330         //! Returns character value.
00331         static inline ulint extract_value(ucn u);
00332 private:
00333         //! Number of significant ucn_bits.
00334         static const ulint ucn_bits = 32;
00335         //! Mask for character value.
00336         static const ucn value_mask = (static_cast<ucn>(1) << (ucn_bits - 1)) - 1;
00337         //! Mask for external characters.
00338         static const ucn external_mask = (static_cast<ucn>(1) << (ucn_bits - 1));
00339         //! Internal eof constant, only for ucn_traits, must not be used elsewhere.
00340         static const ucn eof = ((static_cast<ucn>(1) << (ucn_bits - 1) - 1) << 1) | 1;
00341         //! Host character value presumably not representing encodable character.
00342         static const hchar hchar_unknown = '\0';
00343         //! Length of 7 bit ASCII table.
00344         static const ulint ascii_length = 128;
00345         //! Length of table of host characters .
00346         static const ulint host_length = 1 << (::std::numeric_limits<hchar>::digits);
00347         //! Values of ASCII flags.
00348         enum ascii_flags_values {
00349                 FLG_NONE = 0x00,
00350                 FLG_BASIC = 0x01,
00351                 FLG_UPPER = 0x02,
00352                 FLG_LOWER = 0x04,
00353                 FLG_ALPHA = 0x06,
00354                 FLG_DIGIT = 0x08,
00355                 FLG_ODIGIT = 0x10,
00356                 FLG_XDIGIT = 0x20,
00357                 FLG_SPACE = 0x40
00358         };
00359         //! Type of ASCII flags.
00360         typedef lc_host_uint_least8 ascii_flags_type;
00361         //! Flags of ASCII characters.
00362         static ascii_flags_type ascii_flags[ascii_length];
00363         //! Range of 16 bit codes.
00364         typedef struct {
00365                 lc_host_uint_least16 low;
00366                 lc_host_uint_least16 high;
00367         } range_type;
00368         //! List of unicode character ranges for C++ identifiers.
00369         static range_type identifier_ranges[];
00370         //! Internal to host encoding of basic characters translation table.
00371         static hchar internal_to_host[ascii_length];
00372         //! Host to internal value encoding translation table.
00373         static ulint host_to_internal[host_length];
00374         //! Hides copy constructor.
00375         character(const character &copy);
00376         //! Hides assignment operator.
00377         character &operator=(const character &rhs);
00378         //! Multiple initialization guard.
00379         static bool initialized;
00380 };
00381 
00382 /*!
00383   Returns ucn for internal character of given code.
00384   \pre code <= 0x7FFFFFFF
00385   \param code  The code of the character.
00386   \return The internal character encoded in ucn.
00387 */
00388 inline ucn character::create_internal(ulint code)
00389 {
00390         lassert(code <= 0x7FFFFFFF);
00391         return static_cast<ucn>(code);
00392 }
00393 
00394 /*!
00395   Returns ucn for basic host character.
00396   \pre The host character is basic.
00397   \param c  The host character.
00398   \return The host character encoded in ucn.
00399 */
00400 inline ucn character::create_from_host(hchar c)
00401 {
00402         ulint i = static_cast<unsigned char>(c);
00403         ulint x = host_to_internal[i];
00404         lassert(x != 0);
00405         return create_internal(x);
00406 }
00407 
00408 /*!
00409   Returns ucn for external character of given code.
00410   \pre code < 0x7FFFFFFF
00411   \param code  The code of the character.
00412   \return The external character encoded in ucn.
00413 */
00414 inline ucn character::create_external(ulint code)
00415 {
00416         lassert(code < 0x7FFFFFFF);
00417         return static_cast<ucn>(code) | external_mask;  
00418 }
00419 
00420 /*!
00421   Returns ucn for internal character representing given lower case hexadecimal number.
00422   \pre digit < 16
00423   \param digit  The hexadecimal digit.
00424   \return The digit as internal ucn.
00425 */
00426 inline ucn character::create_xdigit(ulint digit)
00427 {
00428         lassert(digit < 16);
00429         return static_cast<ucn>(digit < 10 ? ascii_digit_0 + digit : ascii_lower_a + digit - 10);
00430 }
00431 
00432 /*!
00433   Tests if value is internal character.
00434   \param u  The value to test.
00435   \return true  If the value represents internal character.
00436 */
00437 inline bool character::is_internal(ucn u)
00438 {
00439         return (u & external_mask) == 0;
00440 }
00441 
00442 /*!
00443   Tests if value is external host character with known encoding into ucn.
00444   \param u  The value to test.
00445   \return true  If the value represents encodable host character.
00446 */
00447 inline bool character::is_encodable_host(ucn u)
00448 {
00449         if (!is_external(u)) return false;
00450         ulint x = extract_value(u);
00451         return x < host_length && host_to_internal[x] != 0;
00452 }
00453 
00454 /*!
00455   Tests if value is external character.
00456   \param u The value to test.
00457   \return true  If the value represents external character.
00458 */
00459 inline bool character::is_external(ucn u)
00460 {
00461         return (u & external_mask) != 0; 
00462 }
00463 
00464 /*!
00465   Tests if value is basic source character.
00466   \param u The value to test.
00467   \return true  If the value represents basic source character.
00468 */
00469 inline bool character::is_basic(ucn u)
00470 {
00471         return is_ascii7(u) && (ascii_flags[u] & FLG_BASIC);
00472 }
00473 
00474 /*!
00475   Tests if value is translated source character.
00476   Certain ranges are disallowed for translated characters.
00477   \param u The value to test.
00478   \return true  If the value represents translated source character.
00479 */
00480 inline bool character::is_translated(ucn u)
00481 {
00482         return is_internal(u) && !is_basic(u) && !(u < 0x20 || (0x7F <= u && u <= 0x9F));
00483 }
00484 
00485 /*!
00486   Tests if value is 7 bit ASCII.
00487   \param u The value to test.
00488   \return true  If the value represents 7 bit ASCII character.
00489 */
00490 inline bool character::is_ascii7(ucn u)
00491 {
00492         // implicitly is_internal(u) check
00493         return u < 0x80;
00494 }
00495 
00496 /*!
00497   Tests if value is ASCII letter.
00498   \param u The value to test.
00499   \return true  If the value represents ASCII letter.
00500 */
00501 inline bool character::is_alpha(ucn u)
00502 {
00503         return is_ascii7(u) && (ascii_flags[u] & FLG_ALPHA == FLG_ALPHA);
00504 }
00505 
00506 /*!
00507   Tests if value is ASCII uppercase letter.
00508   \param u The value to test.
00509   \return true  If the value represents ASCII uppercase letter.
00510 */
00511 inline bool character::is_upper(ucn u)
00512 {
00513         return is_ascii7(u) && (ascii_flags[u] & FLG_UPPER);
00514 }
00515 
00516 /*!
00517   Tests if value is ASCII lowercase letter.
00518   \param u The value to test.
00519   \return true  If the value represents ASCII lowercase letter.
00520 */
00521 inline bool character::is_lower(ucn u)
00522 {
00523         return is_ascii7(u) && (ascii_flags[u] & FLG_LOWER);
00524 }
00525 
00526 /*!
00527   Tests if value is ASCII digit.
00528   \param u The value to test.
00529   \return true  If the value represents ASCII digit.
00530 */
00531 inline bool character::is_digit(ucn u)
00532 {
00533         return is_ascii7(u) && (ascii_flags[u] & FLG_DIGIT);
00534 }
00535 
00536 /*!
00537   Tests if value is ASCII octal digit.
00538   \param u The value to test.
00539   \return true  If the value represents ASCII octal digit.
00540 */
00541 inline bool character::is_odigit(ucn u)
00542 {
00543         return is_ascii7(u) && (ascii_flags[u] & FLG_ODIGIT);
00544 }
00545 
00546 /*!
00547   Tests if value is ASCII hexadecimal digit.
00548   \param u The value to test.
00549   \return true  If the value represents ASCII hexadecimal digit.
00550 */
00551 inline bool character::is_xdigit(ucn u)
00552 {
00553         return is_ascii7(u) && (ascii_flags[u] & FLG_XDIGIT);
00554 }
00555 
00556 /*!
00557   Tests if value is ASCII space character (space, tab, vtab, ff, cr, lf) 
00558   \param u The value to test.
00559   \return true  If the value represents ASCII space character.
00560 */
00561 inline bool character::is_space(ucn u)
00562 {
00563         return is_ascii7(u) && (ascii_flags[u] & FLG_SPACE);
00564 }
00565 
00566 /*!
00567   Returns uppercase of ASCII lowercase letter, other values intact.
00568   \param u The value to transform.
00569   \return Uppercase of the given lowercase letter or the original ucn.
00570 */
00571 inline ucn character::to_upper(ucn u)
00572 {
00573         if (is_lower(u)) return u & 0xDF;
00574         return u;
00575 }
00576 
00577 /*!
00578   Returns lowercase of ASCII uppercase letter, other values intact.
00579   \param u The value to transform.
00580   \return Uppercase of the given uppercase letter or the original ucn.
00581 */
00582 inline ucn character::to_lower(ucn u)
00583 {
00584         if (is_upper(u)) return u | 0x20;
00585         return u;
00586 }
00587 
00588 /*!
00589   Returns host character for subset of ASCII for which the encoding is known.
00590   It contains basic ASCII characters, '$', '@' and '`'.
00591   Other values are not supported.
00592   \pre The value is internal and contains only character with known encoding.
00593   \param u The value to transform.
00594   \return The given character in host encoding.
00595 */
00596 inline hchar character::to_host(ucn u)
00597 {
00598         lassert(is_ascii7(u));
00599         hchar c = internal_to_host[extract_value(u)];
00600         lassert(c != hchar_unknown);
00601         return c;
00602 }
00603 
00604 /*!
00605   Returns digit value of the ucn.
00606   \pre is_digit(u)
00607   \param u  The digit to extract.
00608   \return The value of the digit.
00609 */
00610 inline ulint character::extract_digit(ucn u)
00611 {
00612         lassert(is_digit(u));
00613         return static_cast<ulint>(u) - ascii_digit_0;
00614 }
00615 
00616 /*!
00617   Returns hexadecimal digit value of the ucn.
00618   \pre is_xdigit(u)
00619   \param u  The digit to extract.
00620   \return The value of the hexadecimal digit.
00621 */
00622 inline ulint character::extract_xdigit(ucn u)
00623 {
00624         if (is_digit(u)) return extract_digit(u);
00625         lassert(is_xdigit(u));
00626         return static_cast<ulint>(to_upper(u)) - ascii_upper_a + 10;
00627 }
00628 
00629 /*!
00630   Returns octal digit value of the ucn.
00631   \pre is_odigit(u)
00632   \param u  The digit to extract.
00633   \return The value of the octal digit.
00634 */
00635 inline ulint character::extract_odigit(ucn u)
00636 {
00637         lassert(is_odigit(u));
00638         return extract_digit(u); 
00639 }
00640 
00641 /*!
00642   Returns character value of the ucn for both unicode and external characters.
00643   \param u  The character to extract.
00644   \return The value of the stored character.
00645 */
00646 inline ulint character::extract_value(ucn u)
00647 {
00648         return u & value_mask; 
00649 }
00650 
/*!
  Initializer of static fields of character class.

  The object is declared static in this header, so every translation unit
  including the header gets its own instance. The character constructor is
  documented as initializing the internal tables, and character::initialized
  is declared as the guard against multiple initialization.
*/
static character character_initializer;
00655 
00656 end_package(std);
00657 end_package(lestes);
00658 
00659 #endif
00660 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:22:32 2007 for lestes by doxygen 1.5.1-20070107