00001 /* 00002 The lestes compiler suite 00003 Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy 00004 Copyright (C) 2002, 2003, 2004, 2005 Petr Zika 00005 Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala 00006 Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina 00007 Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda 00008 Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar 00009 Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas 00010 00011 This program is free software; you can redistribute it and/or modify 00012 it under the terms of the GNU General Public License as published by 00013 the Free Software Foundation; version 2 of the License. 00014 00015 This program is distributed in the hope that it will be useful, 00016 but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 GNU General Public License for more details. 00019 00020 See the full text of the GNU General Public License version 2, and 00021 the limitations in the file doc/LICENSE. 00022 00023 By accepting the license the licensee waives any and all claims 00024 against the copyright holder(s) related in whole or in part to the 00025 work, its use, and/or the inability to use it. 00026 00027 */ 00028 #ifndef lestes__std___character_hh___included 00029 #define lestes__std___character_hh___included 00030 00031 /*! \file 00032 \brief Representation of characters in source encoding. 00033 00034 Defines type ucn for storing characters in source encoding. 00035 Defines type hchar for storing host characters. 00036 Declares helper class character containing constants and manipulators. 00037 */ 00038 #include <lestes/common.hh> 00039 #include <limits> 00040 00041 package(lestes); 00042 package(std); 00043 00044 /*! 00045 \brief Type for storing characters in source encoding. 00046 00047 The source character is represented by an at least 32-bit unsigned integer. 00048 When bit 31 is set, a value stored in the lower bits is in external character set, 00049 either input file character set, or execution (target) character set, depending on the context. 00050 The exception is the eof value, represented as 32 one bits. 00051 Thus execution character with value 0x7FFFFFFF cannot be represented in this scheme. 00052 When bit 31 is not set, Unicode UTF-32 value is stored in the lower bits. 00053 When the type is longer than 32 bits, the higher bits shall not be used and it is entirely 00054 in the responsibility of the programmer to keep these zero, otherwise the whole 00055 mechanism would break. To keep sane, use character class static methods. 00056 */ 00057 typedef lc_host_uint_least32 ucn; 00058 00059 /*! 00060 \brief Host character type. 00061 00062 The native host character type, used in i/o operations. 00063 */ 00064 typedef char hchar; 00065 00066 /*! 00067 \brief Character constants and manipulators. 00068 00069 Contains symbolic constants for Unicode representation of basic source characters 00070 as well as creators, predicates, transformers and extracters. 00071 */ 00072 class character { 00073 public: 00074 //! ASCII bell character. 00075 static const ucn ascii_bell = static_cast<ucn>(0x07); 00076 //! ASCII backspace character. 00077 static const ucn ascii_backspace = static_cast<ucn>(0x08); 00078 //! ASCII tabulator character. 00079 static const ucn ascii_tab = static_cast<ucn>(0x09); 00080 //! ASCII vertical tabulator character. 00081 static const ucn ascii_vtab = static_cast<ucn>(0x0B); 00082 //! ASCII form feed character. 00083 static const ucn ascii_form_feed = static_cast<ucn>(0x0C); 00084 //! ASCII new line character. 00085 static const ucn ascii_new_line = static_cast<ucn>(0x0A); 00086 //! ASCII carriage return character. 00087 static const ucn ascii_carriage_return = static_cast<ucn>(0x0D); 00088 //! ASCII space character. 00089 static const ucn ascii_space = static_cast<ucn>(0x20); 00090 //! ASCII exclamation mark character. 00091 static const ucn ascii_emark = static_cast<ucn>(0x21); 00092 //! ASCII double quote character. 00093 static const ucn ascii_dquote = static_cast<ucn>(0x22); 00094 //! ASCII hash character. 00095 static const ucn ascii_hash = static_cast<ucn>(0x23); 00096 //! ASCII dollar character. 00097 static const ucn ascii_dollar = static_cast<ucn>(0x24); 00098 //! ASCII percent character. 00099 static const ucn ascii_percent = static_cast<ucn>(0x25); 00100 //! ASCII ampersand character. 00101 static const ucn ascii_amp = static_cast<ucn>(0x26); 00102 //! ASCII quote character. 00103 static const ucn ascii_quote = static_cast<ucn>(0x27); 00104 //! ASCII left parenthesis character. 00105 static const ucn ascii_left_par = static_cast<ucn>(0x28); 00106 //! ASCII right parenthesis character. 00107 static const ucn ascii_right_par = static_cast<ucn>(0x29); 00108 //! ASCII star character. 00109 static const ucn ascii_star = static_cast<ucn>(0x2A); 00110 //! ASCII plus character. 00111 static const ucn ascii_plus = static_cast<ucn>(0x2B); 00112 //! ASCII comma character. 00113 static const ucn ascii_comma = static_cast<ucn>(0x2C); 00114 //! ASCII minus character. 00115 static const ucn ascii_minus = static_cast<ucn>(0x2D); 00116 //! ASCII dot character. 00117 static const ucn ascii_dot = static_cast<ucn>(0x2E); 00118 //! ASCII slash character. 00119 static const ucn ascii_slash = static_cast<ucn>(0x2F); 00120 //! ASCII digit 0 character. 00121 static const ucn ascii_digit_0 = static_cast<ucn>(0x30); 00122 //! ASCII digit 1 character. 00123 static const ucn ascii_digit_1 = static_cast<ucn>(0x31); 00124 //! ASCII digit 2 character. 00125 static const ucn ascii_digit_2 = static_cast<ucn>(0x32); 00126 //! ASCII digit 3 character. 00127 static const ucn ascii_digit_3 = static_cast<ucn>(0x33); 00128 //! ASCII digit 4 character. 00129 static const ucn ascii_digit_4 = static_cast<ucn>(0x34); 00130 //! ASCII digit 5 character. 00131 static const ucn ascii_digit_5 = static_cast<ucn>(0x35); 00132 //! ASCII digit 6 character. 00133 static const ucn ascii_digit_6 = static_cast<ucn>(0x36); 00134 //! ASCII digit 7 character. 00135 static const ucn ascii_digit_7 = static_cast<ucn>(0x37); 00136 //! ASCII digit 8 character. 00137 static const ucn ascii_digit_8 = static_cast<ucn>(0x38); 00138 //! ASCII digit 9 character. 00139 static const ucn ascii_digit_9 = static_cast<ucn>(0x39); 00140 //! ASCII colon character. 00141 static const ucn ascii_colon = static_cast<ucn>(0x3A); 00142 //! ASCII semicolon character. 00143 static const ucn ascii_semicolon = static_cast<ucn>(0x3B); 00144 //! ASCII less than character. 00145 static const ucn ascii_lt = static_cast<ucn>(0x3C); 00146 //! ASCII equals character. 00147 static const ucn ascii_eq = static_cast<ucn>(0x3D); 00148 //! ASCII greater than character. 00149 static const ucn ascii_gt = static_cast<ucn>(0x3E); 00150 //! ASCII question mark character. 00151 static const ucn ascii_qmark = static_cast<ucn>(0x3F); 00152 //! ASCII commercial at character (zavinac). 00153 static const ucn ascii_at = static_cast<ucn>(0x40); 00154 //! ASCII upper a character. 00155 static const ucn ascii_upper_a = static_cast<ucn>(0x41); 00156 //! ASCII upper b character. 00157 static const ucn ascii_upper_b = static_cast<ucn>(0x42); 00158 //! ASCII upper c character. 00159 static const ucn ascii_upper_c = static_cast<ucn>(0x43); 00160 //! ASCII upper d character. 00161 static const ucn ascii_upper_d = static_cast<ucn>(0x44); 00162 //! ASCII upper e character. 00163 static const ucn ascii_upper_e = static_cast<ucn>(0x45); 00164 //! ASCII upper f character. 00165 static const ucn ascii_upper_f = static_cast<ucn>(0x46); 00166 //! ASCII upper g character. 00167 static const ucn ascii_upper_g = static_cast<ucn>(0x47); 00168 //! ASCII upper h character. 00169 static const ucn ascii_upper_h = static_cast<ucn>(0x48); 00170 //! ASCII upper i character. 00171 static const ucn ascii_upper_i = static_cast<ucn>(0x49); 00172 //! ASCII upper j character. 00173 static const ucn ascii_upper_j = static_cast<ucn>(0x4A); 00174 //! ASCII upper k character. 00175 static const ucn ascii_upper_k = static_cast<ucn>(0x4B); 00176 //! ASCII upper l character. 00177 static const ucn ascii_upper_l = static_cast<ucn>(0x4C); 00178 //! ASCII upper m character. 00179 static const ucn ascii_upper_m = static_cast<ucn>(0x4D); 00180 //! ASCII upper n character. 00181 static const ucn ascii_upper_n = static_cast<ucn>(0x4E); 00182 //! ASCII upper o character. 00183 static const ucn ascii_upper_o = static_cast<ucn>(0x4F); 00184 //! ASCII upper p character. 00185 static const ucn ascii_upper_p = static_cast<ucn>(0x50); 00186 //! ASCII upper q character. 00187 static const ucn ascii_upper_q = static_cast<ucn>(0x51); 00188 //! ASCII upper r character. 00189 static const ucn ascii_upper_r = static_cast<ucn>(0x52); 00190 //! ASCII upper s character. 00191 static const ucn ascii_upper_s = static_cast<ucn>(0x53); 00192 //! ASCII upper t character. 00193 static const ucn ascii_upper_t = static_cast<ucn>(0x54); 00194 //! ASCII upper u character. 00195 static const ucn ascii_upper_u = static_cast<ucn>(0x55); 00196 //! ASCII upper v character. 00197 static const ucn ascii_upper_v = static_cast<ucn>(0x56); 00198 //! ASCII upper w character. 00199 static const ucn ascii_upper_w = static_cast<ucn>(0x57); 00200 //! ASCII upper x character. 00201 static const ucn ascii_upper_x = static_cast<ucn>(0x58); 00202 //! ASCII upper y character. 00203 static const ucn ascii_upper_y = static_cast<ucn>(0x59); 00204 //! ASCII upper z character. 00205 static const ucn ascii_upper_z = static_cast<ucn>(0x5A); 00206 //! ASCII left bracket character. 00207 static const ucn ascii_left_bracket = static_cast<ucn>(0x5B); 00208 //! ASCII backslash character. 00209 static const ucn ascii_backslash = static_cast<ucn>(0x5C); 00210 //! ASCII right bracket character. 00211 static const ucn ascii_right_bracket = static_cast<ucn>(0x5D); 00212 //! ASCII hat character. 00213 static const ucn ascii_hat = static_cast<ucn>(0x5E); 00214 //! ASCII underscore character. 00215 static const ucn ascii_underscore = static_cast<ucn>(0x5F); 00216 //! ASCII back quote character. 00217 static const ucn ascii_bquote = static_cast<ucn>(0x60); 00218 //! ASCII lower a character. 00219 static const ucn ascii_lower_a = static_cast<ucn>(0x61); 00220 //! ASCII lower b character. 00221 static const ucn ascii_lower_b = static_cast<ucn>(0x62); 00222 //! ASCII lower c character. 00223 static const ucn ascii_lower_c = static_cast<ucn>(0x63); 00224 //! ASCII lower d character. 00225 static const ucn ascii_lower_d = static_cast<ucn>(0x64); 00226 //! ASCII lower e character. 00227 static const ucn ascii_lower_e = static_cast<ucn>(0x65); 00228 //! ASCII lower f character. 00229 static const ucn ascii_lower_f = static_cast<ucn>(0x66); 00230 //! ASCII lower g character. 00231 static const ucn ascii_lower_g = static_cast<ucn>(0x67); 00232 //! ASCII lower h character. 00233 static const ucn ascii_lower_h = static_cast<ucn>(0x68); 00234 //! ASCII lower i character. 00235 static const ucn ascii_lower_i = static_cast<ucn>(0x69); 00236 //! ASCII lower j character. 00237 static const ucn ascii_lower_j = static_cast<ucn>(0x6A); 00238 //! ASCII lower k character. 00239 static const ucn ascii_lower_k = static_cast<ucn>(0x6B); 00240 //! ASCII lower l character. 00241 static const ucn ascii_lower_l = static_cast<ucn>(0x6C); 00242 //! ASCII lower m character. 00243 static const ucn ascii_lower_m = static_cast<ucn>(0x6D); 00244 //! ASCII lower n character. 00245 static const ucn ascii_lower_n = static_cast<ucn>(0x6E); 00246 //! ASCII lower o character. 00247 static const ucn ascii_lower_o = static_cast<ucn>(0x6F); 00248 //! ASCII lower p character. 00249 static const ucn ascii_lower_p = static_cast<ucn>(0x70); 00250 //! ASCII lower q character. 00251 static const ucn ascii_lower_q = static_cast<ucn>(0x71); 00252 //! ASCII lower r character. 00253 static const ucn ascii_lower_r = static_cast<ucn>(0x72); 00254 //! ASCII lower s character. 00255 static const ucn ascii_lower_s = static_cast<ucn>(0x73); 00256 //! ASCII lower t character. 00257 static const ucn ascii_lower_t = static_cast<ucn>(0x74); 00258 //! ASCII lower u character. 00259 static const ucn ascii_lower_u = static_cast<ucn>(0x75); 00260 //! ASCII lower v character. 00261 static const ucn ascii_lower_v = static_cast<ucn>(0x76); 00262 //! ASCII lower w character. 00263 static const ucn ascii_lower_w = static_cast<ucn>(0x77); 00264 //! ASCII lower x character. 00265 static const ucn ascii_lower_x = static_cast<ucn>(0x78); 00266 //! ASCII lower y character. 00267 static const ucn ascii_lower_y = static_cast<ucn>(0x79); 00268 //! ASCII lower z character. 00269 static const ucn ascii_lower_z = static_cast<ucn>(0x7A); 00270 //! ASCII left brace character. 00271 static const ucn ascii_left_brace = static_cast<ucn>(0x7B); 00272 //! ASCII vertical bar character. 00273 static const ucn ascii_vbar = static_cast<ucn>(0x7C); 00274 //! ASCII right brace character. 00275 static const ucn ascii_right_brace = static_cast<ucn>(0x7D); 00276 //! ASCII tilde character. 00277 static const ucn ascii_tilde = static_cast<ucn>(0x7E); 00278 //! Initializes internal tables. 00279 character(void); 00280 //! Destructor of the initializer. 00281 ~character(void); 00282 //! Returns internal character set ucn. 00283 static inline ucn create_internal(ulint code); 00284 //! Returns internal ucn from host character. 00285 static inline ucn create_from_host(hchar c); 00286 //! Returns external character set ucn. 00287 static inline ucn create_external(ulint code); 00288 //! Returns internal character set ucn from hexadecimal digit. 00289 static inline ucn create_xdigit(ulint digit); 00290 //! Tests if value is internal character. 00291 static inline bool is_internal(ucn u); 00292 //! Tests if value is external character. 00293 static inline bool is_external(ucn u); 00294 //! Tests if value is basic source character. 00295 static inline bool is_basic(ucn u); 00296 //! Tests if value is translated source character. 00297 static inline bool is_translated(ucn u); 00298 //! Tests if value is 7 bit ASCII. 00299 static inline bool is_ascii7(ucn u); 00300 //! Tests if value is ASCII letter. 00301 static inline bool is_alpha(ucn u); 00302 //! Tests if value is ASCII uppercase letter. 00303 static inline bool is_upper(ucn u); 00304 //! Tests if value is ASCII lowercase letter. 00305 static inline bool is_lower(ucn u); 00306 //! Tests if value is ASCII digit. 00307 static inline bool is_digit(ucn u); 00308 //! Tests if value is ASCII octal digit. 00309 static inline bool is_odigit(ucn u); 00310 //! Tests if value is ASCII hexadecimal digit . 00311 static inline bool is_xdigit(ucn u); 00312 //! Tests if value is ASCII space character . 00313 static inline bool is_space(ucn u); 00314 //! Tests if value is translated C++ identifier character. 00315 static bool is_translated_identifier(ucn u); 00316 //! Tests if value is host character encodable in ucn. 00317 static inline bool is_encodable_host(ucn u); 00318 //! Returns uppercase of ASCII letter. 00319 static inline ucn to_upper(ucn u); 00320 //! Returns lowercase of ASCII letter. 00321 static inline ucn to_lower(ucn u); 00322 //! Returns host character. 00323 static inline hchar to_host(ucn u); 00324 //! Returns digit value. 00325 static inline ulint extract_digit(ucn u); 00326 //! Returns hexadecimal digit value. 00327 static inline ulint extract_xdigit(ucn u); 00328 //! Returns hexadecimal digit value. 00329 static inline ulint extract_odigit(ucn u); 00330 //! Returns character value. 00331 static inline ulint extract_value(ucn u); 00332 private: 00333 //! Number of significant ucn_bits. 00334 static const ulint ucn_bits = 32; 00335 //! Mask for character value. 00336 static const ucn value_mask = (static_cast<ucn>(1) << (ucn_bits - 1)) - 1; 00337 //! Mask for external characters. 00338 static const ucn external_mask = (static_cast<ucn>(1) << (ucn_bits - 1)); 00339 //! Internal eof constant, only for ucn_traits, must not be used elsewhere. 00340 static const ucn eof = ((static_cast<ucn>(1) << (ucn_bits - 1) - 1) << 1) | 1; 00341 //! Host character value presumably not representing encodable character. 00342 static const hchar hchar_unknown = '\0'; 00343 //! Length of 7 bit ASCII table. 00344 static const ulint ascii_length = 128; 00345 //! Length of table of host characters . 00346 static const ulint host_length = 1 << (::std::numeric_limits<hchar>::digits); 00347 //! Values of ASCII flags. 00348 enum ascii_flags_values { 00349 FLG_NONE = 0x00, 00350 FLG_BASIC = 0x01, 00351 FLG_UPPER = 0x02, 00352 FLG_LOWER = 0x04, 00353 FLG_ALPHA = 0x06, 00354 FLG_DIGIT = 0x08, 00355 FLG_ODIGIT = 0x10, 00356 FLG_XDIGIT = 0x20, 00357 FLG_SPACE = 0x40 00358 }; 00359 //! Type of ASCII flags. 00360 typedef lc_host_uint_least8 ascii_flags_type; 00361 //! Flags of ASCII characters. 00362 static ascii_flags_type ascii_flags[ascii_length]; 00363 //! Range of 16 bit codes. 00364 typedef struct { 00365 lc_host_uint_least16 low; 00366 lc_host_uint_least16 high; 00367 } range_type; 00368 //! List of unicode character ranges for C++ identifiers. 00369 static range_type identifier_ranges[]; 00370 //! Internal to host encoding of basic characters translation table. 00371 static hchar internal_to_host[ascii_length]; 00372 //! Host to internal value encoding translation table. 00373 static ulint host_to_internal[host_length]; 00374 //! Hides copy constructor. 00375 character(const character ©); 00376 //! Hides assignment operator. 00377 character &operator=(const character &rhs); 00378 //! Multiple initialization guard. 00379 static bool initialized; 00380 }; 00381 00382 /*! 00383 Returns ucn for internal character of given code. 00384 \pre code <= 0x7FFFFFFF 00385 \param code The code of the character. 00386 \return The internal character encoded in ucn. 00387 */ 00388 inline ucn character::create_internal(ulint code) 00389 { 00390 lassert(code <= 0x7FFFFFFF); 00391 return static_cast<ucn>(code); 00392 } 00393 00394 /*! 00395 Returns ucn for basic host character. 00396 \pre The host character is basic. 00397 \param c The host character. 00398 \return The host character encoded in ucn. 00399 */ 00400 inline ucn character::create_from_host(hchar c) 00401 { 00402 ulint i = static_cast<unsigned char>(c); 00403 ulint x = host_to_internal[i]; 00404 lassert(x != 0); 00405 return create_internal(x); 00406 } 00407 00408 /*! 00409 Returns ucn for external character of given code. 00410 \pre code < 0x7FFFFFFF 00411 \param code The code of the character. 00412 \return The external character encoded in ucn. 00413 */ 00414 inline ucn character::create_external(ulint code) 00415 { 00416 lassert(code < 0x7FFFFFFF); 00417 return static_cast<ucn>(code) | external_mask; 00418 } 00419 00420 /*! 00421 Returns ucn for internal character representing given lower case hexadecimal number. 00422 \pre digit < 16 00423 \param digit The hexadecimal digit. 00424 \return The digit as internal ucn. 00425 */ 00426 inline ucn character::create_xdigit(ulint digit) 00427 { 00428 lassert(digit < 16); 00429 return static_cast<ucn>(digit < 10 ? ascii_digit_0 + digit : ascii_lower_a + digit - 10); 00430 } 00431 00432 /*! 00433 Tests if value is internal character. 00434 \param u The value to test. 00435 \return true If the value represents internal character. 00436 */ 00437 inline bool character::is_internal(ucn u) 00438 { 00439 return (u & external_mask) == 0; 00440 } 00441 00442 /*! 00443 Tests if value is external host character with known encoding into ucn. 00444 \param u The value to test. 00445 \return true If the value represents encodable host character. 00446 */ 00447 inline bool character::is_encodable_host(ucn u) 00448 { 00449 if (!is_external(u)) return false; 00450 ulint x = extract_value(u); 00451 return x < host_length && host_to_internal[x] != 0; 00452 } 00453 00454 /*! 00455 Tests if value is external character. 00456 \param u The value to test. 00457 \return true If the value represents external character. 00458 */ 00459 inline bool character::is_external(ucn u) 00460 { 00461 return (u & external_mask) != 0; 00462 } 00463 00464 /*! 00465 Tests if value is basic source character. 00466 \param u The value to test. 00467 \return true If the value represents basic source character. 00468 */ 00469 inline bool character::is_basic(ucn u) 00470 { 00471 return is_ascii7(u) && (ascii_flags[u] & FLG_BASIC); 00472 } 00473 00474 /*! 00475 Tests if value is translated source character. 00476 Certain ranges are disallowed for translated characters. 00477 \param u The value to test. 00478 \return true If the value represents translated source character. 00479 */ 00480 inline bool character::is_translated(ucn u) 00481 { 00482 return is_internal(u) && !is_basic(u) && !(u < 0x20 || (0x7F <= u && u <= 0x9F)); 00483 } 00484 00485 /*! 00486 Tests if value is 7 bit ASCII. 00487 \param u The value to test. 00488 \return true If the value represents 7 bit ASCII character. 00489 */ 00490 inline bool character::is_ascii7(ucn u) 00491 { 00492 // implicitly is_internal(u) check 00493 return u < 0x80; 00494 } 00495 00496 /*! 00497 Tests if value is ASCII letter. 00498 \param u The value to test. 00499 \return true If the value represents ASCII letter. 00500 */ 00501 inline bool character::is_alpha(ucn u) 00502 { 00503 return is_ascii7(u) && (ascii_flags[u] & FLG_ALPHA == FLG_ALPHA); 00504 } 00505 00506 /*! 00507 Tests if value is ASCII uppercase letter. 00508 \param u The value to test. 00509 \return true If the value represents ASCII uppercase letter. 00510 */ 00511 inline bool character::is_upper(ucn u) 00512 { 00513 return is_ascii7(u) && (ascii_flags[u] & FLG_UPPER); 00514 } 00515 00516 /*! 00517 Tests if value is ASCII lowercase letter. 00518 \param u The value to test. 00519 \return true If the value represents ASCII lowercase letter. 00520 */ 00521 inline bool character::is_lower(ucn u) 00522 { 00523 return is_ascii7(u) && (ascii_flags[u] & FLG_LOWER); 00524 } 00525 00526 /*! 00527 Tests if value is ASCII digit. 00528 \param u The value to test. 00529 \return true If the value represents ASCII digit. 00530 */ 00531 inline bool character::is_digit(ucn u) 00532 { 00533 return is_ascii7(u) && (ascii_flags[u] & FLG_DIGIT); 00534 } 00535 00536 /*! 00537 Tests if value is ASCII octal digit. 00538 \param u The value to test. 00539 \return true If the value represents ASCII octal digit. 00540 */ 00541 inline bool character::is_odigit(ucn u) 00542 { 00543 return is_ascii7(u) && (ascii_flags[u] & FLG_ODIGIT); 00544 } 00545 00546 /*! 00547 Tests if value is ASCII hexadecimal digit. 00548 \param u The value to test. 00549 \return true If the value represents ASCII hexadecimal digit. 00550 */ 00551 inline bool character::is_xdigit(ucn u) 00552 { 00553 return is_ascii7(u) && (ascii_flags[u] & FLG_XDIGIT); 00554 } 00555 00556 /*! 00557 Tests if value is ASCII space character (space, tab, vtab, ff, cr, lf) 00558 \param u The value to test. 00559 \return true If the value represents ASCII space character. 00560 */ 00561 inline bool character::is_space(ucn u) 00562 { 00563 return is_ascii7(u) && (ascii_flags[u] & FLG_SPACE); 00564 } 00565 00566 /*! 00567 Returns uppercase of ASCII lowercase letter, other values intact. 00568 \param u The value to transform. 00569 \return Uppercase of the given lowercase letter or the original ucn. 00570 */ 00571 inline ucn character::to_upper(ucn u) 00572 { 00573 if (is_lower(u)) return u & 0xDF; 00574 return u; 00575 } 00576 00577 /*! 00578 Returns lowercase of ASCII uppercase letter, other values intact. 00579 \param u The value to transform. 00580 \return Uppercase of the given uppercase letter or the original ucn. 00581 */ 00582 inline ucn character::to_lower(ucn u) 00583 { 00584 if (is_upper(u)) return u | 0x20; 00585 return u; 00586 } 00587 00588 /*! 00589 Returns host character for subset of ASCII for which the encoding is known. 00590 It contains basic ASCII characters, '$', '@' and '`'. 00591 Other values are not supported. 00592 \pre The value is internal and contains only character with known encoding. 00593 \param u The value to transform. 00594 \return The given character in host encoding. 00595 */ 00596 inline hchar character::to_host(ucn u) 00597 { 00598 lassert(is_ascii7(u)); 00599 hchar c = internal_to_host[extract_value(u)]; 00600 lassert(c != hchar_unknown); 00601 return c; 00602 } 00603 00604 /*! 00605 Returns digit value of the ucn. 00606 \pre is_digit(u) 00607 \param u The digit to extract. 00608 \return The value of the digit. 00609 */ 00610 inline ulint character::extract_digit(ucn u) 00611 { 00612 lassert(is_digit(u)); 00613 return static_cast<ulint>(u) - ascii_digit_0; 00614 } 00615 00616 /*! 00617 Returns hexadecimal digit value of the ucn. 00618 \pre is_xdigit(u) 00619 \param u The digit to extract. 00620 \return The value of the hexadecimal digit. 00621 */ 00622 inline ulint character::extract_xdigit(ucn u) 00623 { 00624 if (is_digit(u)) return extract_digit(u); 00625 lassert(is_xdigit(u)); 00626 return static_cast<ulint>(to_upper(u)) - ascii_upper_a + 10; 00627 } 00628 00629 /*! 00630 Returns octal digit value of the ucn. 00631 \pre is_odigit(u) 00632 \param u The digit to extract. 00633 \return The value of the octal digit. 00634 */ 00635 inline ulint character::extract_odigit(ucn u) 00636 { 00637 lassert(is_odigit(u)); 00638 return extract_digit(u); 00639 } 00640 00641 /*! 00642 Returns character value of the ucn for both unicode and external characters. 00643 \param u The character to extract. 00644 \return The value of the stored character. 00645 */ 00646 inline ulint character::extract_value(ucn u) 00647 { 00648 return u & value_mask; 00649 } 00650 00651 /*! 00652 Initializer of static fields of character class. 00653 */ 00654 static character character_initializer; 00655 00656 end_package(std); 00657 end_package(lestes); 00658 00659 #endif 00660 /* vim: set ft=lestes : */
1.5.1-20070107