preprocessor.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief C++ preprocessor. 
00030   
00031   Definition of preprocessor class representing the entire C++ preprocessor.
00032   \author pt
00033 */
00034 #include <lestes/common.hh>
00035 #include <lestes/lang/cplus/lex/preprocessor.hh>
00036 #include <lestes/lang/cplus/lex/preprocessor.m.hh>
00037 #include <lestes/lang/cplus/lex/cpp_token.hh>
00038 #include <lestes/lang/cplus/lex/evaluator.hh>
00039 #include <lestes/lang/cplus/lex/string_joiner.hh>
00040 #include <lestes/lang/cplus/lex/space_remover.hh>
00041 #include <lestes/lang/cplus/lex/lex_literal.g.hh>
00042 #include <lestes/lang/cplus/lex/file_system.hh>
00043 #include <lestes/lang/cplus/lex/string_translator.hh>
00044 #include <lestes/lang/cplus/lex/token_value.hh>
00045 #include <lestes/lang/cplus/lex/lex_loggers.hh>
00046 #include <lestes/msg/logger.hh>
00047 #include <lestes/msg/logger_util.hh>
00048 #include <lestes/std/source_location.hh>
00049 
00050 package(lestes);
00051 package(lang);
00052 package(cplus);
00053 package(lex);
00054 
00055 /*!
00056   Creates new preprocessor.
00057   \pre a_fs != NULL
00058   \param a_fs  The file system binding.
00059   \param file_name  The full name of the file to process.
00060 */
00061 preprocessor::preprocessor(const ptr<file_system> &a_fs, const lstring &file_name):
00062         order(1), // order numbers handed out by read() start at 1
00063         fs(checked(a_fs)), // checked() presumably enforces the a_fs != NULL precondition -- TODO confirm
00064         eva(evaluator::create(a_fs,file_name)), // first stage: evaluator created for the given file
00065         spr(space_remover::create(eva)), // each following stage is created on top of the previous one
00066         str(string_translator::create(spr)),
00067         stj(string_joiner::create(str)) // last stage; read() pulls its tokens from here
00068 {
00069 }
00070 
00071 /*!
00072   Returns pragma flag.
00073   \return true  If #pragma lestes was encountered.
00074 */
00075 bool preprocessor::pragma_flag_get(void) const
00076 {
00077         return eva->pragma_flag_get(); // simply forwards the value reported by the evaluator
00078 }
00079 
00080 /*!
00081   Reads next token from the translation unit.
00082   Converts pp_token to cpp_token; a token that cannot be converted is reported as an error and skipped.
00083   Extracts additional information for literals.
00084   \return  The next token in translation unit.
00085 */
00086 ptr<cpp_token> preprocessor::read(void)
00087 {
00088         preprocessor_logger << "preprocessor::read()\n" << msg::eolog;
00089 
00090         ptr<pp_token> t; // current token read from the string joiner
00091         ptr<source_location> loc; // location of t, re-created with the running order number
00092         ptr<token_value> val; // value carried by t, used for literals and identifiers
00093         bool wide; // set by the wide literal cases before they fall through to the narrow ones
00094         ptr<lex_literal> lit; // literal properties returned by classify_number()
00095         ptr<cpp_token> ct; // the result; the loop below runs until it is set
00096 
00097         while (!ct) { // repeat until some token has been converted
00098                 t = stj->read();
00099                 loc = t->location_get();
00100                 // add proper order
00101                 loc = loc->clone_order(order++); // order advances for every token read, including the ones skipped below
00102                 val = t->value_get();
00103                 wide = false; // reset for each token; only the wide literal cases change it
00104 
00105                 switch (t->type_get()) {
00106                         case pp_token::TOK_FILE_END: // end of input becomes TOK_EOF
00107                                 ct = cpp_token::create(loc,cpp_token::TOK_EOF);
00108                                 break;
00109                         case pp_token::TOK_WSTRING_LIT:
00110                                 wide = true;
00111                                 // fall through
00112                         case pp_token::TOK_STRING_LIT:
00113                                 ct = cpp_token::create_literal(loc,lex_string_literal::create(wide),val); // wide is true only when entered through TOK_WSTRING_LIT
00114                                 break;
00115                         case pp_token::TOK_WCHAR_LIT:
00116                                 wide = true;
00117                                 // fall through
00118                         case pp_token::TOK_CHAR_LIT:
00119                                 ct = cpp_token::create_literal(loc,lex_character_literal::create(wide,val->content_get().length() > 1),val); // second argument is true when the content is longer than one character (presumably the multi-character flag -- TODO confirm)
00120                                 break;
00121                         case pp_token::TOK_NUMBER_LIT:
00122                                 preprocessor_logger << "number literal\n" << msg::eolog;
00123                                 lit = classify_number(loc,val); // an empty result means the number could not be classified
00124                                 
00125                                 if (!lit) {
00126                                         preprocessor_logger << "the literal is illformed\n" << msg::eolog;
00127                                         // create fake zero
00128                                         lit = lex_integral_literal::create(lex_integral_literal::OCTAL,lex_integral_literal::NONE,0,1); // NOTE(review): meaning of the trailing 0,1 arguments is not visible here -- confirm against lex_integral_literal
00129                                         val = token_value::create("0"); // replace the value so it matches the substituted literal
00130                                 }
00131 
00132                                 ct = cpp_token::create_literal(loc,lit,val);
00133                                 // error already reported
00134                                 break;
00135                         case pp_token::TOK_BOOL_LIT:
00136                                 ct = cpp_token::create_literal(loc,lex_boolean_literal::create(),val);
00137                                 break;
00138                                 
00139                         case pp_token::TOK_LEFT_BRACKET: // punctuators and operators: converted to the cpp_token type of the same name unless noted
00140                                 ct = cpp_token::create(loc,cpp_token::TOK_LEFT_BRACKET);
00141                                 break;
00142                         case pp_token::TOK_RIGHT_BRACKET:
00143                                 ct = cpp_token::create(loc,cpp_token::TOK_RIGHT_BRACKET);
00144                                 break;
00145                         case pp_token::TOK_LEFT_BRACE:
00146                                 ct = cpp_token::create(loc,cpp_token::TOK_LEFT_BRACE);
00147                                 break;
00148                         case pp_token::TOK_RIGHT_BRACE:
00149                                 ct = cpp_token::create(loc,cpp_token::TOK_RIGHT_BRACE);
00150                                 break;
00151                         case pp_token::TOK_LEFT_PAR:
00152                                 ct = cpp_token::create(loc,cpp_token::TOK_LEFT_PAR);
00153                                 break;
00154                         case pp_token::TOK_RIGHT_PAR:
00155                                 ct = cpp_token::create(loc,cpp_token::TOK_RIGHT_PAR);
00156                                 break;
00157                         case pp_token::TOK_SEMICOLON:
00158                                 ct = cpp_token::create(loc,cpp_token::TOK_SEMICOLON);
00159                                 break;
00160                         case pp_token::TOK_COLON:
00161                                 ct = cpp_token::create(loc,cpp_token::TOK_COLON);
00162                                 break;
00163                         case pp_token::TOK_QMARK:
00164                                 ct = cpp_token::create(loc,cpp_token::TOK_QMARK);
00165                                 break;
00166                         case pp_token::TOK_DOT:
00167                                 ct = cpp_token::create(loc,cpp_token::TOK_DOT);
00168                                 break;
00169                         case pp_token::TOK_PLUS:
00170                                 ct = cpp_token::create(loc,cpp_token::TOK_PLUS);
00171                                 break;
00172                         case pp_token::TOK_STAR:
00173                                 ct = cpp_token::create(loc,cpp_token::TOK_STAR);
00174                                 break;
00175                         case pp_token::TOK_PERCENT:
00176                                 ct = cpp_token::create(loc,cpp_token::TOK_PERCENT);
00177                                 break;
00178                         case pp_token::TOK_SLASH:
00179                                 ct = cpp_token::create(loc,cpp_token::TOK_SLASH);
00180                                 break;
00181                         case pp_token::TOK_HAT:
00182                                 ct = cpp_token::create(loc,cpp_token::TOK_HAT);
00183                                 break;
00184                         case pp_token::TOK_AMP:
00185                                 ct = cpp_token::create(loc,cpp_token::TOK_AMP);
00186                                 break;
00187                         case pp_token::TOK_VBAR:
00188                                 ct = cpp_token::create(loc,cpp_token::TOK_VBAR);
00189                                 break;
00190                         case pp_token::TOK_TILDE:
00191                                 ct = cpp_token::create(loc,cpp_token::TOK_TILDE);
00192                                 break;
00193                         case pp_token::TOK_EMARK:
00194                                 ct = cpp_token::create(loc,cpp_token::TOK_EXCLAMATION); // named TOK_EXCLAMATION on the cpp_token side
00195                                 break;
00196                         case pp_token::TOK_EQ:
00197                                 ct = cpp_token::create(loc,cpp_token::TOK_EQ);
00198                                 break;
00199                         case pp_token::TOK_LT:
00200                                 ct = cpp_token::create(loc,cpp_token::TOK_LT);
00201                                 break;
00202                         case pp_token::TOK_GT:
00203                                 ct = cpp_token::create(loc,cpp_token::TOK_GT);
00204                                 break;
00205                         case pp_token::TOK_COMMA:
00206                                 ct = cpp_token::create(loc,cpp_token::TOK_COMMA);
00207                                 break;
00208                         case pp_token::TOK_MINUS:
00209                                 ct = cpp_token::create(loc,cpp_token::TOK_MINUS);
00210                                 break;
00211                         case pp_token::TOK_DOT_DOT_DOT:
00212                                 ct = cpp_token::create(loc,cpp_token::TOK_DOT_DOT_DOT);
00213                                 break;
00214                         case pp_token::TOK_COLON_COLON:
00215                                 ct = cpp_token::create(loc,cpp_token::TOK_COLON_COLON);
00216                                 break;
00217                         case pp_token::TOK_DOT_STAR:
00218                                 ct = cpp_token::create(loc,cpp_token::TOK_DOT_STAR);
00219                                 break;
00220                         case pp_token::TOK_PLUS_EQ:
00221                                 ct = cpp_token::create(loc,cpp_token::TOK_PLUS_EQ);
00222                                 break;
00223                         case pp_token::TOK_MINUS_EQ:
00224                                 ct = cpp_token::create(loc,cpp_token::TOK_MINUS_EQ);
00225                                 break;
00226                         case pp_token::TOK_STAR_EQ:
00227                                 ct = cpp_token::create(loc,cpp_token::TOK_STAR_EQ);
00228                                 break;
00229                         case pp_token::TOK_SLASH_EQ:
00230                                 ct = cpp_token::create(loc,cpp_token::TOK_SLASH_EQ);
00231                                 break;
00232                         case pp_token::TOK_PERCENT_EQ:
00233                                 ct = cpp_token::create(loc,cpp_token::TOK_PERCENT_EQ);
00234                                 break;
00235                         case pp_token::TOK_HAT_EQ:
00236                                 ct = cpp_token::create(loc,cpp_token::TOK_HAT_EQ);
00237                                 break;
00238                         case pp_token::TOK_AMP_EQ:
00239                                 ct = cpp_token::create(loc,cpp_token::TOK_AMP_EQ);
00240                                 break;
00241                         case pp_token::TOK_VBAR_EQ:
00242                                 ct = cpp_token::create(loc,cpp_token::TOK_VBAR_EQ);
00243                                 break;
00244                         case pp_token::TOK_LT_LT:
00245                                 ct = cpp_token::create(loc,cpp_token::TOK_LT_LT);
00246                                 break;
00247                         case pp_token::TOK_GT_GT:
00248                                 ct = cpp_token::create(loc,cpp_token::TOK_GT_GT);
00249                                 break;
00250                         case pp_token::TOK_LT_LT_EQ:
00251                                 ct = cpp_token::create(loc,cpp_token::TOK_LT_LT_EQ);
00252                                 break;
00253                         case pp_token::TOK_GT_GT_EQ:
00254                                 ct = cpp_token::create(loc,cpp_token::TOK_GT_GT_EQ);
00255                                 break;
00256                         case pp_token::TOK_EQ_EQ:
00257                                 ct = cpp_token::create(loc,cpp_token::TOK_EQ_EQ);
00258                                 break;
00259                         case pp_token::TOK_EMARK_EQ:
00260                                 ct = cpp_token::create(loc,cpp_token::TOK_EXCLAMATION_EQ); // named TOK_EXCLAMATION_EQ on the cpp_token side
00261                                 break;
00262                         case pp_token::TOK_LT_EQ:
00263                                 ct = cpp_token::create(loc,cpp_token::TOK_LT_EQ);
00264                                 break;
00265                         case pp_token::TOK_GT_EQ:
00266                                 ct = cpp_token::create(loc,cpp_token::TOK_GT_EQ);
00267                                 break;
00268                         case pp_token::TOK_AMP_AMP:
00269                                 ct = cpp_token::create(loc,cpp_token::TOK_AMP_AMP);
00270                                 break;
00271                         case pp_token::TOK_VBAR_VBAR:
00272                                 ct = cpp_token::create(loc,cpp_token::TOK_VBAR_VBAR);
00273                                 break;
00274                         case pp_token::TOK_PLUS_PLUS:
00275                                 ct = cpp_token::create(loc,cpp_token::TOK_PLUS_PLUS);
00276                                 break;
00277                         case pp_token::TOK_MINUS_MINUS:
00278                                 ct = cpp_token::create(loc,cpp_token::TOK_MINUS_MINUS);
00279                                 break;
00280                         case pp_token::TOK_MINUS_GT_STAR:
00281                                 ct = cpp_token::create(loc,cpp_token::TOK_MINUS_GT_STAR);
00282                                 break;
00283                         case pp_token::TOK_MINUS_GT:
00284                                 ct = cpp_token::create(loc,cpp_token::TOK_MINUS_GT);
00285                                 break;
00286                         case pp_token::TOK_ASM: // keywords: converted to the cpp_token type of the same name unless noted
00287                                 ct = cpp_token::create(loc,cpp_token::TOK_ASM);
00288                                 break;
00289                         case pp_token::TOK_AUTO:
00290                                 ct = cpp_token::create(loc,cpp_token::TOK_AUTO);
00291                                 break;
00292                         case pp_token::TOK_BOOL:
00293                                 ct = cpp_token::create(loc,cpp_token::TOK_BOOL);
00294                                 break;
00295                         case pp_token::TOK_BREAK:
00296                                 ct = cpp_token::create(loc,cpp_token::TOK_BREAK);
00297                                 break;
00298                         case pp_token::TOK_CASE:
00299                                 ct = cpp_token::create(loc,cpp_token::TOK_CASE);
00300                                 break;
00301                         case pp_token::TOK_CATCH:
00302                                 ct = cpp_token::create(loc,cpp_token::TOK_CATCH);
00303                                 break;
00304                         case pp_token::TOK_CHAR:
00305                                 ct = cpp_token::create(loc,cpp_token::TOK_CHAR);
00306                                 break;
00307                         case pp_token::TOK_CLASS:
00308                                 ct = cpp_token::create(loc,cpp_token::TOK_CLASS);
00309                                 break;
00310                         case pp_token::TOK_CONST:
00311                                 ct = cpp_token::create(loc,cpp_token::TOK_CONST);
00312                                 break;
00313                         case pp_token::TOK_CONST_CAST:
00314                                 ct = cpp_token::create(loc,cpp_token::TOK_CONST_CAST);
00315                                 break;
00316                         case pp_token::TOK_CONTINUE:
00317                                 ct = cpp_token::create(loc,cpp_token::TOK_CONTINUE);
00318                                 break;
00319                         case pp_token::TOK_DEFAULT:
00320                                 ct = cpp_token::create(loc,cpp_token::TOK_DEFAULT);
00321                                 break;
00322                         case pp_token::TOK_DELETE:
00323                                 ct = cpp_token::create(loc,cpp_token::TOK_DELETE);
00324                                 break;
00325                         case pp_token::TOK_DO:
00326                                 ct = cpp_token::create(loc,cpp_token::TOK_DO);
00327                                 break;
00328                         case pp_token::TOK_DOUBLE:
00329                                 ct = cpp_token::create(loc,cpp_token::TOK_DOUBLE);
00330                                 break;
00331                         case pp_token::TOK_DYNAMIC_CAST:
00332                                 ct = cpp_token::create(loc,cpp_token::TOK_DYNAMIC_CAST);
00333                                 break;
00334                         case pp_token::TOK_ELSE:
00335                                 ct = cpp_token::create(loc,cpp_token::TOK_ELSE);
00336                                 break;
00337                         case pp_token::TOK_ENUM:
00338                                 ct = cpp_token::create(loc,cpp_token::TOK_ENUM);
00339                                 break;
00340                         case pp_token::TOK_EXPLICIT:
00341                                 ct = cpp_token::create(loc,cpp_token::TOK_EXPLICIT);
00342                                 break;
00343                         case pp_token::TOK_EXPORT:
00344                                 ct = cpp_token::create(loc,cpp_token::TOK_EXPORT);
00345                                 break;
00346                         case pp_token::TOK_EXTERN:
00347                                 ct = cpp_token::create(loc,cpp_token::TOK_EXTERN);
00348                                 break;
00349                         case pp_token::TOK_FLOAT:
00350                                 ct = cpp_token::create(loc,cpp_token::TOK_FLOAT);
00351                                 break;
00352                         case pp_token::TOK_FOR:
00353                                 ct = cpp_token::create(loc,cpp_token::TOK_FOR);
00354                                 break;
00355                         case pp_token::TOK_FRIEND:
00356                                 ct = cpp_token::create(loc,cpp_token::TOK_FRIEND);
00357                                 break;
00358                         case pp_token::TOK_GOTO:
00359                                 ct = cpp_token::create(loc,cpp_token::TOK_GOTO);
00360                                 break;
00361                         case pp_token::TOK_IF:
00362                                 ct = cpp_token::create(loc,cpp_token::TOK_IF);
00363                                 break;
00364                         case pp_token::TOK_INLINE:
00365                                 ct = cpp_token::create(loc,cpp_token::TOK_INLINE);
00366                                 break;
00367                         case pp_token::TOK_INT:
00368                                 ct = cpp_token::create(loc,cpp_token::TOK_INT);
00369                                 break;
00370                         case pp_token::TOK_LONG:
00371                                 ct = cpp_token::create(loc,cpp_token::TOK_LONG);
00372                                 break;
00373                         case pp_token::TOK_MUTABLE:
00374                                 ct = cpp_token::create(loc,cpp_token::TOK_MUTABLE);
00375                                 break;
00376                         case pp_token::TOK_NAMESPACE:
00377                                 ct = cpp_token::create(loc,cpp_token::TOK_NAMESPACE);
00378                                 break;
00379                         case pp_token::TOK_NEW:
00380                                 ct = cpp_token::create(loc,cpp_token::TOK_NEW);
00381                                 break;
00382                         case pp_token::TOK_OPERATOR:
00383                                 ct = cpp_token::create(loc,cpp_token::TOK_OPERATOR);
00384                                 break;
00385                         case pp_token::TOK_PRIVATE:
00386                                 ct = cpp_token::create(loc,cpp_token::TOK_PRIVATE);
00387                                 break;
00388                         case pp_token::TOK_PROTECTED:
00389                                 ct = cpp_token::create(loc,cpp_token::TOK_PROTECTED);
00390                                 break;
00391                         case pp_token::TOK_PUBLIC:
00392                                 ct = cpp_token::create(loc,cpp_token::TOK_PUBLIC);
00393                                 break;
00394                         case pp_token::TOK_REGISTER:
00395                                 ct = cpp_token::create(loc,cpp_token::TOK_REGISTER);
00396                                 break;
00397                         case pp_token::TOK_REINTERPRET_CAST:
00398                                 ct = cpp_token::create(loc,cpp_token::TOK_REINTERPRET_CAST);
00399                                 break;
00400                         case pp_token::TOK_RETURN:
00401                                 ct = cpp_token::create(loc,cpp_token::TOK_RETURN);
00402                                 break;
00403                         case pp_token::TOK_SHORT:
00404                                 ct = cpp_token::create(loc,cpp_token::TOK_SHORT);
00405                                 break;
00406                         case pp_token::TOK_SIGNED:
00407                                 ct = cpp_token::create(loc,cpp_token::TOK_SIGNED);
00408                                 break;
00409                         case pp_token::TOK_SIZEOF:
00410                                 ct = cpp_token::create(loc,cpp_token::TOK_SIZEOF);
00411                                 break;
00412                         case pp_token::TOK_STATIC:
00413                                 ct = cpp_token::create(loc,cpp_token::TOK_STATIC);
00414                                 break;
00415                         case pp_token::TOK_STATIC_CAST:
00416                                 ct = cpp_token::create(loc,cpp_token::TOK_STATIC_CAST);
00417                                 break;
00418                         case pp_token::TOK_STRUCT:
00419                                 ct = cpp_token::create(loc,cpp_token::TOK_STRUCT);
00420                                 break;
00421                         case pp_token::TOK_SWITCH:
00422                                 ct = cpp_token::create(loc,cpp_token::TOK_SWITCH);
00423                                 break;
00424                         case pp_token::TOK_TEMPLATE:
00425                                 ct = cpp_token::create(loc,cpp_token::TOK_TEMPLATE);
00426                                 break;
00427                         case pp_token::TOK_THIS:
00428                                 ct = cpp_token::create(loc,cpp_token::TOK_THIS);
00429                                 break;
00430                         case pp_token::TOK_THROW:
00431                                 ct = cpp_token::create(loc,cpp_token::TOK_THROW);
00432                                 break;
00433                         case pp_token::TOK_TRY:
00434                                 ct = cpp_token::create(loc,cpp_token::TOK_TRY);
00435                                 break;
00436                         case pp_token::TOK_TYPEDEF:
00437                                 ct = cpp_token::create(loc,cpp_token::TOK_TYPEDEF);
00438                                 break;
00439                         case pp_token::TOK_TYPEID:
00440                                 ct = cpp_token::create(loc,cpp_token::TOK_TYPEID);
00441                                 break;
00442                         case pp_token::TOK_TYPENAME:
00443                                 ct = cpp_token::create(loc,cpp_token::TOK_TYPENAME);
00444                                 break;
00445                         case pp_token::TOK_UNION:
00446                                 ct = cpp_token::create(loc,cpp_token::TOK_UNION);
00447                                 break;
00448                         case pp_token::TOK_UNSIGNED:
00449                                 ct = cpp_token::create(loc,cpp_token::TOK_UNSIGNED);
00450                                 break;
00451                         case pp_token::TOK_USING:
00452                                 ct = cpp_token::create(loc,cpp_token::TOK_USING);
00453                                 break;
00454                         case pp_token::TOK_VIRTUAL:
00455                                 ct = cpp_token::create(loc,cpp_token::TOK_VIRTUAL);
00456                                 break;
00457                         case pp_token::TOK_VOID:
00458                                 ct = cpp_token::create(loc,cpp_token::TOK_VOID);
00459                                 break;
00460                         case pp_token::TOK_VOLATILE:
00461                                 ct = cpp_token::create(loc,cpp_token::TOK_VOLATILE);
00462                                 break;
00463                         case pp_token::TOK_WCHAR:
00464                                 ct = cpp_token::create(loc,cpp_token::TOK_WCHAR_T); // named TOK_WCHAR_T on the cpp_token side
00465                                 break;
00466                         case pp_token::TOK_WHILE:
00467                                 ct = cpp_token::create(loc,cpp_token::TOK_WHILE);
00468                                 break;
00469                         case pp_token::TOK_IDENT:
00470                                 ct = cpp_token::create(loc,cpp_token::TOK_IDENT,val); // identifiers keep their value
00471                                 break;
00472                         case pp_token::TOK_HASH:
00473                         case pp_token::TOK_HASH_HASH:
00474                                 // invalid token
00475                                 report << invalid_cpp_token << t->spelling_get() << loc; // ct is left unset, so the loop goes on to the next token
00476                                 break;
00477                         case pp_token::TOK_OTHER:
00478                                 // stray character
00479                                 report << stray_character << t->spelling_get() << loc; // ct is left unset, so the loop goes on to the next token
00480                                 break;
00481                         default: // any pp_token type not listed above trips the assertion
00482                                 lassert2(false,"Unexpected token in conversion");
00483                                 break;
00484                 }
00485         }
00486 
00487         preprocessor_logger << "returning " << static_cast<ulint>(ct->type_get()) << "\n" << msg::eolog; // logs the numeric token type
00488         preprocessor_logger << "preprocessor::read() end\n" << msg::eolog;
00489         return ct;
00490 }
00491 
00492 /*!
00493   Attempts to parse preprocessing number. Returns literal properties.
00494   \pre loc != NULL
00495   \pre num != NULL
00496   \pre  The value matches preprocessing number.
00497   \param loc  The location of the number.
00498   \param num  The number to classify.
00499   \return The appropriate literal properties or NULL. 
00500 */
00501 ptr<lex_literal> preprocessor::classify_number(const ptr<source_location> &loc, const ptr<token_value> &num)
00502 {
00503         preprocessor_logger << "preprocessor::classify_number()\n" << msg::eolog;
00504 
00505         lassert(loc);
00506         lassert(num);
00507 
00508         // the states of the parser
00509         enum {
00510                 BEGIN,
00511                 ZERO,
00512                 OCTAL,
00513                 DECIMAL,
00514                 HEXADECIMAL_START,
00515                 HEXADECIMAL,
00516                 FLOAT_INTEGRAL,
00517                 FLOAT_FRACTION,
00518                 FLOAT_EXPONENT_SIGN,
00519                 FLOAT_EXPONENT_START,
00520                 FLOAT_EXPONENT,
00521                 FLOAT_SUFFIX,
00522                 FLOAT_END,
00523                 INTEGRAL_SUFFIX,
00524         } fstate = BEGIN;
00525 
00526         ucn_string str(num->content_get());
00527         ucn u;
00528         lex_integral_literal::base_type base = lex_integral_literal::OCTAL;
00529         bool long_suffix = false;
00530         bool unsigned_suffix = false;
00531         bool float_suffix = false;
00532         ptr<lex_literal> literal;
00533 
00534         for (ucn_string::iterator sit = str.begin(), send = str.end();
00535                         sit != send; ) {
00536                 u = *sit;
00537 
00538                 preprocessor_logger << "fstate = " << fstate["bzodhHI.+eEfFu"] << ' ' << (ulint)u << '\n' << msg::eolog;
00539 
00540                 if (!character::is_basic(u) || u == character::ascii_underscore) {
00541                         // bad character in number
00542                         report << invalid_character_in_number << loc;
00543                         goto err;
00544                 }
00545                 switch (fstate) {
00546                         case BEGIN:
00547                                 ++sit;
00548                                 switch (u) {
00549                                         case character::ascii_digit_0:
00550                                                 fstate = ZERO;
00551                                                 break;
00552                                         case character::ascii_digit_1:
00553                                         case character::ascii_digit_2:
00554                                         case character::ascii_digit_3:
00555                                         case character::ascii_digit_4:
00556                                         case character::ascii_digit_5:
00557                                         case character::ascii_digit_6:
00558                                         case character::ascii_digit_7:
00559                                         case character::ascii_digit_8:
00560                                         case character::ascii_digit_9:
00561                                                 fstate = DECIMAL;
00562                                                 break;
00563                                         case character::ascii_dot:
00564                                                 fstate = FLOAT_FRACTION;
00565                                                 break;
00566                                         default:
00567                                                 lassert2(false,"You should never get here");
00568                                 }
00569                                 break;
00570                         case ZERO:
00571                                 if (u == character::ascii_lower_x ||
00572                                          u == character::ascii_upper_x) {
00573                                         fstate = HEXADECIMAL_START;
00574                                         ++sit;
00575                                 } else if (character::is_odigit(u)) {
00576                                         fstate = OCTAL;
00577                                 } else 
00578                                         fstate = INTEGRAL_SUFFIX;
00579                                 break;
00580                         case OCTAL:
00581                                 if (character::is_odigit(u))
00582                                         ++sit;
00583                                 else if (character::is_digit(u))
00584                                         fstate = FLOAT_INTEGRAL;
00585                                 else if (u == character::ascii_dot) {
00586                                         fstate = FLOAT_FRACTION;
00587                                         ++sit;
00588                                 } else
00589                                         fstate = INTEGRAL_SUFFIX;
00590                                 break;
00591                         case DECIMAL:
00592                                 if (character::is_digit(u))
00593                                         ++sit;
00594                                 else if (u == character::ascii_dot) {
00595                                         ++sit;
00596                                         fstate = FLOAT_FRACTION;
00597                                 } else if (u == character::ascii_lower_e ||
00598                                                 u == character::ascii_upper_e) {
00599                                         ++sit;
00600                                         fstate = FLOAT_EXPONENT_SIGN;
00601                                 } else
00602                                         fstate = INTEGRAL_SUFFIX;
00603                                 break;
00604                         case HEXADECIMAL_START:
00605                                 if (character::is_xdigit(u)) {
00606                                         ++sit;
00607                                         fstate = HEXADECIMAL;
00608                                         base = lex_integral_literal::HEXADECIMAL;
00609                                 } else
00610                                         fstate = INTEGRAL_SUFFIX;
00611                                 break;
00612                         case HEXADECIMAL:
00613                                 if (character::is_xdigit(u))
00614                                         ++sit;
00615                                 else
00616                                         fstate = INTEGRAL_SUFFIX;
00617                                 break;
00618                         case FLOAT_INTEGRAL:
00619                                 if (character::is_digit(u))
00620                                         ++sit;
00621                                 else if (u == character::ascii_dot) {
00622                                         ++sit;
00623                                         fstate = FLOAT_FRACTION;
00624                                 } else if (u == character::ascii_lower_e ||
00625                                                 u == character::ascii_upper_e) {
00626                                         ++sit;
00627                                         fstate = FLOAT_EXPONENT_SIGN;
00628                                 } else
00629                                         fstate = FLOAT_SUFFIX;
00630                                 break;
00631                         case FLOAT_FRACTION:
00632                                 if (character::is_digit(u))
00633                                         ++sit;
00634                                 else if (u == character::ascii_lower_e ||
00635                                                 u == character::ascii_upper_e) {
00636                                         ++sit;
00637                                         fstate = FLOAT_EXPONENT_SIGN;
00638                                 } else
00639                                         fstate = FLOAT_SUFFIX;
00640                                 break;
00641                         case FLOAT_EXPONENT_SIGN:
00642                                 if (character::is_digit(u)) {
00643                                         ++sit;
00644                                         fstate = FLOAT_EXPONENT;
00645                                 } else if (u == character::ascii_plus) {
00646                                         ++sit;
00647                                         fstate = FLOAT_EXPONENT_START;
00648                                 } else if (u == character::ascii_minus) {
00649                                         ++sit;
00650                                         fstate = FLOAT_EXPONENT_START;
00651                                 } else {
00652                                         // no digits in exponent
00653                                         report << floating_exponent_empty << loc;
00654                                         goto err;
00655                                 }
00656                                 break;
00657                         case FLOAT_EXPONENT_START:
00658                                 if (character::is_digit(u)) {
00659                                         ++sit;
00660                                         fstate = FLOAT_EXPONENT;
00661                                 } else {
00662                                         // no digits in exponent
00663                                         report << floating_exponent_empty << loc;
00664                                         goto err;
00665                                 }
00666                                 break;
00667                         case FLOAT_EXPONENT:
00668                                 if (character::is_digit(u))
00669                                         ++sit;
00670                                 else {
00671                                         fstate = FLOAT_SUFFIX;
00672                                 }
00673                                 break;
00674                         case FLOAT_SUFFIX:
00675                                 ++sit;
00676                                 if (u == character::ascii_lower_f ||
00677                                          u == character::ascii_upper_f) {
00678                                         float_suffix = true;
00679                                         fstate = FLOAT_END;
00680                                         break;
00681                                 } else if (u == character::ascii_lower_l ||
00682                                                 u == character::ascii_upper_l) {
00683                                         long_suffix = true;
00684                                         fstate = FLOAT_END;
00685                                         break;
00686                                 } 
00687                                 // fall through
00688                         case FLOAT_END:
00689                                 // bad float suffix
00690                                 report << floating_suffix_invalid << loc;
00691                                 goto err;
00692                         case INTEGRAL_SUFFIX:
00693                                 ++sit;
00694                                 if (u == character::ascii_lower_u ||
00695                                          u == character::ascii_upper_u) {
00696                                         if (unsigned_suffix) {
00697                                                 // bad integral suffix
00698                                                 report << integral_suffix_invalid << loc;
00699                                                 goto err;
00700                                         }
00701                                         unsigned_suffix = true;
00702                                 } else if (u == character::ascii_lower_l ||
00703                                                 u == character::ascii_upper_l) {
00704                                         if (long_suffix) {
00705                                                 // bad integral suffix
00706                                                 report << integral_suffix_invalid << loc;
00707                                                 goto err;
00708                                         }
00709                                         long_suffix = true;
00710                                 } else {
00711                                         // bad integral suffix
00712                                         report << integral_suffix_invalid << loc;
00713                                         goto err;
00714                                 }
00715                                 break;
00716                         default:
00717                                 lassert2(false,"You should never get here");
00718                                 break;
00719                 }
00720         }
00721                 
00722         switch (fstate) {
00723                 case ZERO:
00724                 case OCTAL:
00725                 case DECIMAL:
00726                 case HEXADECIMAL:
00727                 case INTEGRAL_SUFFIX:
00728                 {
00729                         ucn_string::size_type start = (base == lex_integral_literal::HEXADECIMAL) ? 2 : 0;
00730                         ucn_string::size_type end = str.length();
00731                         lex_integral_literal::suffix_type suffix = lex_integral_literal::NONE;
00732 
00733                         if (unsigned_suffix) {
00734                                 if (long_suffix) {
00735                                         suffix = lex_integral_literal::UNSIGNED_LONG;
00736                                         end -= 2;
00737                                 } else {
00738                                         suffix = lex_integral_literal::UNSIGNED;
00739                                         end--;
00740                                 }
00741                         } else if (long_suffix) {
00742                                 suffix = lex_integral_literal::LONG;
00743                                 end--;
00744                         }
00745 
00746                         preprocessor_logger << "integral su = " << (ulint)suffix <<  
00747                                 " s = " << start << " e = " << end << "\n" << msg::eolog;
00748 
00749                         literal = lex_integral_literal::create(base,suffix,start,end);
00750                         break;
00751                 }
00752                 case FLOAT_FRACTION:
00753                 case FLOAT_EXPONENT:
00754                 case FLOAT_SUFFIX:
00755                 case FLOAT_END:
00756                 {
00757                         lex_floating_literal::suffix_type suffix = lex_floating_literal::NONE;
00758                         if (long_suffix)
00759                                 suffix = lex_floating_literal::LONG;
00760                         else if (float_suffix)
00761                                 suffix = lex_floating_literal::FLOAT;
00762 
00763                         ucn_string::iterator sit = str.begin(), send = str.end();
00764 
00765                         ucn_string::size_type is = 0, ie = is;
00766                         
00767                         while (sit != send && character::is_digit(*sit)) {
00768                                 ++ie;
00769                                 ++sit;
00770                         }
00771 
00772                         ucn_string::size_type fs = ie, fe = fs;
00773                         if (sit != send && *sit == character::ascii_dot) {
00774                                 ++sit;
00775                                 ++fs;
00776                                 ++fe;
00777 
00778                                 while (sit != send && character::is_digit(*sit)) {
00779                                         ++fe;
00780                                         ++sit;
00781                                 }
00782                         }
00783                         
00784                         bool neg = false;
00785                         ucn_string::size_type es = fe, ee = es;
00786                         if (sit != send && (*sit == character::ascii_lower_e || *sit == character::ascii_upper_e)) {
00787                                 ++sit;
00788                                 ++es;
00789                                 ++ee;
00790 
00791                                 if (sit != send && !character::is_digit(*sit)) {
00792                                         neg = *sit == character::ascii_minus;
00793                                         ++sit;
00794                                         ++es;
00795                                         ++ee;
00796                                 }
00797 
00798                                 while (sit != send && character::is_digit(*sit)) {
00799                                         ++ee;
00800                                         ++sit;
00801                                 }
00802                         }
00803 
00804                         preprocessor_logger << "floating su = " << (ulint)suffix << " n = " << neg << 
00805                                 " is = " << is << " ie = " << ie << " fs = " << fs << " fe = " << fe <<
00806                                 " es = " << es << " ee = " << ee << "\n" << msg::eolog;
00807 
00808                         literal = lex_floating_literal::create(suffix,neg,is,ie,fs,fe,es,ee);
00809                 }
00810                 break;
00811                 case FLOAT_EXPONENT_SIGN:
00812                 case FLOAT_EXPONENT_START:
00813                         // float exponent with no digits
00814                         report << floating_exponent_empty << loc;
00815                         break;
00816                 case HEXADECIMAL_START:
00817                         // hexa with no digits
00818                         report << integral_empty << loc;
00819                         break;
00820                 default:
00821                         lassert2(false,"You should never get here");
00822         }
00823   
00824 err:
00825         preprocessor_logger << "preprocessor::classify_number() end\n" << msg::eolog;
00826 
00827         return literal;
00828 }
00829 
00830 /*!
00831   Marks the object: passes the mark on to fs, eva, spr, str and stj (in that order) and then to the ::lestes::std::object part.
00832 */
00833 void preprocessor::gc_mark()
00834 {
00835         this->fs.gc_mark();
00836         this->eva.gc_mark();
00837         this->spr.gc_mark();
00838         this->str.gc_mark();
00839         this->stj.gc_mark();
00840         ::lestes::std::object::gc_mark();
00841 }
00842 
00843 /*!
00844   Factory method: allocates a preprocessor for the given file and returns it wrapped in a ptr.
00845   \param a_fs  The file system binding, forwarded to the constructor.
00846   \param file_name  The full name of the file to process, forwarded to the constructor.
00847 */
00848 ptr<preprocessor> preprocessor::create(const ptr<file_system> &a_fs, const lstring &file_name)
00849 {
00850         return ptr<preprocessor>(new preprocessor(a_fs, file_name));
00851 }
00852 
00853 end_package(lex);    // NOTE(review): presumably these four invocations close, innermost first,
00854 end_package(cplus);  // the lex / cplus / lang / lestes package scopes opened earlier in the file
00855 end_package(lang);   // (the opening side is not visible in this part of the listing) - confirm
00856 end_package(lestes); // against the macro definitions.
00857 
00858 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:23:11 2007 for lestes by doxygen 1.5.1-20070107