evaluator.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief Directives evaluator.
00030 
00031   Definition of evaluator class representing evaluator of preprocessing directives.
00032   \author pt
00033 */
00034 #include <lestes/common.hh>
00035 #include <lestes/lang/cplus/lex/evaluator.hh>
00036 #include <lestes/lang/cplus/lex/evaluator.m.hh>
00037 #include <lestes/lang/cplus/lex/pp_token.hh>
00038 #include <lestes/lang/cplus/lex/expander.hh>
00039 #include <lestes/lang/cplus/lex/token_sequence.hh>
00040 #include <lestes/lang/cplus/lex/unit_part.hh>
00041 #include <lestes/lang/cplus/lex/condition_stack.hh>
00042 #include <lestes/lang/cplus/lex/macro_storage.hh>
00043 #include <lestes/lang/cplus/lex/macro.hh>
00044 #include <lestes/lang/cplus/lex/token_value.hh>
00045 #include <lestes/lang/cplus/lex/file_system.hh>
00046 #include <lestes/lang/cplus/lex/named_istream.hh>
00047 #include <lestes/lang/cplus/lex/evaluator.m.hh>
00048 #include <lestes/lang/cplus/lex/stream_source.hh>
00049 #include <lestes/lang/cplus/lex/encoder_ascii7.hh>
00050 #include <lestes/lang/cplus/lex/line_control.hh>
00051 #include <lestes/std/istream_wrapper.hh>
00052 #include <lestes/std/source_location.hh>
00053 #include <lestes/std/file_info.hh>
00054 
00055 package(lestes);
00056 package(lang);
00057 package(cplus);
00058 package(lex);
00059 
00060 using namespace ::std;
00061 
00062 /*!
00063   Creates new evaluator, try to start processing given file.
00064   The state is set to EMPTY when the file could not be opened, or START.
00065   \pre a_fs != NULL
00066   \param a_fs  The file system binding.
00067   \param file_name  The name of the file to process.
00068 */
00069 evaluator::evaluator(const ptr<file_system> &a_fs, const lstring &file_name):
00070         fs(checked(a_fs)),
00071         state(EMPTY),
00072         active(false),
00073         macros(macro_storage::create()),
00074         includes(includes_type::create()),
00075         input(NULL),
00076         part(NULL),
00077         buffer(NULL),
00078         pragma_flag(false),
00079         nesting(0)
00080 {
00081         ptr<named_istream> ns = fs->open_file(file_name);
00082         if (ns) {
00083                 push_stream(ns,NULL);
00084         } else {
00085                 report << unable_to_open_file << file_name << 
00086                         source_location::create(file_info::create("<command line>",NULL),1,1);
00087         }
00088 }
00089 
00090 /*!
00091   Returns the pragma flag.
00092   \return The pragma flag.
00093 */
00094 bool evaluator::pragma_flag_get(void) const
00095 {
00096         return pragma_flag;
00097 }
00098 
00099 /*!
00100   Pushes a new stream to be processed as a part of a translation unit.
00101   \pre ns != NULL
00102   \param ns  The named stream to process.
00103   \param loc  The location of inclusion or NULL for toplevel stream.
00104 */
00105 void evaluator::push_stream(const ptr<named_istream> &ns, const ptr<source_location> &loc)
00106 {
00107         lassert(ns);
00108 
00109         ptr<file_info> fi = file_info::create(ns->print_name_get(),loc);
00110         ptr<stream_source> ss = stream_source::create(ns->stream_get());
00111         // TODO pt config
00112         ptr<encoder> enc = encoder_ascii7::create();
00113 
00114         part = unit_part::create(fi,ss,enc,macros);
00115         state = START;
00116         active = true;
00117         input = part->expander_get();
00118         includes->push_back(part);
00119         ++nesting;
00120 }
00121 
00122 /*!
00123   Pops a processed stream, returning to the following in the stack.
00124   \pre state != EMPTY
00125 */
00126 void evaluator::pop_stream(void)
00127 {
00128         lassert(state != EMPTY);
00129 
00130         includes->pop_back();
00131         --nesting;
00132 
00133         if (includes->size() != 0) {
00134                 part = includes->back();
00135                 input = part->expander_get();
00136                 state = START;
00137         } else {
00138                 input = NULL;
00139                 part = NULL;
00140                 buffer = NULL;
00141                 state = EMPTY;
00142                 macros = NULL;
00143         }
00144 }
00145 
00146 /*!
00147   Checks for extra tokens in buffer before the end of line.
00148   Used for issuing error for trailing garbage in directives.
00149   \pre  The newline is still in buffer.
00150   \param name  The name of the checked directive.
00151 */
00152 void evaluator::check_extra_tokens(const lstring &name)
00153 {
00154         ptr<pp_token> tok = buffer->read_front_skip_ws();
00155         lassert(tok->type_get() != pp_token::TOK_TERMINATOR);
00156         if (tok->type_get() != pp_token::TOK_LINE_END) {
00157                 // extra tokens at the end of directive
00158                 report << trailing_tokens_in_directive << name << tok->location_get();
00159         }
00160 }
00161 
00162 /*!
00163   Parses buffer as file name part of an include directive.
00164   Returns the appropriate token, or NULL in case of failure.
00165   \return  The token representing the include directive or NULL.
00166 */
00167 ptr<pp_token> evaluator::parse_include(void)
00168 {
00169         buffer = buffer->expand_all(macros);
00170 
00171         ucn_string name;
00172         
00173         ptr<pp_token> t = buffer->read_front_skip_ws();
00174         pp_token_type ptt = t->type_get();
00175         // save the start location
00176         ptr<source_location> loc = t->location_get();
00177         if (ptt == pp_token::TOK_LT) {
00178                 // get the first part
00179                 t = buffer->read_front();
00180                 ptt = t->type_get();
00181                 while (ptt != pp_token::TOK_GT && ptt != pp_token::TOK_LINE_END) {
00182                         // take the name as literal concatenation
00183                         name += t->spelling_get();
00184                         t = buffer->read_front();
00185                         ptt = t->type_get();
00186                 }
00187                 // unterminated <> sequence
00188                 if (ptt != pp_token::TOK_GT) return NULL;
00189                 // can check here, because the newline was not read yet
00190                 check_extra_tokens("#include");
00191                 return pp_token::create(loc,pp_token::TOK_INCL_HCHAR,token_value::create(name));
00192                 // this is the only possibility to put double quote into the third type include
00193         } else if (ptt == pp_token::TOK_STRING_LIT) {
00194                 check_extra_tokens("#include");
00195                 return pp_token::create(loc,pp_token::TOK_INCL_QCHAR,t->value_get());
00196         } 
00197         
00198         // else it has to be invalid
00199         return NULL;
00200 }
00201 
00202 /*!
00203   Processes preprocessor directive in buffer.
00204   \return  The next token for read().
00205 */
00206 ptr<pp_token> evaluator::process_directive(void)
00207 {
00208         // throw away hash
00209         buffer->read_front();
00210 
00211         ptr<pp_token> newline = buffer->peek_back();
00212         // get the directive
00213         ptr<pp_token> tok = buffer->read_front_skip_ws();
00214         ptr<source_location> loc = tok->location_get();
00215         pp_token_type directive = tok->type_get();
00216         bool old_active = active;
00217         bool done = true;
00218         bool first = false;
00219         bool result;
00220 
00221         ptr<condition_stack> conditions = part->conditions_get();
00222 
00223         switch (directive) {
00224                 case pp_token::TOK_ELIF:
00225                         first = true;
00226                         // fall through
00227                 case pp_token::TOK_IF:
00228                         lassert2(false,"#if and #elif are not supported");
00229                         // TODO pt call differently
00230                         result = true;
00231                         //result = expression::instance()->evaluate(buffer);
00232                         // result = expression::create(buffer)->eval();
00233                         if (conditions->process(first ? condition_stack::DIR_ELIF : condition_stack::DIR_IF,result,loc)) {
00234                                 active = conditions->active_get();
00235                         }
00236                         break;
00237                 case pp_token::TOK_IFNDEF:
00238                         first = true;
00239                         // fall through
00240                 case pp_token::TOK_IFDEF:
00241                         tok = buffer->read_front_skip_ws();
00242                         if (tok->type_get() == pp_token::TOK_LINE_END) {
00243                                 // expected macro name
00244                                 report << directive_expects_macro_name << lstring(first ? "#ifndef" : "#ifdef") << loc;
00245                                 break;
00246                         }
00247 
00248                         result = macros->defined(tok);
00249                         result = result != first;
00250 
00251                         check_extra_tokens(first ? "#ifndef" : "#ifdef");
00252                         
00253                         if (conditions->process(first ? condition_stack::DIR_IFNDEF : condition_stack::DIR_IFDEF,result,loc)) {
00254                                 active = conditions->active_get();
00255                         }
00256                         break;
00257                 case pp_token::TOK_ELSE:
00258                         first = true;
00259                         // fall through
00260                 case pp_token::TOK_ENDIF:
00261                         check_extra_tokens(first ? "#else" : "#endif");
00262                         if (conditions->process(first ? condition_stack::DIR_ELSE : condition_stack::DIR_ENDIF,false,loc)) {
00263                                 active = conditions->active_get();
00264                         }
00265                         break;
00266                 default:
00267                         done = false;
00268                         break;
00269         }
00270         
00271         // either already processed, or ignored
00272         if (done || !old_active) return newline;
00273         
00274         switch (directive) {
00275                 case pp_token::TOK_DEFINE:
00276                 {
00277                         ptr<macro> mac = macro::create();
00278                         if (!mac->parse(buffer)) break;
00279                         macros->define(mac);
00280                         break;
00281                 }
00282                 case pp_token::TOK_UNDEF:
00283                         tok = buffer->read_front_skip_ws();
00284 
00285                         if (tok->type_get() == pp_token::TOK_LINE_END) {
00286                                 // expected macro name
00287                                 report << directive_expects_macro_name << lstring("#undef") << loc;
00288                                 break;
00289                         }
00290 
00291                         macros->undef(tok);
00292                                 
00293                         check_extra_tokens("#undef");
00294 
00295                         break;
00296                 case pp_token::TOK_INCLUDE:
00297                 {
00298                         if (nesting == NESTING_LIMIT) {
00299                                 report << nesting_too_deep << nesting << loc;
00300                                 break;
00301                         }
00302 
00303                         tok = buffer->read_front_skip_ws();
00304                         pp_token_type ptt = tok->type_get();
00305 
00306                         if (ptt == pp_token::TOK_INCL_SIG) {
00307                                 // parse the include in buffer
00308                                 tok = parse_include();
00309                                 if (!tok) {
00310                                         ptt = pp_token::TOK_OTHER;
00311                                 } else {
00312                                         ptt = tok->type_get();
00313                                 }
00314                         }
00315                         
00316                         bool system = false;
00317                         
00318                         switch (ptt) {
00319                                 case pp_token::TOK_INCL_HCHAR:
00320                                         system = true;
00321                                         // fall through
00322                                 case pp_token::TOK_INCL_QCHAR:
00323                                 {
00324                                         // TODO pt get the path from unit part
00325                                         ucn_string us(tok->value_get()->content_get());
00326                                         lstring ls;
00327                                         bool invalid = false;
00328 
00329                                         // convert the string to host character set
00330                                         for (ucn_string::iterator it = us.begin(), end = us.end();
00331                                                         it != end; ++it) {
00332                                                 ucn u = *it;
00333                                                 // only certain characters are supported
00334                                                 if (!character::is_basic(u)) {
00335                                                         // error: unsupported character in include name
00336                                                         report << invalid_character_in_filename << tok->location_get();
00337                                                         invalid = true;
00338                                                         break;
00339                                                 }
00340                                                 ls += character::to_host(u);
00341                                         }
00342 
00343                                         if (invalid) break;
00344                                         
00345                                         // TODO pt path
00346                                         ptr<named_istream> ns = fs->find_file("",ls,system);
00347 
00348                                         if (ns) {
00349                                                 push_stream(ns,loc);
00350                                         } else {
00351                                                 // file not found or not accessible
00352                                                 report << unable_to_open_file << ls << loc;
00353                                         }
00354                                         break;
00355                                 }
00356                                 default:
00357                                         // malformed include
00358                                         report << malformed_include << loc;
00359                                         break;
00360                         }
00361                         break;
00362                 }
00363                 case pp_token::TOK_LINE:
00364                 {
00365                         // in any case, do the expansion, it can not spoil anything
00366                         buffer = buffer->expand_all(macros);
00367 
00368                         tok = buffer->read_front_skip_ws();
00369                         pp_token_type ptt = tok->type_get();
00370 
00371                         if (ptt != pp_token::TOK_NUMBER_LIT) {
00372                                 report << line_expects_number << tok->location_get();
00373                                 break;
00374                         }
00375 
00376                         // check digit sequence
00377                         ucn_string us(tok->value_get()->content_get());
00378                         ulint x = 0;
00379                         bool invalid = false;
00380 
00381                         // convert the string to host character set
00382                         for (ucn_string::iterator it = us.begin(), end = us.end();
00383                                         it != end; ++it) {
00384                                 ucn u = *it;
00385                                 // only decimal digit sequence less than 32768 is allowed
00386                                 if (!character::is_digit(u) || (x = 10*x + character::extract_digit(u)) > 32767) {
00387                                         invalid = true;
00388                                         break;
00389                                 }
00390                         }
00391 
00392                         if (invalid || x == 0) {
00393                                 report << line_expects_number << tok->location_get();
00394                                 break;
00395                         }
00396 
00397                         tok = buffer->read_front_skip_ws();
00398                         ptt = tok->type_get();
00399 
00400                         ptr<line_control> lic = part->line_control_get();
00401 
00402                         if (ptt == pp_token::TOK_STRING_LIT) {
00403                                 // TODO make this a method, together with include
00404                                 ucn_string us(tok->value_get()->content_get());
00405                                 lstring ls;
00406 
00407                                 // convert the string to host character set
00408                                 for (ucn_string::iterator it = us.begin(), end = us.end();
00409                                                 it != end; ++it) {
00410                                         ucn u = *it;
00411                                         // only certain characters are supported
00412                                         if (!character::is_basic(u)) {
00413                                                 invalid = true;
00414                                                 break;
00415                                         }
00416                                         ls += character::to_host(u);
00417                                 }
00418 
00419                                 if (invalid) {
00420                                         report << invalid_character_in_filename << tok->location_get();
00421                                         break;
00422                                 }
00423 
00424                                 lic->change_file(ls);
00425 
00426                                 // in other cases this error is suppressed by other errors
00427                                 check_extra_tokens("#line");
00428                         } else if (ptt != pp_token::TOK_LINE_END) {
00429                                 // file name string literal expected
00430                                 report << line_expects_string << tok->location_get();
00431                                 break;
00432                         }
00433 
00434                         // change the next line number to x
00435                         lic->change_line(newline->location_get(),x);
00436                         break;
00437                 }
00438                 case pp_token::TOK_ERROR:
00439                         // TODO pt add content
00440                         report << user_error << loc;
00441                         break;
00442                 case pp_token::TOK_PRAGMA:
00443                         // no pragmas supported except #pragma lestes
00444                         tok = buffer->read_front_skip_ws();
00445 
00446                         if (tok->type_get() == pp_token::TOK_IDENT) {
00447                                 if (tok->value_get() == token_value::create("lestes")) {
00448                                         pragma_flag = true;
00449                                 }
00450                         }
00451                                 
00452                         break;
00453                 case pp_token::TOK_LINE_END:
00454                         // empty directive
00455                         break;
00456                 default:
00457                         // invalid directive
00458                         report << invalid_directve << tok->spelling_get() << loc;
00459                         break;
00460         }
00461 
00462         return newline;
00463 }
00464 
00465 /*!
00466   Reads next token from current unit part after evaluation of directives.
00467   \return The next token.
00468 */
00469 ptr<pp_token> evaluator::read(void)
00470 {
00471         if (state == EMPTY) 
00472                 return pp_token::create(source_location::zero(),pp_token::TOK_FILE_END);
00473         
00474         ptr<pp_token> tok;
00475         
00476         do {
00477                 switch (state) {
00478                         case START:
00479                                 part->start_of_line();
00480 
00481                                 switch (input->mode_get()) {
00482                                         case expander::DIRECTIVE:
00483                                                 buffer = input->read_line();
00484                                                 return process_directive();
00485                                         case expander::FILE_END:
00486                                                 buffer = input->read_line();
00487                                                 state = END;
00488                                                 break;
00489                                         case expander::NORMAL:
00490                                                 if (active) {
00491                                                         buffer = input->read_expanded();
00492                                                         state = BUFFER;
00493                                                 } else {
00494                                                         buffer = input->read_line();
00495                                                         tok = buffer->peek_back();
00496                                                         lassert(tok->type_get() == pp_token::TOK_LINE_END);
00497                                                         // return the newline
00498                                                         return tok;
00499                                                 }
00500                                                 break;
00501                                         default:
00502                                                 lassert2(false,"You should never get here");
00503                                 }
00504                                 
00505                                 break;
00506                   case BUFFER:
00507                                 tok = buffer->read_front();
00508                                 if (buffer->length() == 0) state = START;
00509                                 return tok;
00510                   case END:
00511                                 {
00512                                         tok = buffer->read_front();
00513                                         lassert(tok->type_get() == pp_token::TOK_FILE_END);
00514                                         
00515                                         ptr<condition_stack> conditions = part->conditions_get();
00516                                         // check whether all conditions were closed
00517                                         conditions->process(condition_stack::DIR_EOF,false,tok->location_get());
00518 
00519                                         // sets state internally
00520                                         pop_stream();
00521                                 }
00522                                 break;
00523                   default:
00524                                 lassert2(false,"You should never get here");
00525                                 break;
00526                 }
00527         } while (state != EMPTY);
00528         
00529         // return the last EOF token
00530         return tok;
00531 }
00532 /*!
00533   Marks the object.
00534 */
00535 void evaluator::gc_mark(void)
00536 {
00537         fs.gc_mark();
00538         macros.gc_mark();
00539         includes.gc_mark();
00540         input.gc_mark();
00541         part.gc_mark();
00542         buffer.gc_mark();
00543         pp_filter::gc_mark();
00544 }
00545 
00546 
00547 /*!
00548   Creates new evaluator, try to start processing given file.
00549   The state is set to EMPTY when the file could not be opened, or START.
00550   \pre a_fs != NULL
00551   \param a_fs  The file system binding.
00552   \param file_name  The name of the file to process.
00553   \return The evaluator.
00554 */
00555 ptr<evaluator> evaluator::create(const ptr<file_system> &a_fs, const lstring &file_name)
00556 {
00557         return new evaluator(a_fs,file_name);
00558 }
00559 
00560 end_package(lex);
00561 end_package(cplus);
00562 end_package(lang);
00563 end_package(lestes);
00564 
00565 /* vim: set ft=lestes : */
00566 

Generated on Mon Feb 12 18:22:33 2007 for lestes by doxygen 1.5.1-20070107