token_stream.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief Stream of tokens.
00030 
00031         Definition of token_stream class representing stream of tokens.
00032         \author pt
00033 */
00034 #include <lestes/common.hh>
00035 #include <lestes/std/source_location.hh>
00036 #include <lestes/lang/cplus/lex/token_stream.hh>
00037 #include <lestes/lang/cplus/lex/token_sequence.hh>
00038 #include <lestes/lang/cplus/lex/pp_token.hh>
00039 #include <lestes/lang/cplus/lex/pp_filter.hh>
00040 #include <lestes/lang/cplus/lex/taboo_macros.hh>
00041 #include <lestes/lang/cplus/lex/macro.hh>
00042 #include <lestes/lang/cplus/lex/macro_storage.hh>
00043 #include <lestes/lang/cplus/lex/token_value.hh>
00044 
00045 #include <iterator>
00046 #include <iostream>
00047 
00048 package(lestes);
00049 package(lang);
00050 package(cplus);
00051 package(lex);
00052 
00053 using namespace ::std;
00054 
/*!
  Constructs new stream.
  \pre a_input != NULL
  \param a_input  The input to read from.
*/
token_stream::token_stream(const ptr<pp_filter> &a_input):
        // checked() presumably enforces the non-NULL \pre -- confirm in common.hh
        token_input(checked(a_input)),
        // stays NULL until TOK_FILE_END is read from the input (see load_input())
        file_end(NULL),
        // line buffer: holds the not-yet-consumed tokens of the current input line
        sequence(sequence_type::create())
{
}
00066 
00067 /*!
00068   Loads next line delimited by pp_token::TOK_LINE_END from input into internal buffer.
00069   Expects pp_token::TOK_FILE_END to be returned after the last pp_token::TOK_LINE_END.
00070   \return true  If before end of stream.
00071 */
00072 bool token_stream::load_input(void)
00073 {
00074         // the TOK_FILE_END was read
00075         if (file_end) return false;
00076 
00077         // still something in buffer
00078         if (sequence->size() != 0) return true;
00079         
00080         ptr<pp_token> tok = input_read();
00081         
00082         // TOK_FILE_END can come only at the beginning of line
00083         if (tok->type_get() == pp_token::TOK_FILE_END) {
00084                 file_end = tok;
00085                 return false;
00086         }
00087         
00088         sequence->push_back(tok);
00089         
00090         // fill the line buffer
00091         while (tok->type_get() != pp_token::TOK_LINE_END) {
00092                 tok = input_read();
00093 
00094                 sequence->push_back(tok);
00095         }
00096         return true;
00097 }
00098 
00099 /*!
00100   Reads first token, does no transformation.
00101   \return  The first token in the sequence, or TOK_TERMINATOR.
00102 */
00103 ptr<pp_token> token_stream::read(void)
00104 {
00105         if (!load_input()) return pp_token::terminator();
00106         ptr<pp_token> t = sequence->front();
00107         sequence->pop_front();
00108         return t;
00109 }
00110         
00111 /*!
00112   Returns front token, which is kept in the sequence.
00113   \return  The first token in the sequence, or TOK_TERMINATOR.
00114 */
00115 ptr<pp_token> token_stream::peek_front(void)
00116 {
00117         if (!load_input()) return pp_token::terminator();
00118         return sequence->front();
00119 }
00120         
00121 /*!
00122   Reads front token, squeezing multiple blanks into one.
00123   \return  The first token in the sequence.
00124 */
00125 ptr<pp_token> token_stream::read_front(void)
00126 {
00127         if (!load_input()) return pp_token::terminator();
00128         
00129         ptr<pp_token> t = sequence->front();
00130         sequence->pop_front();
00131 
00132         sequence_type::size_type len = sequence->size();
00133         
00134         if (t->type_get() == pp_token::TOK_BLANK) {
00135                 ptr<pp_token> u;
00136                 while (len--) {
00137                         u = sequence->front();
00138                         if (u->type_get() != pp_token::TOK_BLANK) break;
00139                         sequence->pop_front();
00140                 }
00141         }
00142         
00143         return t;
00144 }
00145 
00146 /*!
00147   Reads front token, skipping front whitespace, but not newline.
00148   \return  The first non-whitespace token in the sequence.
00149 */
00150 ptr<pp_token> token_stream::read_front_skip_ws(void)
00151 {
00152         if (!load_input()) return pp_token::terminator();
00153         
00154         ptr<pp_token> t;
00155         
00156         do {   
00157                 t = sequence->front();
00158                 sequence->pop_front();
00159         } while (t->type_get() == pp_token::TOK_BLANK);
00160         
00161         return t;
00162 }
00163 
00164 /*!
00165   Skips front whitespace.
00166   \return true  If there are some tokens left in the sequence.
00167 */
00168 bool token_stream::skip_front_ws(void)
00169 {
00170         if (!load_input()) return false;
00171 
00172         ptr<pp_token> t;
00173         
00174         while (sequence->front()->type_get() == pp_token::TOK_BLANK) {
00175                 sequence->pop_front();
00176         }
00177         
00178         // sequence contains at least TOK_LINE_END
00179         return true;
00180 }
00181 
00182 /*!
00183   Reads line from the stream until TOK_LINE_END. At the end of stream returns the TOK_FILE_END.
00184   \return  The nonexpanded tokens including the TOK_LINE_END, or sequence containing TOK_FILE_END.
00185 */
00186 ptr<token_sequence> token_stream::read_line(void)
00187 {
00188         ptr<token_sequence> result = token_sequence::create();
00189         if (!load_input()) {
00190                 result->add_back(file_end);
00191                 return result;
00192         }
00193         // TODO pt this is ugly, uses friend, could be done better
00194         // append the buffer to the result
00195         result->sequence->splice(result->sequence->end(),*sequence);
00196         return result;
00197 }
00198 
/*!
  Expands macros within sequence till newline. The first line, i.e. the front part
  ending with TOK_LINE_END, not counting TOK_LINE_END inside macro
  calls, is removed from the sequence.
  \pre macros != NULL
  \param macros  The macros for expansion.
  \return The expanded tokens including the TOK_LINE_END, or empty sequence.
*/
ptr<token_sequence> token_stream::expand_line(const ptr<macro_storage> &macros)
{
        ptr<token_sequence> result = token_sequence::create();

        // return empty sequence
        if (!load_input()) return result;

        ptr<pp_token> t;
        // TODO pt copy to token_sequence
        ptr<macro> mac;
        ptr<token_sequence> expanded;
        
        do {
                t = read_front();

                // check whether the token is defined as macro
                // whether the macro is allowed in the context of the token
                // and whether a funlike macro is called with '('
                // NOTE: short-circuit order is significant -- lookup() assigns mac,
                // expand() assigns expanded, and peek_front() may refill the line
                // buffer; reordering these conditions would change behaviour
                if (t->is_name() &&
                         (mac = macros->lookup(t->value_get())) &&
                         // taboo set presumably blocks recursive re-expansion of a
                         // macro within its own expansion -- confirm in taboo_macros.hh
                         !t->taboo_get()->contains(mac) &&
                         (!mac->funlike_get() || peek_front()->type_get() == pp_token::TOK_LEFT_PAR) &&
                         (expanded = mac->expand(t,this,macros))) {
                        
                        // extend the taboo set of the expansion with mac itself
                        // TODO pt move into expand
                        expanded->taboo_extend(t->taboo_get()->extend(mac),macros);

                        // prepend in O(1); the expansion is pushed back onto the
                        // stream so it gets rescanned for further macro calls
                        prepend(expanded);
                } else {
                        result->add_back(t);
                }
        } while (t->type_get() != pp_token::TOK_LINE_END);
        return result;
}
00242 
00243 /*!
00244   Inserts another token sequence before the beginning of the sequence.
00245   \pre inserted != NULL
00246   \post inserted->length() == 0
00247   \param inserted  The sequence to insert, the content is moved into new place.
00248 */
00249 void token_stream::prepend(const ptr<token_sequence> &inserted)
00250 {
00251         lassert(inserted);
00252         sequence->splice(sequence->begin(),*(inserted->sequence));
00253 }
00254 
/*!
  Marks the object for the garbage collector.
  Marks the owned members, then delegates to the base class.
*/
void token_stream::gc_mark(void)
{
        file_end.gc_mark();
        sequence.gc_mark();
        // mark the inherited token_input part as well
        token_input::gc_mark();
}
00264 
00265 /*!
00266   Returns a token stream connected to the \a a_input.
00267   \pre a_input != NULL
00268   \param a_input  The input to read from.
00269   \return  New token stream.
00270 */
00271 ptr<token_stream> token_stream::create(const ptr<pp_filter> &a_input)
00272 {
00273         return new token_stream(a_input);
00274 }
00275 
00276 // TODO pt remove
00277 /*!
00278   Prints debug dump of content to stream.
00279   \param o  The stream to print to.
00280 */
00281 void token_stream::debug_print(ostream &o)
00282 {
00283         bool next = false;
00284 
00285         for (sequence_type::iterator it = sequence->begin(), end = sequence->end();
00286                   it != end; ++it) {
00287                 if (next) o << ' '; else next = true;
00288                 o << (*it)->spelling_get();
00289         }
00290 }
00291 
/*!
  Prints debug dump of a token stream to stream.
  \param o  The stream to print to.
  \param ts  The token stream to print.
  \return  The stream \a o, to allow chaining.
*/
::std::ostream &operator<<(::std::ostream &o, const ptr<token_stream> &ts) {
        ts->debug_print(o);
        return o;
}
00301 
00302 end_package(lex);
00303 end_package(cplus);
00304 end_package(lang);
00305 end_package(lestes);
00306 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:23:43 2007 for lestes by doxygen 1.5.1-20070107