pre_lex.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief Character level processing.
00030 
00031   Definition of pre_lex class performing operations before lexical analysis.
00032   \author pt
00033 */
00034 #include <lestes/common.hh>
00035 #include <lestes/lang/cplus/lex/pre_lex.hh>
00036 #include <lestes/lang/cplus/lex/pre_lex.m.hh>
00037 #include <lestes/lang/cplus/lex/lex_loggers.hh>
00038 
00039 package(lestes);
00040 package(lang);
00041 package(cplus);
00042 package(lex);
00043 
00044 using namespace ::std;
00045 
00046 /*!
00047   Builds the chain of filters over a data source and starts in state START.
00048   \pre a_src != NULL
00049   \pre a_enc != NULL
00050   \post state == START
00051   \param a_src  The source stream of data.
00052   \param a_enc  The encoder to apply on the source.
00053   \note Both arguments are checked by lassert before they are stored.
00054 */
00055 pre_lex::pre_lex(const ptr<data_source> &a_src, const ptr<encoder> &a_enc):
00056         state(START), saved(),
00057         // the comma expressions run the assertion first, then yield the argument
00058         src((lassert(a_src), a_src)),
00059         enc((lassert(a_enc), a_enc)),
00060         // the filters of the chain, linked together by connect()
00061         stok(special_tokens::create()), lnum(line_numbers::create()),
00062         trig(trigraphs::create()), ljoin(line_join::create())
00063 {
00064         // the freshly created filters have no input until they are connected
00065         connect();
00066 }
00067 
00068 /*!
00069   Links the stages into one chain: src -> enc -> stok -> lnum -> trig -> ljoin.
00070 */
00071 void pre_lex::connect()
00072 {
00073         enc->input_set(src);      // the encoder takes the data source as input
00074         stok->input_set(enc);     // special tokens take the encoder
00075         lnum->input_set(stok);    // line numbers take special tokens
00076         trig->input_set(lnum);    // trigraphs take line numbers
00077         ljoin->input_set(trig);   // line join takes trigraphs; read() pulls from ljoin
00078 }
00079 
00080 /*!
00081   Returns the next token that passed through the whole filter chain and
00082   keeps track of the end of the input while doing so.
00083 
00084   When the token before the end of file is not a newline, the call that
00085   meets the end of file returns an error token, the next call returns a
00086   fake newline and every later call returns the saved end of file token.
00087   An end of file read as the very first token is returned as it is, one
00088   read in state NEWLINE is returned with the location of the saved newline;
00089   either is repeated by every later call.
00090 
00091   The states are:
00092   - START: no token was read yet.
00093   - CONT: the last token was neither a newline nor the end of file.
00094   - NEWLINE: a newline was read, followed only by newlines or error tokens.
00095   - ERROR: the missing newline error was returned, the fake newline is due.
00096   - END: every following call returns the saved end of file token.
00097 
00098   \return Next token with all fields set to proper values.
00099 */
00100 ptr<ucn_token> pre_lex::read()
00101 {
00102         pre_lex_logger << "pre_lex::read()\n" << msg::eolog;
00103         // subscripting the literal by the state logs a single character per state
00104         pre_lex_logger << "state = " << state["scn#$"] << '\n' << msg::eolog;
00105 
00106         // step one: obtain the token to hand out
00107         ptr<ucn_token> tok;
00108         if (state == END) {
00109                 // the end of file was seen before, repeat it
00110                 tok = saved;
00111         } else if (state == ERROR) {
00112                 // the error went out with the previous call, now the fake newline
00113                 // placed at the location of the saved end of file token
00114                 state = END;
00115                 tok = ucn_token::create(ucn_token::TOK_BASIC,character::ascii_new_line,saved->location_get());
00116         } else if (state == START || state == CONT || state == NEWLINE) {
00117                 // regular operation, pull from the last filter of the chain
00118                 tok = ljoin->read();
00119         } else {
00120                 lassert2(false,"You should never get here");
00121         }
00122 
00123         const ucn_token_type kind = tok->type_get();
00124 
00125         // step two: advance the state machine (a call entered in ERROR is in END by now)
00126         if (state == START || state == CONT) {
00127                 if (kind == ucn_token::TOK_EOF) {
00128                         // keep the end of file token for all later calls
00129                         saved = tok;
00130                         if (state == START) {
00131                                 // the very first token is the end of file
00132                                 state = END;
00133                         } else {
00134                                 // report error: missing newline at the end of file,
00135                                 // the fake newline and the saved token follow later
00136                                 tok = ucn_token::create_error(missing_newline->format(),tok->location_get());
00137                                 state = ERROR;
00138                         }
00139                 } else if (kind == ucn_token::TOK_BASIC && tok->value_get() == character::ascii_new_line) {
00140                         // keep the newline, its location is needed if the end of file follows
00141                         saved = tok;
00142                         state = NEWLINE;
00143                 } else {
00144                         // anything else continues the line, nothing to remember
00145                         state = CONT;
00146                         saved = NULL;
00147                 }
00148         } else if (state == NEWLINE) {
00149                 if (kind == ucn_token::TOK_EOF) {
00150                         // the end of file takes over the location of the saved newline
00151                         saved = tok->clone_location(saved->location_get());
00152                         tok = saved;
00153                         state = END;
00154                 } else if (kind == ucn_token::TOK_ERROR) {
00155                         // error tokens do not leave the NEWLINE state
00156                 } else if (kind == ucn_token::TOK_BASIC) {
00157                         // NOTE(review): a repeated newline leaves the location of the
00158                         // first one in saved; confirm that this is intended
00159                         if (!(tok->value_get() == character::ascii_new_line)) {
00160                                 state = CONT;
00161                         }
00162                 } else if (kind == ucn_token::TOK_TRANSLATED) {
00163                         state = CONT;
00164                 } else {
00165                         lassert2(false,"You should never get here");
00166                 }
00167         } else if (state != END && state != ERROR) {
00168                 lassert2(false,"You should never get here");
00169         }
00170 
00171         pre_lex_logger << "returning " << static_cast<ulint>(tok->type_get()) << '\n' << msg::eolog;
00172         pre_lex_logger << "pre_lex::read() end\n" << msg::eolog;
00173         return tok;
00174 }
00175 
00176 /*!
00177   Marks every pointer field of the object, then calls object::gc_mark().
00178 */
00179 void pre_lex::gc_mark()
00180 {
00181         saved.gc_mark();        // token kept between calls of read()
00182         src.gc_mark();          // data source
00183         enc.gc_mark();          // encoder
00184         stok.gc_mark();         // special tokens filter
00185         lnum.gc_mark();         // line numbers filter
00186         trig.gc_mark();         // trigraphs filter
00187         ljoin.gc_mark();        // line join filter
00188         ::lestes::std::object::gc_mark();       // explicitly qualified call, done last
00189 }
00190 
00191 /*! Factory method: allocates a pre_lex over the given source and encoder.
00192   \param a_src  The source stream of data.
00193   \param a_enc  The encoder to apply on the source.
00194   \return New instance of the class.
00195 */
00196 ptr<pre_lex> pre_lex::create(const ptr<data_source> &a_src, const ptr<encoder> &a_enc)
00197 {
00198         ptr<pre_lex> instance = new pre_lex(a_src, a_enc);
00199         return instance;
00200 }
00201 
00202 end_package(lex);
00203 end_package(cplus);
00204 end_package(lang);
00205 end_package(lestes);
00206 
00207 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:23:10 2007 for lestes by doxygen 1.5.1-20070107