string_joiner.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief String literal joining filter.
00030   
00031   Definition of string_joiner class performing joining of string literals.
00032   \author pt
00033 */
00034 #include <lestes/common.hh> 
00035 #include <lestes/std/source_location.hh>
00036 #include <lestes/std/vector.hh>
00037 #include <lestes/lang/cplus/lex/string_joiner.hh>
00038 #include <lestes/lang/cplus/lex/string_joiner.m.hh>
00039 #include <lestes/lang/cplus/lex/pp_token.hh>
00040 #include <lestes/lang/cplus/lex/token_value.hh>
00041 
00042 package(lestes);
00043 package(lang);
00044 package(cplus);
00045 package(lex);
00046 
00047 using namespace ::std;
00048 
00049 /*!
00050   Creates the object.
00051   \pre a_input != NULL
00052   \param a_input  The input for the filter.
00053   \post state == START
00054 */
00055 string_joiner::string_joiner(const ptr<pp_filter> &a_input):
00056         pp_filter(checked(a_input)),
00057         state(START),
00058         saved(NULL),
00059         sequence(sequence_type::create())
00060 {
00061 }
00062 
00063 /*!
00064   Reads next token from input.
00065   Joins all adjacent string literal tokens into single string literal token.
00066   Joins all adjacent normal and wide string literal tokens into single wide string literal token.
00067 */
00068 ptr<pp_token> string_joiner::read(void)
00069 {
00070         if (state == SAVED) {
00071                 ptr<pp_token> tmp = saved;
00072                 saved = NULL;
00073                 state = START;
00074                 return tmp;
00075         }
00076                 
00077         ptr<pp_token> t;
00078         pp_token_type ptt;
00079         // first literal
00080         ptr<pp_token> first;
00081 
00082         while (true) {
00083                 t = input_read();
00084                 ptt = t->type_get();
00085                 
00086                 switch (state) {
00087                         case START:
00088                                 switch (ptt) {
00089                                         case pp_token::TOK_STRING_LIT:
00090                                                 first = t;
00091                                                 state = FIRST_STRING;
00092                                                 break;
00093                                         case pp_token::TOK_WSTRING_LIT:
00094                                                 first = t;
00095                                                 state = FIRST_WIDE_STRING;
00096                                                 break;
00097                                         default:
00098                                                 // fast path
00099                                                 return t;
00100                                 }
00101                                 break;
00102                         case FIRST_STRING:
00103                                 switch (ptt) {
00104                                         case pp_token::TOK_STRING_LIT:
00105                                                 sequence->clear();
00106                                                 sequence->push_back(first);
00107                                                 sequence->push_back(t);
00108                                                 state = NEXT_STRING;
00109                                                 break;
00110                                         case pp_token::TOK_WSTRING_LIT:
00111                                                 report << mixed_strings << t->location_get();
00112                                                 sequence->clear();
00113                                                 sequence->push_back(first);
00114                                                 sequence->push_back(t);
00115                                                 state = NEXT_BAD_STRING;
00116                                                 break;
00117                                         default:
00118                                                 saved = t;
00119                                                 state = SAVED;
00120                                                 return first;
00121                                 }
00122                                 break;
00123                         case NEXT_STRING:
00124                                 switch (ptt) {
00125                                         case pp_token::TOK_WSTRING_LIT:
00126                                                 state = NEXT_BAD_STRING;
00127                                                 report << mixed_strings << t->location_get();
00128                                                 // fall through
00129                                         case pp_token::TOK_STRING_LIT:
00130                                                 sequence->push_back(t);
00131                                                 break;
00132                                         default:
00133                                                 saved = t;
00134                                                 state = SAVED;
00135                                                 return join_sequence(pp_token::TOK_STRING_LIT);
00136                                 }
00137                                 break;
00138                         case FIRST_WIDE_STRING:
00139                                 switch (ptt) {
00140                                         case pp_token::TOK_STRING_LIT:
00141                                                 report << mixed_strings << t->location_get();
00142                                                 sequence->clear();
00143                                                 sequence->push_back(first);
00144                                                 sequence->push_back(t);
00145                                                 state = NEXT_BAD_STRING;
00146                                                 break;
00147                                         case pp_token::TOK_WSTRING_LIT:
00148                                                 sequence->clear();
00149                                                 sequence->push_back(first);
00150                                                 sequence->push_back(t);
00151                                                 state = NEXT_WIDE_STRING;
00152                                                 break;
00153                                         default:
00154                                                 saved = t;
00155                                                 state = SAVED;
00156                                                 return first;
00157                                 }
00158                                 break;
00159                         case NEXT_WIDE_STRING:
00160                                 switch (ptt) {
00161                                         case pp_token::TOK_STRING_LIT:
00162                                                 report << mixed_strings << t->location_get();
00163                                                 sequence->push_back(t);
00164                                                 state = NEXT_BAD_STRING;
00165                                                 break;
00166                                         case pp_token::TOK_WSTRING_LIT:
00167                                                 sequence->push_back(t);
00168                                                 break;
00169                                         default:
00170                                                 saved = t;
00171                                                 state = SAVED;
00172                                                 return join_sequence(pp_token::TOK_WSTRING_LIT);
00173                                 }
00174                                 break;
00175                         case NEXT_BAD_STRING:
00176                                 switch (ptt) {
00177                                         case pp_token::TOK_STRING_LIT:
00178                                         case pp_token::TOK_WSTRING_LIT:
00179                                                 sequence->push_back(t);
00180                                                 break;
00181                                         default:
00182                                                 saved = t;
00183                                                 state = SAVED;
00184                                                 return join_sequence(pp_token::TOK_WSTRING_LIT);
00185                                 }
00186                                 break;
00187                         default:
00188                                 lassert2(false,"You should never get here");
00189                 }
00190         }
00191 }
00192 
00193 /*!
00194   Returns token containing joined string literals. Does not affect the contents of the sequence.
00195   \pre The sequence is not empty.
00196   \pre The sequence contains tokens of one type, either pp_token::TOK_STRING_LIT or pp_token::TOK_WSTRING_LIT.
00197   The location for the new token is taken from the first entry.
00198   \param ptt  The type of the new token.
00199   \return String or wide string literal token.
00200 */
00201 ptr<pp_token> string_joiner::join_sequence(pp_token_type ptt) const
00202 {
00203         sequence_type::iterator it = sequence->begin();
00204         sequence_type::iterator end = sequence->end();
00205 
00206         // process first entry
00207         lassert(it != end);
00208         ptr<pp_token> t = *it;
00209         
00210         ptr<source_location> loc = t->location_get();
00211         ucn_string str(t->value_get()->content_get());
00212         ++it;
00213         
00214         // process second and following entries
00215         for ( ;it != end; ++it) {
00216                 t = *it;
00217                 str += t->value_get()->content_get();
00218         }
00219 
00220         return pp_token::create(loc,ptt,token_value::create(str));
00221 }
00222 
00223 /*!
00224   Marks the object.
00225 */
00226 void string_joiner::gc_mark(void)
00227 {
00228         sequence.gc_mark();
00229         saved.gc_mark();
00230         pp_filter::gc_mark();
00231 }
00232 
00233 /*!
00234   Returns new instance.
00235   \pre a_input != NULL
00236   \param a_input The input for the filter.
00237   \return  New instance of the class.
00238 */
00239 ptr<string_joiner> string_joiner::create(const ptr<pp_filter> &a_input)
00240 {
00241         return new string_joiner(a_input);
00242 }
00243 
00244 end_package(lex);
00245 end_package(cplus);
00246 end_package(lang);
00247 end_package(lestes);
00248 
00249 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:23:36 2007 for lestes by doxygen 1.5.1-20070107