lestes: stringifier.cc Source File

00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief Token stringifier.
00030   
00031   Definition of stringifier class performing token stringification.
00032   \author pt
00033 */
00034 #include <lestes/common.hh>
00035 #include <lestes/std/source_location.hh>
00036 #include <lestes/std/ucn_string.hh>
00037 #include <lestes/lang/cplus/lex/stringifier.hh>
00038 #include <lestes/lang/cplus/lex/pp_token.hh>
00039 #include <lestes/lang/cplus/lex/token_input.hh>
00040 #include <lestes/lang/cplus/lex/token_value.hh>
00041 
00042 package(lestes);
00043 package(lang);
00044 package(cplus);
00045 package(lex);
00046 
00047 /*!
00048   Constructs the stringifier. The body is empty; within this file the only caller is instance().
00049 */
00050 stringifier::stringifier(void)
00051 {
00052 }
00053 
00054 /*!
00055   Attempts to stringify tokens. Creates new token of type pp_token::TOK_STRING_LIT,
00056   with location taken from the first token left after skipping one leading blank token
00057   and value joining escape_spelling() of the tokens read before pp_token::TOK_TERMINATOR.
00058   One leading and one trailing blank token are discarded, blank tokens inside the sequence
00059   are appended with their own spelling (runs are presumably merged by the input already).
00060   \pre input != NULL
00061   \todo pt necessary? pre input does not contain pp_token::TOK_LINE_END tokens.
00062   \param input The tokens to stringify, read up to the first pp_token::TOK_TERMINATOR.
00063   \return  New token containing the stringification, no correctness checks are done.
00064 */
00065 ptr<pp_token> stringifier::process(const ptr<token_input> &input)
00066 {
00067         // check the documented precondition, the same way escape_spelling() checks its own
00068         lassert(input);
00069         ucn_string str;
00070         ptr<pp_token> tok = input->read_front();
00071         // skip the leading blank
00072         if (tok->type_get() == pp_token::TOK_BLANK) tok = input->read_front();
00073         // taken before the terminator test, so for empty input this is the terminator location
00074         ptr<source_location> loc = tok->location_get();
00075         if (tok->type_get() != pp_token::TOK_TERMINATOR) {
00076                 // last trails one token behind tok, so the final token can be tested before adding
00077                 ptr<pp_token> last = tok;
00078                 tok = input->read_front();
00079                 while (tok->type_get() != pp_token::TOK_TERMINATOR) {
00080                         str += escape_spelling(last);
00081                         last = tok;
00082                         tok = input->read_front();
00083                 }
00084                 // add the last token iff nonblank
00085                 if (last->type_get() != pp_token::TOK_BLANK) str += escape_spelling(last);
00086         }
00087         // TODO ??? set error flag, because the literal can be broken
00088         return pp_token::create(loc,pp_token::TOK_STRING_LIT,token_value::create(str));
00089 }
00090 
00091 /*!
00092   Returns the spelling of a token with the characters special inside a string literal escaped.
00093   For string, wide string, character and wide character literals, identifiers and tokens of type
00094   pp_token::TOK_OTHER, a backslash is added before each double quote and backslash, and each
00095   translated character is replaced by two backslashes, the letter u and four hexadecimal digits
00096   (values up to 0xffff), or the letter U and eight hexadecimal digits (larger values).
00097   Spelling of tokens with other types is returned intact.
00098   \pre tok != NULL
00099   \param tok The token to process.
00100   \return The spelling string with special characters escaped.
00101 */
00102 ucn_string stringifier::escape_spelling(const ptr<pp_token> &tok)
00103 {
00104         lassert(tok);
00105 
00106         switch (tok->type_get()) {
00107                 case pp_token::TOK_STRING_LIT:
00108                 case pp_token::TOK_WSTRING_LIT:
00109                 case pp_token::TOK_CHAR_LIT:
00110                 case pp_token::TOK_WCHAR_LIT:
00111                 case pp_token::TOK_OTHER:
00112                 case pp_token::TOK_IDENT:
00113                         break;
00114                 default:
00115                         return tok->spelling_get();
00116         }
00117 
00118         ucn_string str(tok->spelling_get());
00119         ucn_string::size_type len = str.length();
00120         ucn_string work;
00121 
00122         for (ucn_string::size_type i = 0; i < len; i++) {
00123                 ucn u = str[i];
00124 
00125                 if (!character::is_translated(u)) {
00126                         // guard the characters that would end or escape inside a string literal
00127                         if (u == character::ascii_dquote || u == character::ascii_backslash)
00128                                 work += character::ascii_backslash;
00129                         work += u;
00130                         continue;
00131                 }
00132                 // translated character, spelled as an escaped universal character name
00133                 ulint x = character::extract_value(u);
00134                 int shift;
00135 
00136                 work += character::ascii_backslash;
00137                 work += character::ascii_backslash;
00138                 if (x <= 0xffff) {
00139                         // short form, four hexadecimal digits
00140                         work += character::ascii_lower_u;
00141                         shift = 12;
00142                 } else {
00143                         // long form, eight hexadecimal digits
00144                         work += character::ascii_upper_u;
00145                         shift = 28;
00146                 }
00147                 // walk the shift down so both forms emit the most significant digit first
00148                 for (; shift >= 0; shift -= 4)
00149                         work += character::create_xdigit((x >> shift) & 0xf);
00150         }
00151         return work;
00152 }
00153 
00154 /*!
00155   Provides access to the shared stringifier object, creating it on the first call.
00156   \return The singleton.
00157 */
00158 ptr<stringifier> stringifier::instance(void)
00159 {
00160         // an unset singleton means that no earlier call has created the object yet
00161         if (!singleton)
00162                 singleton = new stringifier();
00163         return singleton;
00164 }
00165 
00166 /*!
00167   Storage for the only instance. Defined without initializer, assigned in instance() when found unset.
00168 */
00169 ptr<stringifier> stringifier::singleton;
00170 
00171 end_package(lex);
00172 end_package(cplus);
00173 end_package(lang);
00174 end_package(lestes);
00175 
00176 /* vim: set ft=lestes : */