string_translator.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief Encoder into execution character set.
00030   
00031   Definition of string_translator class representing translator into execution character set.
00032   \author pt
00033 */
00034 #include <lestes/common.hh>
00035 #include <lestes/std/ucn_string.hh>
00036 #include <lestes/std/source_location.hh>
00037 #include <lestes/lang/cplus/lex/string_translator.hh>
00038 #include <lestes/lang/cplus/lex/pp_filter.hh>
00039 #include <lestes/lang/cplus/lex/pp_token.hh>
00040 #include <lestes/lang/cplus/lex/token_value.hh>
00041 
00042 package(lestes);
00043 package(lang);
00044 package(cplus);
00045 package(lex);
00046 
00047 /*!
00048   Creates the translator.
00049   \pre a_input != NULL
00050   \param a_input  The input for the filter.
00051 */
00052 string_translator::string_translator(const ptr<pp_filter> &a_input):
00053         pp_filter(checked(a_input))
00054 {
00055 }
00056 
00057 /*!
00058   Reads next token, encodes character and string literals into execution character set.
00059   \return  The next token.
00060 */
00061 ptr<pp_token> string_translator::read(void)
00062 {
00063         ptr<pp_token> t = input_read();
00064         pp_token_type ptt = t->type_get();
00065         ptr<source_location> loc;
00066 
00067         switch (ptt) {
00068                 case pp_token::TOK_STRING_LIT:
00069                 case pp_token::TOK_WSTRING_LIT:
00070                 case pp_token::TOK_CHAR_LIT:
00071                 case pp_token::TOK_WCHAR_LIT:
00072                         // TODO pt change the TOK*LIT for TOK*TRN
00073                         loc = t->location_get();
00074                         t = pp_token::create(loc,ptt,translate(t->value_get()->content_get(),loc));
00075                         break;
00076                 default:
00077                         break;
00078         }
00079 
00080         return t;
00081 }
00082 
00083 /*!
00084   Translates string or character literal to execution character set.
00085   \pre loc != NULL
00086   \param str The literal representation to translate.
00087   \param loc The location of the literal.
00088   \return The literal in execution character set.
00089 */ 
00090 ptr<token_value> string_translator::translate(const ucn_string &str, const ptr<source_location> &loc)
00091 {
00092         // state of the function
00093         enum {
00094                 BEGIN,
00095                 PASS,
00096                 BACK,
00097                 OCT,
00098                 HEX,
00099                 TRANSLATE
00100         } fstate = BEGIN;
00101 
00102         ucn_string::size_type len = str.length();
00103         ulint count = 0xbad;
00104         ulint value = 0xbad;
00105         ucn u = 0xbad;
00106         
00107         // reserve space
00108         ucn_string us(len,0xbeef);
00109         
00110         ucn_string::const_iterator it = str.begin();
00111         ucn_string::const_iterator end = str.end();
00112         ucn_string::iterator sit = us.begin();
00113         while (true) {
00114                 if (fstate == PASS) {
00115                         fstate = BEGIN;
00116                 } else if (it != end) {
00117                         u = *it;
00118                         ++it;
00119                 } else break;
00120 
00121                 switch (fstate) {
00122                         case BEGIN:
00123                                 if (u == character::ascii_backslash) {
00124                                         fstate = BACK;
00125                                 } else {
00126                                         fstate = TRANSLATE;
00127                                 }
00128                                 break;
00129                         case BACK:
00130                                 switch (u) {
00131                                         case character::ascii_lower_x:
00132                                                 count = 1;
00133                                                 value = 0;
00134                                                 fstate = HEX;
00135                                                 break;
00136                                         case character::ascii_quote:
00137                                         case character::ascii_dquote:
00138                                         case character::ascii_qmark:
00139                                         case character::ascii_backslash:
00140                                                 fstate = TRANSLATE;
00141                                                 break;
00142                                         case character::ascii_lower_a:
00143                                                 u = character::ascii_bell;
00144                                                 break;
00145                                         case character::ascii_lower_b:
00146                                                 u = character::ascii_backspace;
00147                                                 fstate = TRANSLATE;
00148                                                 break;
00149                                         case character::ascii_lower_f:
00150                                                 u = character::ascii_form_feed;
00151                                                 fstate = TRANSLATE;
00152                                                 break;
00153                                         case character::ascii_lower_n:
00154                                                 u = character::ascii_new_line;
00155                                                 fstate = TRANSLATE;
00156                                                 break;
00157                                         case character::ascii_lower_r:
00158                                                 u = character::ascii_carriage_return;
00159                                                 fstate = TRANSLATE;
00160                                                 break;
00161                                         case character::ascii_lower_t:
00162                                                 u = character::ascii_tab;
00163                                                 fstate = TRANSLATE;
00164                                                 break;
00165                                         case character::ascii_lower_v:
00166                                                 u = character::ascii_vtab;
00167                                                 fstate = TRANSLATE;
00168                                                 break;
00169                                         default:
00170                                           lassert(character::is_odigit(u));
00171                                           value = character::extract_odigit(u);
00172                                           count = 2;
00173                                           fstate = OCT;
00174                                           break;
00175                                 }
00176                                 break;
00177                         case OCT:
00178                                 if (character::is_odigit(u)) {
00179                                         value = (value << 3) + character::extract_odigit(u);
00180                                         if (--count == 0) {
00181                                                 *sit = character::create_external(value);
00182                                                 ++sit;
00183                                                 fstate = BEGIN;
00184                                         }
00185                                 } else {
00186                                         *sit = character::create_external(value);
00187                                         ++sit;
00188                                         fstate = PASS;
00189                                 }
00190                                 break;
00191                         case HEX:
00192                                 if (character::is_xdigit(u)) {
00193                                         value = (value << 4) + character::extract_xdigit(u);
00194                                 } else {
00195                                         lassert(count);
00196                                         *sit = character::create_external(value);
00197                                         ++sit;
00198                                         fstate = PASS;
00199                                 }
00200                                 break;
00201                         default:
00202                                 lassert(false);
00203                                 break;
00204                 }
00205 
00206                 if (fstate == TRANSLATE) {
00207                         // TODO not hardcoded ASCII
00208                         if (!character::is_ascii7(u)) {
00209                                 // TODO pt report error: unknown character
00210                                 (void)loc;
00211                                 u = character::ascii_qmark;
00212                         }
00213                         *sit = character::extract_value(u);
00214                         ++sit;
00215 
00216                         fstate = BEGIN;
00217                 }
00218         }
00219 
00220         switch (fstate) {
00221                 case BEGIN:
00222                         break;
00223                 case OCT:
00224                         *sit = character::create_external(value);
00225                         ++sit;
00226                         break;
00227                 case HEX:
00228                         lassert(count);
00229                         *sit = character::create_external(value);
00230                         ++sit;
00231                         break;
00232                 default:
00233                         lassert2(false,"You should never get here");
00234         }
00235         
00236         // use only filled part of the string
00237         return token_value::create(ucn_string(us.begin(),sit));
00238 }
00239 
00240 /*!
00241   Returns new translator.
00242   \pre a_input != NULL
00243   \param a_input The input for the filter.
00244   \return  New instance of the translator.
00245 */
00246 ptr<string_translator> string_translator::create(const ptr<pp_filter> &a_input)
00247 {
00248         return new string_translator(a_input);
00249 }
00250 
00251 end_package(lex);
00252 end_package(cplus);
00253 end_package(lang);
00254 end_package(lestes);
00255 
00256 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:23:36 2007 for lestes by doxygen 1.5.1-20070107