trigraphs.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   \brief Trigraphs filter.
00030 
00031   Definition of trigraphs class performing trigraph translation.
00032   \author pt
00033 */
00034 #include <lestes/common.hh>
00035 #include <lestes/lang/cplus/lex/trigraphs.hh>
00036 #include <lestes/lang/cplus/lex/ucn_filter.hh>
00037 #include <lestes/lang/cplus/lex/lex_loggers.hh>
00038 #include <lestes/msg/logger.hh>
00039 #include <lestes/msg/logger_util.hh>
00040 //#include <climits> 
00041 
00042 package(lestes);
00043 package(lang);
00044 package(cplus);
00045 package(lex);
00046 
00047 using namespace ::std;
00048 
00049 /*!
00050   Creates new instance.
00051   \post state == START
00052 */
00053 trigraphs::trigraphs(void):
00054         ucn_filter(),
00055         state(START)
00056 {
00057 }
00058 
00059 /*!
00060   Reads next token after translation of trigraph sequences.
00061   \return  Token possibly originating from trigraph.
00062 */
00063 ptr<ucn_token> trigraphs::read(void)
00064 {
00065         trigraphs_logger << "trigraphs::read()\n" << msg::eolog;
00066         trigraphs_logger << "state == " << state["sqQ12"] << '\n' << msg::eolog;
00067 
00068         ptr<ucn_token> t;
00069 
00070         switch (state) {
00071                 case ONE: 
00072                         // flush one token
00073                         t = first;
00074                         // release reference
00075                         first = NULL;
00076                         state = START;
00077                         trigraphs_logger << "return only saved\ntrigraphs::read() end\n" << msg::eolog;
00078                         return t;
00079                 case TWO:
00080                         // flush one of two tokens
00081                         t = first;
00082                         first = second;
00083                         // release reference
00084                         second = NULL;
00085                         state = ONE;
00086                         trigraphs_logger << "return first saved\ntrigraphs::read() end\n" << msg::eolog;
00087                         return t;
00088                 default:
00089                         break;
00090         }
00091         
00092         while (true) {
00093                 t = input_read();
00094                 ucn_token_type utt = t->type_get();
00095                 // can return out of order, location is already recorded
00096 //      if (utt == ucn_token::TOK_ERROR)
00097   //       return t;
00098                 
00099                 switch (state) {
00100                         case START:
00101                                 if (utt == ucn_token::TOK_BASIC && t->value_get() == character::ascii_qmark) {
00102                                         // first '?'
00103                                         state = QUE;
00104                                         first = t;
00105                                 } else {
00106                                         trigraphs_logger << "trigraphs::read() end\n" << msg::eolog;
00107                                         return t;
00108                                 }
00109                                 break;
00110                         case QUE:
00111                                 if (utt == ucn_token::TOK_BASIC && t->value_get() == character::ascii_qmark) {
00112                                         // second '?'
00113                                         state = QUEQUE;
00114                                         second = t;
00115                                 } else {
00116                                         ptr<ucn_token> tmp;
00117                                         // flush first
00118                                         tmp = first;
00119                                         // prepare to flush the second
00120                                         first = t;
00121                                         state = ONE;
00122                                         trigraphs_logger << "trigraphs::read() end\n" << msg::eolog;
00123                                         return tmp;
00124                                 }
00125                                 break;
00126                         case QUEQUE:
00127                                 // found trigraph sequence
00128                                 if (utt == ucn_token::TOK_BASIC) {
00129                                         ucn u = t->value_get();
00130                                         ucn v = translation_map[character::extract_value(u)];
00131                                         if (v != 0) {
00132                                                 t = first;
00133                                                 // release the reference
00134                                                 first = NULL;
00135                                                 t = t->clone_value(v);
00136                                                 state = START;
00137                                                 trigraphs_logger << "trigraphs::read() end\n" << msg::eolog;
00138                                                 return t;
00139                                         }
00140 
00141                                         // check the third token, with '?' so there will be "??" again
00142                                         state = u == character::ascii_qmark ? QUEQUE : TWO;
00143                                 } else {
00144                                         // there will be ordinary "?X" sequence in the buffer
00145                                         state = TWO;
00146                                 }
00147                                 
00148 
00149                                 {
00150                                         // shift the buffer
00151                                         ptr<ucn_token> tmp = first;
00152                                         first = second;
00153                                         second = t;
00154                                         trigraphs_logger << "trigraphs::read() end\n" << msg::eolog;
00155                                         return tmp;
00156                                 }
00157                         default:
00158                                 lassert2(false,"You should never get here");
00159                 }
00160                 trigraphs_logger << "state == " << state["sqQ12"] << '\n' << msg::eolog;
00161         }
00162 }
00163 
00164 /*!
00165   Marks the object.
00166 */
00167 void trigraphs::gc_mark(void)
00168 {
00169         first.gc_mark();
00170         second.gc_mark();
00171         ucn_filter::gc_mark();
00172 }
00173 
00174 /*!
00175   Returns new instance.
00176   \return New instance of the class.
00177 */
00178 ptr<trigraphs> trigraphs::create(void)
00179 {
00180         return new trigraphs();
00181 }
00182 
00183 /*!
00184   Trigraph translation map. Defines mapping between ASCII characters.
00185   Zero values have no defined translation.
00186 */
00187 const ucn trigraphs::translation_map[trigraphs::translation_length] = {
00188         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00189         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00190         /* index '!' */ character::ascii_vbar,
00191         0, 0, 0, 0, 0,
00192         /* index '\'' */ character::ascii_hat,
00193         /* index '(' */ character::ascii_left_bracket,
00194         /* index ')' */ character::ascii_right_bracket,
00195         0, 0, 0,
00196         /* index '-' */ character::ascii_tilde,
00197         0,
00198         /* index '/' */ character::ascii_backslash,
00199         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00200         /* index '<' */ character::ascii_left_brace,
00201         /* index '=' */ character::ascii_hash,
00202         /* index '>' */ character::ascii_right_brace,
00203         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00204         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00205         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00206         0, 0, 0, 0, 0
00207 };
00208 
00209 end_package(lex);
00210 end_package(cplus);
00211 end_package(lang);
00212 end_package(lestes);
00213 
00214 
00215 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:23:43 2007 for lestes by doxygen 1.5.1-20070107