00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <lestes/common.hh>
00035 #include <lestes/std/ucn_string.hh>
00036 #include <lestes/std/source_location.hh>
00037 #include <lestes/lang/cplus/lex/string_translator.hh>
00038 #include <lestes/lang/cplus/lex/pp_filter.hh>
00039 #include <lestes/lang/cplus/lex/pp_token.hh>
00040 #include <lestes/lang/cplus/lex/token_value.hh>
00041
00042 package(lestes);
00043 package(lang);
00044 package(cplus);
00045 package(lex);
00046
00047
00048
00049
00050
00051
00052 string_translator::string_translator(const ptr<pp_filter> &a_input):
00053 pp_filter(checked(a_input))
00054 {
00055 }
00056
00057
00058
00059
00060
00061 ptr<pp_token> string_translator::read(void)
00062 {
00063 ptr<pp_token> t = input_read();
00064 pp_token_type ptt = t->type_get();
00065 ptr<source_location> loc;
00066
00067 switch (ptt) {
00068 case pp_token::TOK_STRING_LIT:
00069 case pp_token::TOK_WSTRING_LIT:
00070 case pp_token::TOK_CHAR_LIT:
00071 case pp_token::TOK_WCHAR_LIT:
00072
00073 loc = t->location_get();
00074 t = pp_token::create(loc,ptt,translate(t->value_get()->content_get(),loc));
00075 break;
00076 default:
00077 break;
00078 }
00079
00080 return t;
00081 }
00082
00083
00084
00085
00086
00087
00088
00089
00090 ptr<token_value> string_translator::translate(const ucn_string &str, const ptr<source_location> &loc)
00091 {
00092
00093 enum {
00094 BEGIN,
00095 PASS,
00096 BACK,
00097 OCT,
00098 HEX,
00099 TRANSLATE
00100 } fstate = BEGIN;
00101
00102 ucn_string::size_type len = str.length();
00103 ulint count = 0xbad;
00104 ulint value = 0xbad;
00105 ucn u = 0xbad;
00106
00107
00108 ucn_string us(len,0xbeef);
00109
00110 ucn_string::const_iterator it = str.begin();
00111 ucn_string::const_iterator end = str.end();
00112 ucn_string::iterator sit = us.begin();
00113 while (true) {
00114 if (fstate == PASS) {
00115 fstate = BEGIN;
00116 } else if (it != end) {
00117 u = *it;
00118 ++it;
00119 } else break;
00120
00121 switch (fstate) {
00122 case BEGIN:
00123 if (u == character::ascii_backslash) {
00124 fstate = BACK;
00125 } else {
00126 fstate = TRANSLATE;
00127 }
00128 break;
00129 case BACK:
00130 switch (u) {
00131 case character::ascii_lower_x:
00132 count = 1;
00133 value = 0;
00134 fstate = HEX;
00135 break;
00136 case character::ascii_quote:
00137 case character::ascii_dquote:
00138 case character::ascii_qmark:
00139 case character::ascii_backslash:
00140 fstate = TRANSLATE;
00141 break;
00142 case character::ascii_lower_a:
00143 u = character::ascii_bell;
00144 break;
00145 case character::ascii_lower_b:
00146 u = character::ascii_backspace;
00147 fstate = TRANSLATE;
00148 break;
00149 case character::ascii_lower_f:
00150 u = character::ascii_form_feed;
00151 fstate = TRANSLATE;
00152 break;
00153 case character::ascii_lower_n:
00154 u = character::ascii_new_line;
00155 fstate = TRANSLATE;
00156 break;
00157 case character::ascii_lower_r:
00158 u = character::ascii_carriage_return;
00159 fstate = TRANSLATE;
00160 break;
00161 case character::ascii_lower_t:
00162 u = character::ascii_tab;
00163 fstate = TRANSLATE;
00164 break;
00165 case character::ascii_lower_v:
00166 u = character::ascii_vtab;
00167 fstate = TRANSLATE;
00168 break;
00169 default:
00170 lassert(character::is_odigit(u));
00171 value = character::extract_odigit(u);
00172 count = 2;
00173 fstate = OCT;
00174 break;
00175 }
00176 break;
00177 case OCT:
00178 if (character::is_odigit(u)) {
00179 value = (value << 3) + character::extract_odigit(u);
00180 if (--count == 0) {
00181 *sit = character::create_external(value);
00182 ++sit;
00183 fstate = BEGIN;
00184 }
00185 } else {
00186 *sit = character::create_external(value);
00187 ++sit;
00188 fstate = PASS;
00189 }
00190 break;
00191 case HEX:
00192 if (character::is_xdigit(u)) {
00193 value = (value << 4) + character::extract_xdigit(u);
00194 } else {
00195 lassert(count);
00196 *sit = character::create_external(value);
00197 ++sit;
00198 fstate = PASS;
00199 }
00200 break;
00201 default:
00202 lassert(false);
00203 break;
00204 }
00205
00206 if (fstate == TRANSLATE) {
00207
00208 if (!character::is_ascii7(u)) {
00209
00210 (void)loc;
00211 u = character::ascii_qmark;
00212 }
00213 *sit = character::extract_value(u);
00214 ++sit;
00215
00216 fstate = BEGIN;
00217 }
00218 }
00219
00220 switch (fstate) {
00221 case BEGIN:
00222 break;
00223 case OCT:
00224 *sit = character::create_external(value);
00225 ++sit;
00226 break;
00227 case HEX:
00228 lassert(count);
00229 *sit = character::create_external(value);
00230 ++sit;
00231 break;
00232 default:
00233 lassert2(false,"You should never get here");
00234 }
00235
00236
00237 return token_value::create(ucn_string(us.begin(),sit));
00238 }
00239
00240
00241
00242
00243
00244
00245
00246 ptr<string_translator> string_translator::create(const ptr<pp_filter> &a_input)
00247 {
00248 return new string_translator(a_input);
00249 }
00250
00251 end_package(lex);
00252 end_package(cplus);
00253 end_package(lang);
00254 end_package(lestes);
00255
00256