00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <lestes/common.hh>
00033 #include <lestes/lang/cplus/lex/special_tokens.hh>
00034 #include <lestes/lang/cplus/lex/string_source.hh>
00035 #include <lestes/lang/cplus/lex/encoder_utf8.hh>
00036 #include <lestes/std/file_info.hh>
00037 #include <lestes/std/source_location.hh>
00038 #include <string>
00039
00040 package(lestes);
00041 package(lang);
00042 package(cplus);
00043 package(lex);
00044
00045 using namespace ::std;
00046
00047 #define TEST_CNT 47
00048
00049 void encoder_utf8_test(void)
00050 {
00051 char *in[TEST_CNT] = {
00052
00053
00054 "\x80"
00055 ,
00056
00057 "\xC0"
00058 ,
00059
00060 "\xF5"
00061 ,
00062
00063 "\xC2"
00064 ,
00065
00066 "\xC2"
00067 "\x7F"
00068 ,
00069
00070 "\xC2"
00071 "\xC0"
00072 ,
00073
00074 "\xE0"
00075 ,
00076
00077 "\xE0"
00078 "\xA0"
00079 ,
00080
00081 "\xE0"
00082 "\x9F"
00083 "\x80"
00084 ,
00085
00086 "\xE0"
00087 "\xC0"
00088 "\x80"
00089 ,
00090
00091 "\xE0"
00092 "\xA0"
00093 "\x7F"
00094 ,
00095
00096 "\xE0"
00097 "\xA0"
00098 "\xC0"
00099 ,
00100
00101 "\xE1"
00102 "\x7F"
00103 "\x80"
00104 ,
00105
00106 "\xE1"
00107 "\xC0"
00108 "\x80"
00109 ,
00110
00111 "\xE1"
00112 "\x80"
00113 "\x7F"
00114 ,
00115
00116 "\xE1"
00117 "\x80"
00118 "\xC0"
00119 ,
00120
00121 "\xED"
00122 "\x7F"
00123 "\x80"
00124 ,
00125
00126 "\xED"
00127 "\xA0"
00128 "\x80"
00129 ,
00130
00131 "\xED"
00132 "\x80"
00133 "\x7F"
00134 ,
00135
00136 "\xED"
00137 "\x80"
00138 "\xC0"
00139 ,
00140
00141 "\xEE"
00142 "\x7F"
00143 "\x80"
00144 ,
00145
00146 "\xEE"
00147 "\xC0"
00148 "\x80"
00149 ,
00150
00151 "\xEE"
00152 "\x80"
00153 "\x7F"
00154 ,
00155
00156 "\xEE"
00157 "\x80"
00158 "\xC0"
00159 ,
00160
00161 "\xF0"
00162 ,
00163
00164 "\xF0"
00165 "\x90"
00166 ,
00167
00168 "\xF0"
00169 "\x90"
00170 "\x80"
00171 ,
00172
00173 "\xF0"
00174 "\x8F"
00175 "\x80"
00176 "\x80"
00177 ,
00178
00179 "\xF0"
00180 "\xC0"
00181 "\x80"
00182 "\x80"
00183 ,
00184
00185 "\xF0"
00186 "\x90"
00187 "\x7F"
00188 "\x80"
00189 ,
00190
00191 "\xF0"
00192 "\x90"
00193 "\xC0"
00194 "\x80"
00195 ,
00196
00197 "\xF0"
00198 "\x90"
00199 "\x80"
00200 "\x7F"
00201 ,
00202
00203 "\xF0"
00204 "\x90"
00205 "\x80"
00206 "\xC0"
00207 ,
00208
00209 "\xF1"
00210 "\x7F"
00211 "\x80"
00212 "\x80"
00213 ,
00214
00215 "\xF1"
00216 "\xC0"
00217 "\x80"
00218 "\x80"
00219 ,
00220
00221 "\xF1"
00222 "\x80"
00223 "\x7F"
00224 "\x80"
00225 ,
00226
00227 "\xF1"
00228 "\x80"
00229 "\xC0"
00230 "\x80"
00231 ,
00232
00233 "\xF1"
00234 "\x80"
00235 "\x80"
00236 "\x7F"
00237 ,
00238
00239 "\xF1"
00240 "\x80"
00241 "\x80"
00242 "\xC0"
00243 ,
00244
00245 "\xF4"
00246 "\x7F"
00247 "\x80"
00248 "\x80"
00249 ,
00250
00251 "\xF4"
00252 "\x90"
00253 "\x80"
00254 "\x80"
00255 ,
00256
00257 "\xF4"
00258 "\x80"
00259 "\x7F"
00260 "\x80"
00261 ,
00262
00263 "\xF4"
00264 "\x80"
00265 "\xC0"
00266 "\x80"
00267 ,
00268
00269 "\xF4"
00270 "\x80"
00271 "\x80"
00272 "\x7F"
00273 ,
00274
00275 "\xF4"
00276 "\x80"
00277 "\x80"
00278 "\xC0"
00279 ,
00280
00281 "a"
00282 "z"
00283 ,
00284
00285 "\xC3\xA1"
00286 "\xC4\x8D"
00287 "\xC4\x8F"
00288 "\xC3\xA9"
00289 "\xC4\x9B"
00290 "\xC3\xAD"
00291 "\xC5\x88"
00292 "\xC3\xB3"
00293 "\xC5\x99"
00294 "\xC5\xA1"
00295 "\xC5\xA5"
00296 "\xC3\xBA"
00297 "\xC5\xAF"
00298 "\xC5\xBE"
00299 };
00300
00301 ucn_token_type out[] = {
00302 ucn_token::TOK_ERROR,
00303 ucn_token::TOK_ERROR,
00304 ucn_token::TOK_ERROR,
00305 ucn_token::TOK_ERROR,
00306 ucn_token::TOK_ERROR,
00307 ucn_token::TOK_ERROR,
00308 ucn_token::TOK_ERROR,
00309 ucn_token::TOK_ERROR,
00310 ucn_token::TOK_ERROR,
00311 ucn_token::TOK_ERROR,
00312 ucn_token::TOK_ERROR,
00313 ucn_token::TOK_ERROR,
00314 ucn_token::TOK_ERROR,
00315 ucn_token::TOK_ERROR,
00316 ucn_token::TOK_ERROR,
00317 ucn_token::TOK_ERROR,
00318 ucn_token::TOK_ERROR,
00319 ucn_token::TOK_ERROR,
00320 ucn_token::TOK_ERROR,
00321 ucn_token::TOK_ERROR,
00322 ucn_token::TOK_ERROR,
00323 ucn_token::TOK_ERROR,
00324 ucn_token::TOK_ERROR,
00325 ucn_token::TOK_ERROR,
00326 ucn_token::TOK_ERROR,
00327 ucn_token::TOK_ERROR,
00328 ucn_token::TOK_ERROR,
00329 ucn_token::TOK_ERROR,
00330 ucn_token::TOK_ERROR,
00331 ucn_token::TOK_ERROR,
00332 ucn_token::TOK_ERROR,
00333 ucn_token::TOK_ERROR,
00334 ucn_token::TOK_ERROR,
00335 ucn_token::TOK_ERROR,
00336 ucn_token::TOK_ERROR,
00337 ucn_token::TOK_ERROR,
00338 ucn_token::TOK_ERROR,
00339 ucn_token::TOK_ERROR,
00340 ucn_token::TOK_ERROR,
00341 ucn_token::TOK_ERROR,
00342 ucn_token::TOK_ERROR,
00343 ucn_token::TOK_ERROR,
00344 ucn_token::TOK_ERROR,
00345 ucn_token::TOK_ERROR,
00346 ucn_token::TOK_ERROR,
00347 ucn_token::TOK_NOT_EOF,
00348 ucn_token::TOK_NOT_EOF,
00349 ucn_token::TOK_EOF,
00350 ucn_token::TOK_NOT_EOF,
00351 ucn_token::TOK_NOT_EOF,
00352 ucn_token::TOK_NOT_EOF,
00353 ucn_token::TOK_NOT_EOF,
00354 ucn_token::TOK_NOT_EOF,
00355 ucn_token::TOK_NOT_EOF,
00356 ucn_token::TOK_NOT_EOF,
00357 ucn_token::TOK_NOT_EOF,
00358 ucn_token::TOK_NOT_EOF,
00359 ucn_token::TOK_NOT_EOF,
00360 ucn_token::TOK_NOT_EOF,
00361 ucn_token::TOK_NOT_EOF,
00362 ucn_token::TOK_NOT_EOF,
00363 ucn_token::TOK_NOT_EOF,
00364 ucn_token::TOK_EOF
00365 };
00366
00367 ptr<file_info> fi = file_info::create(string("abc"),NULL);
00368 ptr<ucn_token> tok;
00369 ucn_token_type utt;
00370 ulint test, i;
00371
00372 for (i = test = 0; test < TEST_CNT; test++) {
00373 ptr<data_source> ds = string_source::create(string_source::string_type(in[test]));
00374 ptr<encoder_utf8> enc = encoder_utf8::create();
00375
00376 enc->input_set(ds);
00377
00378 while (true) {
00379 tok = enc->read();
00380 utt = tok->type_get();
00381 lassert(utt == out[i]);
00382 i++;
00383 if (utt == ucn_token::TOK_EOF || utt == ucn_token::TOK_ERROR) break;
00384 }
00385 }
00386 }
00387 end_package(lex);
00388 end_package(cplus);
00389 end_package(lang);
00390 end_package(lestes);
00391
00392 int main(void)
00393 {
00394 ::lestes::lang::cplus::lex::encoder_utf8_test();
00395 return 0;
00396 }
00397