encoder_utf8.test.cc

Go to the documentation of this file.
00001 /*
00002    The lestes compiler suite
00003    Copyright (C) 2002, 2003, 2004, 2005 Miroslav Tichy
00004    Copyright (C) 2002, 2003, 2004, 2005 Petr Zika
00005    Copyright (C) 2002, 2003, 2004, 2005 Vojtech Hala
00006    Copyright (C) 2002, 2003, 2004, 2005 Jiri Kosina
00007    Copyright (C) 2002, 2003, 2004, 2005 Pavel Sanda
00008    Copyright (C) 2002, 2003, 2004, 2005 Jan Zouhar
00009    Copyright (C) 2002, 2003, 2004, 2005 Rudolf Thomas
00010 
00011    This program is free software; you can redistribute it and/or modify
00012    it under the terms of the GNU General Public License as published by
00013    the Free Software Foundation; version 2 of the License.
00014 
00015    This program is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018    GNU General Public License for more details.
00019 
00020    See the full text of the GNU General Public License version 2, and
00021    the limitations in the file doc/LICENSE.
00022 
00023    By accepting the license the licensee waives any and all claims
00024    against the copyright holder(s) related in whole or in part to the
00025    work, its use, and/or the inability to use it.
00026  
00027  */
00028 /*! \file
00029   Unit test for class encoder_utf8.
00030   \author pt
00031 */
00032 #include <lestes/common.hh>
00033 #include <lestes/lang/cplus/lex/special_tokens.hh>
00034 #include <lestes/lang/cplus/lex/string_source.hh>
00035 #include <lestes/lang/cplus/lex/encoder_utf8.hh>
00036 #include <lestes/std/file_info.hh>
00037 #include <lestes/std/source_location.hh>
00038 #include <string>
00039 
00040 package(lestes);
00041 package(lang);
00042 package(cplus);
00043 package(lex);
00044 
00045 using namespace ::std;
00046 
00047 #define TEST_CNT 47
00048 
00049 void encoder_utf8_test(void)
00050 {
00051         char *in[TEST_CNT] = {
00052                 
00053                         /* disallowed 1st */
00054                         "\x80"
00055                 ,
00056                         /* disallowed 1st */
00057                         "\xC0"
00058                 ,
00059                         /* disallowed 1st */
00060                         "\xF5"
00061                 ,
00062                         /* missing 2nd */
00063                         "\xC2"
00064                 ,
00065                         /* disallowed 2nd */
00066                         "\xC2"
00067                         "\x7F"
00068                 ,
00069                         /* disallowed 2nd */
00070                         "\xC2"
00071                         "\xC0"
00072                 ,
00073                         /* missing 2nd */
00074                         "\xE0"
00075                 ,
00076                         /* missing 3rd */
00077                         "\xE0"
00078                         "\xA0"
00079                 ,
00080                         /* disallowed 2nd */
00081                         "\xE0"
00082                         "\x9F"
00083                         "\x80"
00084                 ,
00085                         /* disallowed 2nd */
00086                         "\xE0"
00087                         "\xC0"
00088                         "\x80"
00089                 ,
00090                         /* disallowed 3rd */
00091                         "\xE0"
00092                         "\xA0"
00093                         "\x7F"
00094                 ,
00095                         /* disallowed 3rd */
00096                         "\xE0"
00097                         "\xA0"
00098                         "\xC0"
00099                 ,
00100                         /* disallowed 2nd */
00101                         "\xE1"
00102                         "\x7F"
00103                         "\x80"
00104                 ,
00105                         /* disallowed 2nd */
00106                         "\xE1"
00107                         "\xC0"
00108                         "\x80"
00109                 ,
00110                         /* disallowed 3rd */
00111                         "\xE1"
00112                         "\x80"
00113                         "\x7F"
00114                 ,
00115                         /* disallowed 3rd */
00116                         "\xE1"
00117                         "\x80"
00118                         "\xC0"
00119                 ,
00120                         /* disallowed 2nd */
00121                         "\xED"
00122                         "\x7F"
00123                         "\x80"
00124                 ,
00125                         /* disallowed 2nd */
00126                         "\xED"
00127                         "\xA0"
00128                         "\x80"
00129                 ,
00130                         /* disallowed 3rd */
00131                         "\xED"
00132                         "\x80"
00133                         "\x7F"
00134                 ,
00135                         /* disallowed 3rd */
00136                         "\xED"
00137                         "\x80"
00138                         "\xC0"
00139                 ,
00140                         /* disallowed 2nd */
00141                         "\xEE"
00142                         "\x7F"
00143                         "\x80"
00144                 ,
00145                         /* disallowed 2nd */
00146                         "\xEE"
00147                         "\xC0"
00148                         "\x80"
00149                 ,
00150                         /* disallowed 3rd */
00151                         "\xEE"
00152                         "\x80"
00153                         "\x7F"
00154                 ,
00155                         /* disallowed 3rd */
00156                         "\xEE"
00157                         "\x80"
00158                         "\xC0"
00159                 ,
00160                         /* missing 2nd */
00161                         "\xF0"
00162                 ,
00163                         /* missing 3rd */
00164                         "\xF0"
00165                         "\x90"
00166                 ,
00167                         /* missing 4th */
00168                         "\xF0"
00169                         "\x90"
00170                         "\x80"
00171                 ,
00172                         /* disallowed 2nd */
00173                         "\xF0"
00174                         "\x8F"
00175                         "\x80"
00176                         "\x80"
00177                 ,
00178                         /* disallowed 2nd */
00179                         "\xF0"
00180                         "\xC0"
00181                         "\x80"
00182                         "\x80"
00183                 ,
00184                         /* disallowed 3rd */
00185                         "\xF0"
00186                         "\x90"
00187                         "\x7F"
00188                         "\x80"
00189                 ,
00190                         /* disallowed 3rd */
00191                         "\xF0"
00192                         "\x90"
00193                         "\xC0"
00194                         "\x80"
00195                 ,
00196                         /* disallowed 4th */
00197                         "\xF0"
00198                         "\x90"
00199                         "\x80"
00200                         "\x7F"
00201                 ,
00202                         /* disallowed 4th */
00203                         "\xF0"
00204                         "\x90"
00205                         "\x80"
00206                         "\xC0"
00207                 ,
00208                         /* disallowed 2nd */
00209                         "\xF1"
00210                         "\x7F"
00211                         "\x80"
00212                         "\x80"
00213                 ,
00214                         /* disallowed 2nd */
00215                         "\xF1"
00216                         "\xC0"
00217                         "\x80"
00218                         "\x80"
00219                 ,
00220                         /* disallowed 3rd */
00221                         "\xF1"
00222                         "\x80"
00223                         "\x7F"
00224                         "\x80"
00225                 ,
00226                         /* disallowed 3rd */
00227                         "\xF1"
00228                         "\x80"
00229                         "\xC0"
00230                         "\x80"
00231                 ,
00232                         /* disallowed 4th */
00233                         "\xF1"
00234                         "\x80"
00235                         "\x80"
00236                         "\x7F"
00237                 ,
00238                         /* disallowed 4th */
00239                         "\xF1"
00240                         "\x80"
00241                         "\x80"
00242                         "\xC0"
00243                 ,
00244                         /* disallowed 2nd */
00245                         "\xF4"
00246                         "\x7F"
00247                         "\x80"
00248                         "\x80"
00249                 ,
00250                         /* disallowed 2nd */
00251                         "\xF4"
00252                         "\x90"
00253                         "\x80"
00254                         "\x80"
00255                 ,
00256                         /* disallowed 3rd */
00257                         "\xF4"
00258                         "\x80"
00259                         "\x7F"
00260                         "\x80"
00261                 ,
00262                         /* disallowed 3rd */
00263                         "\xF4"
00264                         "\x80"
00265                         "\xC0"
00266                         "\x80"
00267                 ,
00268                         /* disallowed 4th */
00269                         "\xF4"
00270                         "\x80"
00271                         "\x80"
00272                         "\x7F"
00273                 ,
00274                         /* disallowed 4th */
00275                         "\xF4"
00276                         "\x80"
00277                         "\x80"
00278                         "\xC0"
00279                 ,
00280                         /* ordinary characters */
00281                         "a"
00282                         "z"
00283                 ,
00284                         /* Czech characters */
00285                         "\xC3\xA1"
00286                         "\xC4\x8D"
00287                         "\xC4\x8F"
00288                         "\xC3\xA9"
00289                         "\xC4\x9B"
00290                         "\xC3\xAD"
00291                         "\xC5\x88"
00292                         "\xC3\xB3"
00293                         "\xC5\x99"
00294                         "\xC5\xA1"
00295                         "\xC5\xA5"
00296                         "\xC3\xBA"
00297                         "\xC5\xAF"
00298                         "\xC5\xBE"
00299         };
00300 
00301         ucn_token_type out[] = {
00302                 ucn_token::TOK_ERROR,
00303                 ucn_token::TOK_ERROR,
00304                 ucn_token::TOK_ERROR,
00305                 ucn_token::TOK_ERROR,
00306                 ucn_token::TOK_ERROR,
00307                 ucn_token::TOK_ERROR,
00308                 ucn_token::TOK_ERROR,
00309                 ucn_token::TOK_ERROR,
00310                 ucn_token::TOK_ERROR,
00311                 ucn_token::TOK_ERROR,
00312                 ucn_token::TOK_ERROR,
00313                 ucn_token::TOK_ERROR,
00314                 ucn_token::TOK_ERROR,
00315                 ucn_token::TOK_ERROR,
00316                 ucn_token::TOK_ERROR,
00317                 ucn_token::TOK_ERROR,
00318                 ucn_token::TOK_ERROR,
00319                 ucn_token::TOK_ERROR,
00320                 ucn_token::TOK_ERROR,
00321                 ucn_token::TOK_ERROR,
00322                 ucn_token::TOK_ERROR,
00323                 ucn_token::TOK_ERROR,
00324                 ucn_token::TOK_ERROR,
00325                 ucn_token::TOK_ERROR,
00326                 ucn_token::TOK_ERROR,
00327                 ucn_token::TOK_ERROR,
00328                 ucn_token::TOK_ERROR,
00329                 ucn_token::TOK_ERROR,
00330                 ucn_token::TOK_ERROR,
00331                 ucn_token::TOK_ERROR,
00332                 ucn_token::TOK_ERROR,
00333                 ucn_token::TOK_ERROR,
00334                 ucn_token::TOK_ERROR,
00335                 ucn_token::TOK_ERROR,
00336                 ucn_token::TOK_ERROR,
00337                 ucn_token::TOK_ERROR,
00338                 ucn_token::TOK_ERROR,
00339                 ucn_token::TOK_ERROR,
00340                 ucn_token::TOK_ERROR,
00341                 ucn_token::TOK_ERROR,
00342                 ucn_token::TOK_ERROR,
00343                 ucn_token::TOK_ERROR,
00344                 ucn_token::TOK_ERROR,
00345                 ucn_token::TOK_ERROR,
00346                 ucn_token::TOK_ERROR,
00347                 ucn_token::TOK_NOT_EOF,
00348                 ucn_token::TOK_NOT_EOF,
00349                 ucn_token::TOK_EOF,
00350                 ucn_token::TOK_NOT_EOF,
00351                 ucn_token::TOK_NOT_EOF,
00352                 ucn_token::TOK_NOT_EOF,
00353                 ucn_token::TOK_NOT_EOF,
00354                 ucn_token::TOK_NOT_EOF,
00355                 ucn_token::TOK_NOT_EOF,
00356                 ucn_token::TOK_NOT_EOF,
00357                 ucn_token::TOK_NOT_EOF,
00358                 ucn_token::TOK_NOT_EOF,
00359                 ucn_token::TOK_NOT_EOF,
00360                 ucn_token::TOK_NOT_EOF,
00361                 ucn_token::TOK_NOT_EOF,
00362                 ucn_token::TOK_NOT_EOF,
00363                 ucn_token::TOK_NOT_EOF,
00364                 ucn_token::TOK_EOF
00365         };
00366 
00367         ptr<file_info> fi = file_info::create(string("abc"),NULL);
00368         ptr<ucn_token> tok;
00369         ucn_token_type utt;
00370         ulint test, i;
00371         
00372         for (i = test = 0; test < TEST_CNT; test++) {
00373                 ptr<data_source> ds = string_source::create(string_source::string_type(in[test]));
00374                 ptr<encoder_utf8> enc = encoder_utf8::create();
00375 
00376                 enc->input_set(ds);
00377                                 
00378                 while (true) {
00379                         tok = enc->read();
00380                         utt = tok->type_get();
00381                         lassert(utt == out[i]);
00382                         i++;
00383                         if (utt == ucn_token::TOK_EOF || utt == ucn_token::TOK_ERROR) break;
00384                 }
00385         }
00386 }
00387 end_package(lex);
00388 end_package(cplus);
00389 end_package(lang);
00390 end_package(lestes);
00391 
00392 int main(void)
00393 {
00394 	::lestes::lang::cplus::lex::encoder_utf8_test();
00395         return 0;
00396 }
00397 /* vim: set ft=lestes : */

Generated on Mon Feb 12 18:22:33 2007 for lestes by doxygen 1.5.1-20070107