name_mangler.cc

Go to the documentation of this file.
00001 /*!
00002         \file
00003         \brief Name mangler.
00004         \author jaz
00005 */
00006 
00007 #include <lestes/md/symbols/name_mangler.g.hh>
00008 #include <lestes/lang/cplus/sem/ss_decl2lstring_base.g.hh>
00009 #include <lestes/lang/cplus/sem/ss_declaration.g.hh>
00010 #include <lestes/lang/cplus/sem/ss_decl_name.g.hh>
00011 #include <lestes/lang/cplus/sem/ss_decl_name2lstring_base.g.hh>
00012 #include <iostream>
00013 #include <sstream>
00014 
00015 package(lestes);
00016 package(md);
00017 package(symbols);
00018 
00019 /*
00020 
00021 A grammar used in GNU gcc in name mangling process
00022 
00023 ------------
00024    Check whether a substitution should be used to represent NODE in
00025    the mangling.
00026 
00027    First, check standard special-case substitutions.
00028 
00029 *** <substitution> ::= St     # ::std
00030 
00031                     ::= Sa   # ::std::allocator
00032 
00033                     ::= Sb   # ::std::basic_string
00034 
00035                     ::= Ss   # ::std::basic_string<char, ::std::char_traits<char>, ::std::allocator<char> >
00036 
00037                     ::= Si   # ::std::basic_istream<char, ::std::char_traits<char> >
00038 
00039                     ::= So   # ::std::basic_ostream<char, ::std::char_traits<char> >
00040 
00041                     ::= Sd   # ::std::basic_iostream<char, ::std::char_traits<char> >   
00042 
00043    Then examine the stack of currently available substitution
00044    candidates for entities appearing earlier in the same mangling
00045 
00046    If a substitution is found, write its mangled representation and
00047    return nonzero.  If none is found, just return zero.
00048 -------------
00049    
00050 *** <mangled-name>   ::= _Z <encoding>  
00051 
00052 -------------
00053 
00054 *** <encoding>          ::= <function name> <bare-function-type>
00055                                         ::= <data name> 
00056 -------------
00057                         
00058 *** <name>                      ::= <unscoped-name>
00059                                 ::= <unscoped-template-name> <template-args>
00060                                         ::= <nested-name>
00061                                         ::= <local-name>  
00062 
00063    If IGNORE_LOCAL_SCOPE is nonzero, this production of <name> is
00064    called from <local-name>, which mangles the enclosing scope
00065    elsewhere and then uses this function to mangle just the part
00066    underneath the function scope.  So don't use the <local-name>
00067    production, to avoid an infinite recursion.  
00068 -------------
00069    
00070 *** <unscoped-name> ::= <unqualified-name>
00071                         ::= St <unqualified-name>   # ::std::  
00072 -------------
00073 
00074 *** <unscoped-template-name>    ::= <unscoped-name>
00075                                 ::= <substitution>  
00076 ------------
00077                          
00078   Write the nested name, including CV-qualifiers, of DECL.
00079 
00080 ***  <nested-name>      ::= N [<CV-qualifiers>] <prefix> <unqualified-name> E  
00081                         ::= N [<CV-qualifiers>] <template-prefix> <template-args> E
00082 
00083 ***   <CV-qualifiers> ::= [r] [V] [K] 
00084 ------------
00085    
00086 *** <prefix>    ::= <prefix> <unqualified-name>
00087                 ::= <template-param>
00088                 ::= <template-prefix> <template-args>
00089                         ::= # empty
00090                         ::= <substitution> 
00091 ------------
00092 
00093 *** <template-prefix>   ::= <prefix> <template component>
00094                         ::= <template-param>
00095                         ::= <substitution>  
00096 ------------
00097 
00098    We don't need to handle thunks, vtables, or VTTs here.  Those are
00099    mangled through special entry points.  
00100 
00101 *** <unqualified-name>  ::= <operator-name>
00102                                                 ::= <special-name>  
00103                                                 ::= <source-name>
00104                                                 
00105 ____________
00106                         
00107   Non-terminal <source-name>.  IDENTIFIER is an IDENTIFIER_NODE.  
00108 
00109 ***  <source-name> ::= </length/ number> <identifier> 
00110 
00111 ------------
00112      
00113   Non-terminal <number>.
00114 
00115 ***  <number> ::= [n] </decimal integer/> 
00116 
00117 ------------
00118      
00119   Non-terminal <identifier>.
00120 
00121      <identifier> ::= </unqualified source code identifier> 
00122 ------------
00123 
00124   Handle constructor productions of non-terminal <special-name>.
00125    CTOR is a constructor FUNCTION_DECL. 
00126 
00127 ***  <special-name> ::= C1   # complete object constructor
00128                     ::= C2   # base object constructor
00129                     ::= C3   # complete object allocating constructor
00130 
00131    Currently, allocating constructors are never used. 
00132 
00133    We also need to provide mangled names for the maybe-in-charge
00134    constructor, so we treat it here too.  mangle_decl_string will
00135    append *INTERNAL* to that, to make sure we never emit it. 
00136    
00137 ------------  
00138    
00139    Handle destructor productions of non-terminal <special-name>.
00140    DTOR is a destructor FUNCTION_DECL. 
00141 
00142 ***  <special-name> ::= D0 # deleting (in-charge) destructor
00143                     ::= D1 # complete object (in-charge) destructor
00144                     ::= D2 # base object (not-in-charge) destructor
00145 
00146    We also need to provide mangled names for the maybe-incharge
00147    destructor, so we treat it here too.  mangle_decl_string will
00148    append *INTERNAL* to that, to make sure we never emit it.  
00149    
00150 --------------
00151 
00152 ***   <discriminator> := _ <number>   
00153 
00154    The discriminator is used only for the second and later occurrences
00155    of the same name within a single function. In this case <number> is
00156    n - 2, if this is the nth occurrence, in lexical order.  
00157    
00158 ---------------
00159    
00160    Mangle the name of a function-scope entity.  FUNCTION is the
00161    FUNCTION_DECL for the enclosing function.  ENTITY is the decl for
00162    the entity itself.  LOCAL_ENTITY is the entity that's directly
00163    scoped in FUNCTION_DECL, either ENTITY itself or an enclosing scope
00164    of ENTITY.
00165 
00166 ***  <local-name> := Z <function encoding> E <entity name> [<discriminator>]
00167                   := Z <function encoding> E s [<discriminator>]  
00168                                   
00169 -------------
00170 
00171  Non-terminals <type> and <CV-qualifier>.  
00172 
00173 ***  <type> ::= <builtin-type>
00174             ::= <function-type>
00175             ::= <class-enum-type>
00176             ::= <array-type>
00177             ::= <pointer-to-member-type>
00178             ::= <template-param>
00179             ::= <substitution>
00180             ::= <CV-qualifier>
00181             ::= P <type>    # pointer-to
00182             ::= R <type>    # reference-to
00183             ::= C <type>    # complex pair (C 2000)
00184             ::= G <type>    # imaginary (C 2000)     [not supported]
00185             ::= U <source-name> <type>   # vendor extended type qualifier 
00186 
00187    TYPE is a type node.  
00188    
00189 ---------------
00190    
00191    Non-terminal <CV-qualifiers> for type nodes.  Returns the number of
00192    CV-qualifiers written for TYPE.
00193 
00194 *** <CV-qualifiers> ::= [r] [V] [K]  
00195          
00196 ---------------
00197      
00198   Non-terminal <builtin-type>. 
00199 
00200 ***  <builtin-type> ::= v   # void 
00201                     ::= b   # bool
00202                     ::= w   # wchar_t
00203                     ::= c   # char
00204                     ::= a   # signed char
00205                     ::= h   # unsigned char
00206                     ::= s   # short
00207                     ::= t   # unsigned short
00208                     ::= i   # int
00209                     ::= j   # unsigned int
00210                     ::= l   # long
00211                     ::= m   # unsigned long
00212                     ::= x   # long long, __int64
00213                     ::= y   # unsigned long long, __int64  
00214                     ::= n   # __int128
00215                     ::= o   # unsigned __int128
00216                     ::= f   # float
00217                     ::= d   # double
00218                     ::= e   # long double, __float80 
00219                     ::= g   # __float128          [not supported]
00220                     ::= u <source-name>  # vendor extended type 
00221 ---------------
00222    Non-terminal <function-type>.  NODE is a FUNCTION_TYPE or
00223    METHOD_TYPE.  The return type is mangled before the parameter
00224    types.
00225 
00226 *** <function-type> ::= F [Y] <bare-function-type> E  
00227 
00228 --------------     
00229    Non-terminal <bare-function-type>.  TYPE is a FUNCTION_TYPE or
00230    METHOD_TYPE.  If INCLUDE_RETURN_TYPE is nonzero, the return value
00231    is mangled before the parameter types.  If non-NULL, DECL is
00232    FUNCTION_DECL for the function whose type is being emitted.
00233 
00234 ***  <bare-function-type> ::= </signature/ type>+  
00235 
00236 -------------
00237 
00238 *** <class-enum-type> ::= <name>  
00239 
00240 -------------
00241 
00242   Non-terminal <template-args>.  ARGS is a TREE_VEC of template
00243    arguments.
00244 
00245 *** <template-args> ::= I <template-arg>+ E  
00246 
00247 -------------
00248 *** <expression>        ::= <unary operator-name> <expression>
00249                                         ::= <binary operator-name> <expression> <expression>
00250                                         ::= <expr-primary>
00251 
00252 ***  <expr-primary>     ::= <template-param>
00253                                         ::= L <type> <value number> E  # literal
00254                                         ::= L <mangled-name> E         # external name  
00255                         ::= sr <type> <unqualified-name>
00256                         ::= sr <type> <unqualified-name> <template-args> 
00257 
00258 --------------
00259                  
00260  Non-terminal <template-arg>.  
00261 
00262 ***  <template-arg> ::= <type>                        # type
00263                     ::= L <type> </value/ number> E   # literal
00264                     ::= LZ <name> E                   # external name
00265                     ::= X <expression> E              # expression  
00266 ---------------
00267                     
00268 *** <template-template-arg> ::= <name>
00269                                                         ::= <substitution> 
00270 
00271 ---------------
00272                 
00273   Non-terminal <array-type>.  TYPE is an ARRAY_TYPE.  
00274 
00275      <array-type> ::= A [</dimension/ number>] _ </element/ type>  
00276                   ::= A <expression> _ </element/ type>
00277 
00278      "Array types encode the dimension (number of elements) and the
00279      element type. For variable length arrays, the dimension (but not
00280      the '_' separator) is omitted."  
00281          
00282 ---------------
00283    Non-terminal <pointer-to-member-type> for pointer-to-member
00284    variables.  TYPE is a pointer-to-member POINTER_TYPE.
00285 
00286 *** <pointer-to-member-type> ::= M </class/ type> </member/ type> 
00287 
00288 ---------------
00289    Non-terminal <template-param>.  PARM is a TEMPLATE_TYPE_PARM,
00290    TEMPLATE_TEMPLATE_PARM, BOUND_TEMPLATE_TEMPLATE_PARM or a
00291    TEMPLATE_PARM_INDEX.
00292 
00293 ***  <template-param> ::= T </parameter/ number> _
00294 
00295    If we are internally mangling then we distinguish level and, for
00296    non-type parms, type too. The mangling appends
00297    
00298      </level/ number> _ </non-type type/ type> _
00299 
00300    This is used by mangle_conv_op_name_for_type. 
00301    
00302 --------------
00303 
00304    
00305 *** <template-template-param>   ::= <template-param> 
00306                                                                 ::= <substitution>  
00307 
00308 ---------------
00309   Non-terminal <substitution>.  
00310 
00311 ***  <substitution>     ::= S <seq-id> _
00312                     ::= S_  
00313                                         
00314 --------------
00315 
00316    Return an identifier for a construction vtable group.  TYPE is
00317    the most derived class in the hierarchy; BINFO is the base
00318    subobject for which this construction vtable group will be used.  
00319 
00320    This mangling isn't part of the ABI specification; in the ABI
00321    specification, the vtable group is dumped in the same COMDAT as the
00322    main vtable, and is referenced only from that vtable, so it doesn't
00323    need an external name.  For binary formats without COMDAT sections,
00324    though, we need external names for the vtable groups.  
00325 
00326    We use the production
00327 
00328 *** <special-name> ::= CT <type> <offset number> _ <base type>  
00329 
00330 -----------------
00331     
00332    Return an identifier for the mangled name of a thunk to FN_DECL.
00333    OFFSET is the initial adjustment to this used to find the vptr.  If
00334    VCALL_OFFSET is non-NULL, this is a virtual thunk, and it is the
00335    vtbl offset in bytes.  
00336 
00337 ***  <special-name> ::= Th <offset number> _ <base encoding>
00338                         ::= Tv <offset number> _ <vcall offset number> _ <base encoding>
00339 -----------------
00340 
00341 */
00342 
00343 using ::lestes::lang::cplus::sem::ss_declaration;
00344 using ::lestes::lang::cplus::sem::ss_decl2mangled_name;
00345 using ::lestes::lang::cplus::sem::ss_decl_name2mangled_name;
00346 
00347 /*!
00348         \brief Returns a singleton instance.
00349 */
00350 ptr<name_mangler> name_mangler::instance() {
00351         if ( !singleton_instance_get() ) {
00352                 singleton_instance_set(name_mangler::create());
00353         }
00354         return singleton_instance_get();
00355 }
00356 
00357 
00358 /*!
00359         \brief Replaces wide character escapes after its uppercase form.
00360         
00361         \param name The string.
00362         \return String with repacements.
00363 */
00364 lstring name_mangler::wchar_escapes_replace(lstring name)
00365 {
00366         return ::lestes::md::string_replace(::lestes::md::string_replace( name,"\\u",".u"),"\\U",".U");
00367 }
00368 
00369 
00370 /*!
00371         \brief Mangles declaration.
00372         
00373         It mangles C++ declarations only. Declarations for other languages objects are not mangled - just declaration name is 
00374         returned.
00375         
00376         \param decl The declaration.
00377         \return String identification of the declaration.
00378 */
00379 lstring name_mangler::mangle(ptr<ss_declaration> decl)
00380 {
00381         lassert(decl);
00382         lstring mangled_name;
00383         
00384         if ( decl->linkage_get()->language_get()==ucn_string("C++") ) {
00385                 /*
00386                          C++ object. 
00387                          Mangle its name according to GNU GCC mangling.
00388                 */
00389                 ptr<ss_decl2mangled_name> mangler = ss_decl2mangled_name::instance();
00390                 mangled_name = mangler->process(decl);
00391         } else {
00392                 /*
00393                          Other language extern object. 
00394                          Do not mangle it and return its name instead. ( It works for C object. )
00395                 */
00396                 mangled_name = decl->name_get()->accept_ss_decl_name2lstring_base(ss_decl_name2mangled_name::instance());
00397         }
00398         return wchar_escapes_replace(mangled_name);
00399 }
00400 
00401 end_package(symbols);
00402 end_package(md);
00403 end_package(lestes);
00404 

Generated on Mon Feb 12 18:22:41 2007 for lestes by doxygen 1.5.1-20070107