From 82e6fc0266e21905358e286074437c7c17cafadc Mon Sep 17 00:00:00 2001 From: David Reiss Date: Thu, 26 Mar 2009 23:32:36 +0000 Subject: [PATCH] THRIFT-153. Proper handling of strings with escapes (in IDL) - Recognize and parse escape characters in .thrift files. - Escape strings used as constants in generated source files. git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@758922 13f79535-47bb-0310-9956-ffa450edef68 --- .../cpp/src/generate/t_cocoa_generator.cc | 2 +- compiler/cpp/src/generate/t_cpp_generator.cc | 2 +- .../cpp/src/generate/t_csharp_generator.cc | 2 +- compiler/cpp/src/generate/t_erl_generator.cc | 2 +- compiler/cpp/src/generate/t_generator.cc | 13 ++++ compiler/cpp/src/generate/t_generator.h | 20 ++++++ compiler/cpp/src/generate/t_hs_generator.cc | 2 +- compiler/cpp/src/generate/t_html_generator.cc | 8 ++- compiler/cpp/src/generate/t_java_generator.cc | 2 +- .../cpp/src/generate/t_ocaml_generator.cc | 2 +- compiler/cpp/src/generate/t_perl_generator.cc | 5 +- compiler/cpp/src/generate/t_php_generator.cc | 3 +- compiler/cpp/src/generate/t_py_generator.cc | 2 +- compiler/cpp/src/generate/t_rb_generator.cc | 2 +- compiler/cpp/src/generate/t_st_generator.cc | 2 +- compiler/cpp/src/thriftl.ll | 62 +++++++++++++++---- 16 files changed, 105 insertions(+), 26 deletions(-) diff --git a/compiler/cpp/src/generate/t_cocoa_generator.cc b/compiler/cpp/src/generate/t_cocoa_generator.cc index f2121ca8..04b0ad7a 100644 --- a/compiler/cpp/src/generate/t_cocoa_generator.cc +++ b/compiler/cpp/src/generate/t_cocoa_generator.cc @@ -1794,7 +1794,7 @@ string t_cocoa_generator::render_const_value(string name, t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - render << "@\"" + value->get_string() + "\""; + render << "@\"" << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: render << ((value->get_integer() > 0) ? "YES" : "NO"); diff --git a/compiler/cpp/src/generate/t_cpp_generator.cc b/compiler/cpp/src/generate/t_cpp_generator.cc index 5ad390a7..c9b20557 100644 --- a/compiler/cpp/src/generate/t_cpp_generator.cc +++ b/compiler/cpp/src/generate/t_cpp_generator.cc @@ -538,7 +538,7 @@ string t_cpp_generator::render_const_value(ofstream& out, string name, t_type* t t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - render << "\"" + value->get_string() + "\""; + render << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: render << ((value->get_integer() > 0) ? "true" : "false"); diff --git a/compiler/cpp/src/generate/t_csharp_generator.cc b/compiler/cpp/src/generate/t_csharp_generator.cc index 63879064..f4c6f89a 100644 --- a/compiler/cpp/src/generate/t_csharp_generator.cc +++ b/compiler/cpp/src/generate/t_csharp_generator.cc @@ -330,7 +330,7 @@ std::string t_csharp_generator::render_const_value(ofstream& out, string name, t t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - render << "\"" + value->get_string() + "\""; + render << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: render << ((value->get_integer() > 0) ? "true" : "false"); diff --git a/compiler/cpp/src/generate/t_erl_generator.cc b/compiler/cpp/src/generate/t_erl_generator.cc index e7648baf..1027cd29 100644 --- a/compiler/cpp/src/generate/t_erl_generator.cc +++ b/compiler/cpp/src/generate/t_erl_generator.cc @@ -315,7 +315,7 @@ string t_erl_generator::render_const_value(t_type* type, t_const_value* value) { t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - out << "\"" << value->get_string() << "\""; + out << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: out << (value->get_integer() > 0 ? "true" : "false"); diff --git a/compiler/cpp/src/generate/t_generator.cc b/compiler/cpp/src/generate/t_generator.cc index 6766cd16..a04446b5 100644 --- a/compiler/cpp/src/generate/t_generator.cc +++ b/compiler/cpp/src/generate/t_generator.cc @@ -59,6 +59,19 @@ void t_generator::generate_program() { close_generator(); } +string t_generator::escape_string(const string &in) const { + string result = ""; + for (string::const_iterator it = in.begin(); it < in.end(); it++) { + std::map::const_iterator res = escape_.find(*it); + if (res != escape_.end()) { + result.append(res->second); + } else { + result.push_back(*it); + } + } + return result; +} + void t_generator::generate_consts(vector consts) { vector::iterator c_iter; for (c_iter = consts.begin(); c_iter != consts.end(); ++c_iter) { diff --git a/compiler/cpp/src/generate/t_generator.h b/compiler/cpp/src/generate/t_generator.h index 060e6565..8fdf5ff1 100644 --- a/compiler/cpp/src/generate/t_generator.h +++ b/compiler/cpp/src/generate/t_generator.h @@ -27,6 +27,11 @@ class t_generator { indent_ = 0; program_ = program; program_name_ = get_program_name(program); + escape_['\n'] = "\\n"; + escape_['\r'] = "\\r"; + escape_['\t'] = "\\t"; + escape_['"'] = "\\\""; + escape_['\\'] = "\\\\"; } virtual ~t_generator() {} @@ -45,6 +50,16 @@ class t_generator { const std::string& line_prefix, const std::string& contents, const std::string& comment_end); + + /** + * Escape string to use one in generated sources. + */ + virtual std::string escape_string(const std::string &in) const; + + std::string get_escaped_string(t_const_value* constval) { + return escape_string(constval->get_string()); + } + protected: /** @@ -184,6 +199,11 @@ class t_generator { */ std::string out_dir_base_; + /** + * Map of characters to escape in string literals. + */ + std::map escape_; + private: /** * Current code indentation level diff --git a/compiler/cpp/src/generate/t_hs_generator.cc b/compiler/cpp/src/generate/t_hs_generator.cc index 18a07f0c..94fb9590 100644 --- a/compiler/cpp/src/generate/t_hs_generator.cc +++ b/compiler/cpp/src/generate/t_hs_generator.cc @@ -303,7 +303,7 @@ string t_hs_generator::render_const_value(t_type* type, t_const_value* value) { t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - out << "\"" << value->get_string() << "\""; + out << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: out << (value->get_integer() > 0 ? "True" : "False"); diff --git a/compiler/cpp/src/generate/t_html_generator.cc b/compiler/cpp/src/generate/t_html_generator.cc index fc05709b..671019ec 100644 --- a/compiler/cpp/src/generate/t_html_generator.cc +++ b/compiler/cpp/src/generate/t_html_generator.cc @@ -32,6 +32,12 @@ class t_html_generator : public t_generator { : t_generator(program) { out_dir_base_ = "gen-html"; + escape_.clear(); + escape_['&'] = "&"; + escape_['<'] = "<"; + escape_['>'] = ">"; + escape_['"'] = """; + escape_['\''] = "'"; } void generate_program(); @@ -396,7 +402,7 @@ void t_html_generator::print_const_value(t_const_value* tvalue) { f_out_ << tvalue->get_double(); break; case t_const_value::CV_STRING: - f_out_ << "\"" << tvalue->get_string() << "\""; + f_out_ << '"' << get_escaped_string(tvalue) << '"'; break; case t_const_value::CV_MAP: { diff --git a/compiler/cpp/src/generate/t_java_generator.cc b/compiler/cpp/src/generate/t_java_generator.cc index e258a470..6e2d0b3d 100644 --- a/compiler/cpp/src/generate/t_java_generator.cc +++ b/compiler/cpp/src/generate/t_java_generator.cc @@ -519,7 +519,7 @@ string t_java_generator::render_const_value(ofstream& out, string name, t_type* t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - render << "\"" + value->get_string() + "\""; + render << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: render << ((value->get_integer() > 0) ? "true" : "false"); diff --git a/compiler/cpp/src/generate/t_ocaml_generator.cc b/compiler/cpp/src/generate/t_ocaml_generator.cc index a60e00c2..8c8f0c0d 100644 --- a/compiler/cpp/src/generate/t_ocaml_generator.cc +++ b/compiler/cpp/src/generate/t_ocaml_generator.cc @@ -361,7 +361,7 @@ string t_ocaml_generator::render_const_value(t_type* type, t_const_value* value) t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - out << "\"" << value->get_string() << "\""; + out << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: out << (value->get_integer() > 0 ? "true" : "false"); diff --git a/compiler/cpp/src/generate/t_perl_generator.cc b/compiler/cpp/src/generate/t_perl_generator.cc index 02ac7792..be728910 100644 --- a/compiler/cpp/src/generate/t_perl_generator.cc +++ b/compiler/cpp/src/generate/t_perl_generator.cc @@ -31,6 +31,8 @@ class t_perl_generator : public t_oop_generator { : t_oop_generator(program) { out_dir_base_ = "gen-perl"; + escape_['$'] = "\\$"; + escape_['@'] = "\\@"; } /** @@ -328,7 +330,7 @@ string t_perl_generator::render_const_value(t_type* type, t_const_value* value) t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - out << "'" << value->get_string() << "'"; + out << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: out << (value->get_integer() > 0 ? "1" : "0"); @@ -1796,5 +1798,4 @@ string t_perl_generator ::type_to_enum(t_type* type) { throw "INVALID TYPE IN type_to_enum: " + type->get_name(); } - THRIFT_REGISTER_GENERATOR(perl, "Perl", ""); diff --git a/compiler/cpp/src/generate/t_php_generator.cc b/compiler/cpp/src/generate/t_php_generator.cc index f74076c1..7b3e617b 100644 --- a/compiler/cpp/src/generate/t_php_generator.cc +++ b/compiler/cpp/src/generate/t_php_generator.cc @@ -51,6 +51,7 @@ class t_php_generator : public t_oop_generator { } out_dir_base_ = (binary_inline_ ? "gen-phpi" : "gen-php"); + escape_['$'] = "\\$"; } /** @@ -371,7 +372,7 @@ string t_php_generator::render_const_value(t_type* type, t_const_value* value) { t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - out << "'" << value->get_string() << "'"; + out << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: out << (value->get_integer() > 0 ? "true" : "false"); diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc index cde70a92..59a4705c 100644 --- a/compiler/cpp/src/generate/t_py_generator.cc +++ b/compiler/cpp/src/generate/t_py_generator.cc @@ -387,7 +387,7 @@ string t_py_generator::render_const_value(t_type* type, t_const_value* value) { t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - out << "'" << value->get_string() << "'"; + out << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: out << (value->get_integer() > 0 ? "True" : "False"); diff --git a/compiler/cpp/src/generate/t_rb_generator.cc b/compiler/cpp/src/generate/t_rb_generator.cc index d3ac711b..2a7f7d92 100644 --- a/compiler/cpp/src/generate/t_rb_generator.cc +++ b/compiler/cpp/src/generate/t_rb_generator.cc @@ -338,7 +338,7 @@ string t_rb_generator::render_const_value(t_type* type, t_const_value* value) { t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - out << "%q\"" << value->get_string() << '"'; + out << "%q\"" << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: out << (value->get_integer() > 0 ? "true" : "false"); diff --git a/compiler/cpp/src/generate/t_st_generator.cc b/compiler/cpp/src/generate/t_st_generator.cc index 960db633..76a8c9d5 100644 --- a/compiler/cpp/src/generate/t_st_generator.cc +++ b/compiler/cpp/src/generate/t_st_generator.cc @@ -348,7 +348,7 @@ string t_st_generator::render_const_value(t_type* type, t_const_value* value) { t_base_type::t_base tbase = ((t_base_type*)type)->get_base(); switch (tbase) { case t_base_type::TYPE_STRING: - out << "'" << value->get_string() << "'"; + out << '"' << get_escaped_string(value) << '"'; break; case t_base_type::TYPE_BOOL: out << (value->get_integer() > 0 ? "true" : "false"); diff --git a/compiler/cpp/src/thriftl.ll b/compiler/cpp/src/thriftl.ll index 0b4e9606..a700ca35 100644 --- a/compiler/cpp/src/thriftl.ll +++ b/compiler/cpp/src/thriftl.ll @@ -14,6 +14,7 @@ %{ +#include #include #include "main.h" @@ -58,10 +59,8 @@ doctext ("/**"([^*/]|[^*]"/"|"*"[^/])*"*"*"*/") comment ("//"[^\n]*) unixcomment ("#"[^\n]*) symbol ([:;\,\{\}\(\)\=<>\[\]]) -dliteral ("\""[^"]*"\"") -sliteral ("'"[^']*"'") st_identifier ([a-zA-Z-][\.a-zA-Z_0-9-]*) - +literal_begin (['\"]) %% @@ -222,17 +221,56 @@ st_identifier ([a-zA-Z-][\.a-zA-Z_0-9-]*) return tok_st_identifier; } -{dliteral} { - yylval.id = strdup(yytext+1); - yylval.id[strlen(yylval.id)-1] = '\0'; - return tok_literal; +{literal_begin} { + char mark = yytext[0]; + std::string result; + for(;;) + { + int ch = yyinput(); + switch (ch) { + case EOF: + yyerror("End of file while read string at %d\n", yylineno); + exit(1); + case '\n': + yyerror("End of line while read string at %d\n", yylineno - 1); + exit(1); + case '\\': + ch = yyinput(); + switch (ch) { + case 'r': + result.push_back('\r'); + continue; + case 'n': + result.push_back('\n'); + continue; + case 't': + result.push_back('\t'); + continue; + case '"': + result.push_back('"'); + continue; + case '\'': + result.push_back('\''); + continue; + case '\\': + result.push_back('\\'); + continue; + default: + yyerror("Bad escape character\n"); + return -1; + } + break; + default: + if (ch == mark) { + yylval.id = strdup(result.c_str()); + return tok_literal; + } else { + result.push_back(ch); + } + } + } } -{sliteral} { - yylval.id = strdup(yytext+1); - yylval.id[strlen(yylval.id)-1] = '\0'; - return tok_literal; -} {doctext} { /* This does not show up in the parse tree. */ -- 2.17.1