From: David Reiss
Date: Thu, 1 Jul 2010 05:36:25 +0000 (+0000)
Subject: THRIFT-395. python: Add option to treat strings as UTF-8 unicode
X-Git-Tag: 0.4.0~52
X-Git-Url: https://source.supwisdom.com/gerrit/gitweb?a=commitdiff_plain;h=95c005a7c69d106591202cfa6eded6354608b307;p=common%2Fthrift.git

THRIFT-395. python: Add option to treat strings as UTF-8 unicode

Add the "utf8strings" option to the Python generator. If set, all Thrift
strings (not binary) will be expected to be unicode objects, not str. They
will be encoded as UTF-8 before serialization and decoded as UTF-8 after
deserialization.

The accelerator module for TBinaryProtocol is not affected.

git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@959516 13f79535-47bb-0310-9956-ffa450edef68
---

diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc
index 8c545024..c6f84b8d 100644
--- a/compiler/cpp/src/generate/t_py_generator.cc
+++ b/compiler/cpp/src/generate/t_py_generator.cc
@@ -52,6 +52,9 @@ class t_py_generator : public t_generator {
     iter = parsed_options.find("twisted");
     gen_twisted_ = (iter != parsed_options.end());
 
+    iter = parsed_options.find("utf8strings");
+    gen_utf8strings_ = (iter != parsed_options.end());
+
     if (gen_twisted_){
       out_dir_base_ = "gen-py.twisted";
     } else {
@@ -205,6 +208,11 @@ class t_py_generator : public t_generator {
    */
   bool gen_twisted_;
 
+  /**
+   * True iff strings should be encoded using utf-8.
+   */
+  bool gen_utf8strings_;
+
   /**
    * File streams
    */
@@ -1752,7 +1760,11 @@ void t_py_generator::generate_deserialize_field(ofstream &out,
           name;
         break;
       case t_base_type::TYPE_STRING:
-        out << "readString();";
+        if (((t_base_type*)type)->is_binary() || !gen_utf8strings_) {
+          out << "readString();";
+        } else {
+          out << "readString().decode('utf-8')";
+        }
         break;
       case t_base_type::TYPE_BOOL:
         out << "readBool();";
@@ -1946,7 +1958,11 @@ void t_py_generator::generate_serialize_field(ofstream &out,
           "compiler error: cannot serialize void field in a struct: " + name;
         break;
       case t_base_type::TYPE_STRING:
-        out << "writeString(" << name << ")";
+        if (((t_base_type*)type)->is_binary() || !gen_utf8strings_) {
+          out << "writeString(" << name << ")";
+        } else {
+          out << "writeString(" << name << ".encode('utf-8'))";
+        }
         break;
       case t_base_type::TYPE_BOOL:
         out << "writeBool(" << name << ")";