THRIFT-1023: Thrift encoding (UTF-8) issue with Ruby 1.9.2
Client: rb
Patch: Nathan Beyer
Fixes an encoding issue for UTF-8 strings in the Ruby client.
git-svn-id: https://svn.apache.org/repos/asf/thrift/trunk@1395832 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/rb/ext/binary_protocol_accelerated.c b/lib/rb/ext/binary_protocol_accelerated.c
index bd1c2da..a8ebe7f 100644
--- a/lib/rb/ext/binary_protocol_accelerated.c
+++ b/lib/rb/ext/binary_protocol_accelerated.c
@@ -22,7 +22,8 @@
#include <stdint.h>
#include <constants.h>
#include <struct.h>
-#include "macros.h"
+#include <macros.h>
+#include <bytes.h>
VALUE rb_thrift_binary_proto_native_qmark(VALUE self) {
return Qtrue;
@@ -80,6 +81,7 @@
if (TYPE(str) != T_STRING) {
rb_raise(rb_eStandardError, "Value should be a string");
}
+ str = convert_to_utf8_byte_buffer(str);
write_i32_direct(trans, RSTRING_LEN(str));
rb_funcall(trans, write_method_id, 1, str);
}
@@ -380,7 +382,8 @@
VALUE rb_thrift_binary_proto_read_string(VALUE self) {
int size = read_i32_direct(self);
- return READ(self, size);
+ VALUE buffer = READ(self, size);
+ return convert_to_string(buffer);
}
void Init_binary_protocol_accelerated() {
diff --git a/lib/rb/ext/bytes.c b/lib/rb/ext/bytes.c
new file mode 100644
index 0000000..8a6fac4
--- /dev/null
+++ b/lib/rb/ext/bytes.c
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <ruby.h>
+#ifdef HAVE_RUBY_ENCODING_H
+#include <ruby/encoding.h>
+#endif
+#include <constants.h>
+#include <bytes.h>
+
+/*
+ * Thin native wrappers: each function dispatches, via rb_funcall, to the
+ * method of the same name on the module cached in thrift_bytes_module and
+ * returns whatever that Ruby method returns.
+ */
+
+/* Calls force_binary_encoding(buffer) on thrift_bytes_module. */
+VALUE force_binary_encoding(VALUE buffer) {
+ return rb_funcall(thrift_bytes_module, force_binary_encoding_id, 1, buffer);
+}
+
+/* Calls convert_to_utf8_byte_buffer(string) on thrift_bytes_module. */
+VALUE convert_to_utf8_byte_buffer(VALUE string) {
+ return rb_funcall(thrift_bytes_module, convert_to_utf8_byte_buffer_id, 1, string);
+}
+
+/* Calls convert_to_string(utf8_buffer) on thrift_bytes_module. */
+VALUE convert_to_string(VALUE utf8_buffer) {
+ return rb_funcall(thrift_bytes_module, convert_to_string_id, 1, utf8_buffer);
+}
diff --git a/lib/rb/ext/bytes.h b/lib/rb/ext/bytes.h
new file mode 100644
index 0000000..7108d83
--- /dev/null
+++ b/lib/rb/ext/bytes.h
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <ruby.h>
+
+/*
+ * A collection of utilities for working with bytes and byte buffers.
+ *
+ * These functions are the native analogues of some of the methods in
+ * Thrift::Bytes (thrift/bytes.rb).
+ */
+
+VALUE force_binary_encoding(VALUE buffer);
+VALUE convert_to_utf8_byte_buffer(VALUE string);
+VALUE convert_to_string(VALUE utf8_buffer);
diff --git a/lib/rb/ext/compact_protocol.c b/lib/rb/ext/compact_protocol.c
index a47fe6c..0c05481 100644
--- a/lib/rb/ext/compact_protocol.c
+++ b/lib/rb/ext/compact_protocol.c
@@ -20,9 +20,10 @@
#include <ruby.h>
#include <stdbool.h>
#include <stdint.h>
-#include "constants.h"
-#include "struct.h"
-#include "macros.h"
+#include <constants.h>
+#include <struct.h>
+#include <macros.h>
+#include <bytes.h>
#define LAST_ID(obj) FIX2INT(rb_ary_pop(rb_ivar_get(obj, last_field_id)))
#define SET_LAST_ID(obj, val) rb_ary_push(rb_ivar_get(obj, last_field_id), val)
@@ -305,6 +306,7 @@
VALUE rb_thrift_compact_proto_write_string(VALUE self, VALUE str) {
VALUE transport = GET_TRANSPORT(self);
+ str = convert_to_utf8_byte_buffer(str);
write_varint32(transport, RSTRING_LEN(str));
WRITE(transport, RSTRING_PTR(str), RSTRING_LEN(str));
return Qnil;
@@ -546,7 +548,8 @@
VALUE rb_thrift_compact_proto_read_string(VALUE self) {
int64_t size = read_varint64(self);
- return READ(self, size);
+ VALUE buffer = READ(self, size);
+ return convert_to_string(buffer);
}
static void Init_constants() {
diff --git a/lib/rb/ext/constants.h b/lib/rb/ext/constants.h
index 9ea00d2..3bfac88 100644
--- a/lib/rb/ext/constants.h
+++ b/lib/rb/ext/constants.h
@@ -76,6 +76,9 @@
extern ID read_all_method_id;
extern ID read_into_buffer_method_id;
extern ID native_qmark_method_id;
+extern ID force_binary_encoding_id;
+extern ID convert_to_utf8_byte_buffer_id;
+extern ID convert_to_string_id;
extern ID fields_const_id;
extern ID transport_ivar_id;
@@ -92,5 +95,6 @@
extern VALUE rb_cSet;
extern VALUE thrift_module;
extern VALUE thrift_types_module;
+extern VALUE thrift_bytes_module;
extern VALUE class_thrift_protocol;
extern VALUE protocol_exception_class;
diff --git a/lib/rb/ext/memory_buffer.c b/lib/rb/ext/memory_buffer.c
index 319b073..e7253dc 100644
--- a/lib/rb/ext/memory_buffer.c
+++ b/lib/rb/ext/memory_buffer.c
@@ -19,7 +19,8 @@
#include <ruby.h>
#include <constants.h>
-#include "macros.h"
+#include <bytes.h>
+#include <macros.h>
ID buf_ivar_id;
ID index_ivar_id;
@@ -37,6 +38,7 @@
VALUE rb_thrift_memory_buffer_write(VALUE self, VALUE str) {
VALUE buf = GET_BUF(self);
+ str = force_binary_encoding(str);
rb_str_buf_cat(buf, RSTRING_PTR(str), RSTRING_LEN(str));
return Qnil;
}
diff --git a/lib/rb/ext/thrift_native.c b/lib/rb/ext/thrift_native.c
index 2f6bb1a..f066d6c 100644
--- a/lib/rb/ext/thrift_native.c
+++ b/lib/rb/ext/thrift_native.c
@@ -18,6 +18,7 @@
*/
#include <ruby.h>
+#include <bytes.h>
#include <struct.h>
#include <binary_protocol_accelerated.h>
#include <compact_protocol.h>
@@ -27,6 +28,7 @@
// cached classes/modules
VALUE rb_cSet;
VALUE thrift_module;
+VALUE thrift_bytes_module;
VALUE thrift_types_module;
// TType constants
@@ -90,6 +92,9 @@
ID read_all_method_id;
ID read_into_buffer_method_id;
ID native_qmark_method_id;
+ID force_binary_encoding_id;
+ID convert_to_utf8_byte_buffer_id;
+ID convert_to_string_id;
// constant ids
ID fields_const_id;
@@ -109,6 +114,7 @@
void Init_thrift_native() {
// cached classes
thrift_module = rb_const_get(rb_cObject, rb_intern("Thrift"));
+ thrift_bytes_module = rb_const_get(thrift_module, rb_intern("Bytes"));
thrift_types_module = rb_const_get(thrift_module, rb_intern("Types"));
rb_cSet = rb_const_get(rb_cObject, rb_intern("Set"));
protocol_exception_class = rb_const_get(thrift_module, rb_intern("ProtocolException"));
@@ -173,6 +179,9 @@
read_all_method_id = rb_intern("read_all");
read_into_buffer_method_id = rb_intern("read_into_buffer");
native_qmark_method_id = rb_intern("native?");
+ force_binary_encoding_id = rb_intern("force_binary_encoding");
+ convert_to_utf8_byte_buffer_id = rb_intern("convert_to_utf8_byte_buffer");
+ convert_to_string_id = rb_intern("convert_to_string");
// constant ids
fields_const_id = rb_intern("FIELDS");