THRIFT-1023: Thrift encoding (UTF-8) issue with Ruby 1.9.2
Client: rb
Patch: Nathan Beyer 

Fixes an encoding issue for UTF-8 strings in the Ruby client.



git-svn-id: https://svn.apache.org/repos/asf/thrift/trunk@1395832 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/rb/ext/binary_protocol_accelerated.c b/lib/rb/ext/binary_protocol_accelerated.c
index bd1c2da..a8ebe7f 100644
--- a/lib/rb/ext/binary_protocol_accelerated.c
+++ b/lib/rb/ext/binary_protocol_accelerated.c
@@ -22,7 +22,8 @@
 #include <stdint.h>
 #include <constants.h>
 #include <struct.h>
-#include "macros.h"
+#include <macros.h>
+#include <bytes.h>
 
 VALUE rb_thrift_binary_proto_native_qmark(VALUE self) {
   return Qtrue;
@@ -80,6 +81,7 @@
   if (TYPE(str) != T_STRING) {
     rb_raise(rb_eStandardError, "Value should be a string");    
   }
+  str = convert_to_utf8_byte_buffer(str);
   write_i32_direct(trans, RSTRING_LEN(str));
   rb_funcall(trans, write_method_id, 1, str);
 }
@@ -380,7 +382,8 @@
 
 VALUE rb_thrift_binary_proto_read_string(VALUE self) {
   int size = read_i32_direct(self);
-  return READ(self, size);
+  VALUE buffer = READ(self, size);
+  return convert_to_string(buffer);
 }
 
 void Init_binary_protocol_accelerated() {
diff --git a/lib/rb/ext/bytes.c b/lib/rb/ext/bytes.c
new file mode 100644
index 0000000..8a6fac4
--- /dev/null
+++ b/lib/rb/ext/bytes.c
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <ruby.h>
+#ifdef HAVE_RUBY_ENCODING_H
+#include <ruby/encoding.h>
+#endif
+#include <constants.h>
+
+VALUE force_binary_encoding(VALUE buffer) {
+  return rb_funcall(thrift_bytes_module, force_binary_encoding_id, 1, buffer);
+}
+
+VALUE convert_to_utf8_byte_buffer(VALUE string) {
+  return rb_funcall(thrift_bytes_module, convert_to_utf8_byte_buffer_id, 1, string);
+}
+
+VALUE convert_to_string(VALUE utf8_buffer) {
+  return rb_funcall(thrift_bytes_module, convert_to_string_id, 1, utf8_buffer);
+}
diff --git a/lib/rb/ext/bytes.h b/lib/rb/ext/bytes.h
new file mode 100644
index 0000000..7108d83
--- /dev/null
+++ b/lib/rb/ext/bytes.h
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <ruby.h>
+
+/*
+ * A collection of utilities for working with bytes and byte buffers.
+ *
+ * These functions are the native analogues of some of the methods in
+ * Thrift::Bytes (thrift/bytes.rb).
+ */
+
+VALUE force_binary_encoding(VALUE buffer);
+VALUE convert_to_utf8_byte_buffer(VALUE string);
+VALUE convert_to_string(VALUE utf8_buffer);
diff --git a/lib/rb/ext/compact_protocol.c b/lib/rb/ext/compact_protocol.c
index a47fe6c..0c05481 100644
--- a/lib/rb/ext/compact_protocol.c
+++ b/lib/rb/ext/compact_protocol.c
@@ -20,9 +20,10 @@
 #include <ruby.h>
 #include <stdbool.h>
 #include <stdint.h>
-#include "constants.h"
-#include "struct.h"
-#include "macros.h"
+#include <constants.h>
+#include <struct.h>
+#include <macros.h>
+#include <bytes.h>
 
 #define LAST_ID(obj) FIX2INT(rb_ary_pop(rb_ivar_get(obj, last_field_id)))
 #define SET_LAST_ID(obj, val) rb_ary_push(rb_ivar_get(obj, last_field_id), val)
@@ -305,6 +306,7 @@
 
 VALUE rb_thrift_compact_proto_write_string(VALUE self, VALUE str) {
   VALUE transport = GET_TRANSPORT(self);
+  str = convert_to_utf8_byte_buffer(str);
   write_varint32(transport, RSTRING_LEN(str));
   WRITE(transport, RSTRING_PTR(str), RSTRING_LEN(str));
   return Qnil;
@@ -546,7 +548,8 @@
 
 VALUE rb_thrift_compact_proto_read_string(VALUE self) {
   int64_t size = read_varint64(self);
-  return READ(self, size);
+  VALUE buffer = READ(self, size);
+  return convert_to_string(buffer);
 }
 
 static void Init_constants() {
diff --git a/lib/rb/ext/constants.h b/lib/rb/ext/constants.h
index 9ea00d2..3bfac88 100644
--- a/lib/rb/ext/constants.h
+++ b/lib/rb/ext/constants.h
@@ -76,6 +76,9 @@
 extern ID read_all_method_id;
 extern ID read_into_buffer_method_id;
 extern ID native_qmark_method_id;
+extern ID force_binary_encoding_id;
+extern ID convert_to_utf8_byte_buffer_id;
+extern ID convert_to_string_id;
 
 extern ID fields_const_id;
 extern ID transport_ivar_id;
@@ -92,5 +95,6 @@
 extern VALUE rb_cSet;
 extern VALUE thrift_module;
 extern VALUE thrift_types_module;
+extern VALUE thrift_bytes_module;
 extern VALUE class_thrift_protocol;
 extern VALUE protocol_exception_class;
diff --git a/lib/rb/ext/memory_buffer.c b/lib/rb/ext/memory_buffer.c
index 319b073..e7253dc 100644
--- a/lib/rb/ext/memory_buffer.c
+++ b/lib/rb/ext/memory_buffer.c
@@ -19,7 +19,8 @@
 
 #include <ruby.h>
 #include <constants.h>
-#include "macros.h"
+#include <bytes.h>
+#include <macros.h>
 
 ID buf_ivar_id;
 ID index_ivar_id;
@@ -37,6 +38,7 @@
 
 VALUE rb_thrift_memory_buffer_write(VALUE self, VALUE str) {
   VALUE buf = GET_BUF(self);
+  str = force_binary_encoding(str);
   rb_str_buf_cat(buf, RSTRING_PTR(str), RSTRING_LEN(str));
   return Qnil;
 }
diff --git a/lib/rb/ext/thrift_native.c b/lib/rb/ext/thrift_native.c
index 2f6bb1a..f066d6c 100644
--- a/lib/rb/ext/thrift_native.c
+++ b/lib/rb/ext/thrift_native.c
@@ -18,6 +18,7 @@
  */
 
 #include <ruby.h>
+#include <bytes.h>
 #include <struct.h>
 #include <binary_protocol_accelerated.h>
 #include <compact_protocol.h>
@@ -27,6 +28,7 @@
 // cached classes/modules
 VALUE rb_cSet;
 VALUE thrift_module;
+VALUE thrift_bytes_module;
 VALUE thrift_types_module;
 
 // TType constants
@@ -90,6 +92,9 @@
 ID read_all_method_id;
 ID read_into_buffer_method_id;
 ID native_qmark_method_id;
+ID force_binary_encoding_id;
+ID convert_to_utf8_byte_buffer_id;
+ID convert_to_string_id;
 
 // constant ids
 ID fields_const_id;
@@ -109,6 +114,7 @@
 void Init_thrift_native() {
   // cached classes
   thrift_module = rb_const_get(rb_cObject, rb_intern("Thrift"));
+  thrift_bytes_module = rb_const_get(thrift_module, rb_intern("Bytes"));
   thrift_types_module = rb_const_get(thrift_module, rb_intern("Types"));
   rb_cSet = rb_const_get(rb_cObject, rb_intern("Set"));
   protocol_exception_class = rb_const_get(thrift_module, rb_intern("ProtocolException"));
@@ -173,6 +179,9 @@
   read_all_method_id = rb_intern("read_all");
   read_into_buffer_method_id = rb_intern("read_into_buffer");
   native_qmark_method_id = rb_intern("native?");
+  force_binary_encoding_id = rb_intern("force_binary_encoding");
+  convert_to_utf8_byte_buffer_id = rb_intern("convert_to_utf8_byte_buffer");
+  convert_to_string_id = rb_intern("convert_to_string");
 
   // constant ids
   fields_const_id = rb_intern("FIELDS");