Rev 2 of Thrift, the Pillar successor
Summary: End-to-end communications and serialization in C++ are working
Reviewed By: aditya
Test Plan: See the new top-level test/ folder. It vaguely resembles a unit test, though it could be more automated.
Revert Plan: Revertible
Notes: Still a LOT of optimization work to be done on the generated C++ code, which should be using dynamic memory in a number of places. Next major task is writing the PHP/Java/Python generators.
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@664712 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/cpp/protocol/TBinaryProtocol.cc b/lib/cpp/protocol/TBinaryProtocol.cc
new file mode 100644
index 0000000..4c10bab
--- /dev/null
+++ b/lib/cpp/protocol/TBinaryProtocol.cc
@@ -0,0 +1,195 @@
+#include "protocol/TBinaryProtocol.h"
+
+using std::make_pair;
+using std::map;
+using std::string;
+
+namespace {
+
+/**
+ * Copies sizeof(T) bytes from the front of buf into a T and advances buf past
+ * them. Returns 0 and leaves buf untouched when fewer than sizeof(T) bytes
+ * remain. The bytes are copied one at a time rather than read through a cast
+ * pointer because buf.data carries no alignment guarantee. Values are taken
+ * in host byte order, mirroring writeRaw below.
+ */
+template <typename T>
+T readRaw(TBuf& buf) {
+  const uint32_t width = sizeof(T);
+  if (buf.len < width) {
+    return 0;
+  }
+  T result = 0;
+  uint8_t* dst = reinterpret_cast<uint8_t*>(&result);
+  for (uint32_t i = 0; i < width; ++i) {
+    dst[i] = buf.data[i];
+  }
+  buf.data += width;
+  buf.len -= width;
+  return result;
+}
+
+/**
+ * Returns the sizeof(T) bytes of value's in-memory representation (host byte
+ * order) as a string.
+ */
+template <typename T>
+string writeRaw(const T& value) {
+  return string(reinterpret_cast<const char*>(&value), sizeof(T));
+}
+
+} // namespace
+
+/**
+ * Reads the name of the function being invoked. The name is stored as a
+ * string, so readString does the work and advances buf past it.
+ */
+string TBinaryProtocol::readFunction(TBuf& buf) const {
+  return readString(buf);
+}
+
+/**
+ * Serializes a function call as the length-prefixed function name followed
+ * by the already-serialized arguments.
+ */
+string TBinaryProtocol::writeFunction(const string& name,
+                                      const string& args) const {
+  return writeString(name) + args;
+}
+
+/**
+ * Splits a serialized struct into its fields. The wire layout is a 4-byte
+ * total size followed by the fields, each an 8-byte header (4-byte field id
+ * + 4-byte length) and then that many bytes of data. The returned TBufs
+ * point into the caller's buffer; no field data is copied. buf is advanced
+ * past everything that was consumed.
+ *
+ * Corrupt input is handled on a best-effort basis: a total size larger than
+ * the buffer is clamped, and parsing stops at the first field whose length
+ * would run past the end of the struct.
+ */
+map<uint32_t, TBuf> TBinaryProtocol::readStruct(TBuf& buf) const {
+  map<uint32_t, TBuf> fieldMap;
+
+  if (buf.len < 4) {
+    return fieldMap;
+  }
+
+  // Bytes of the struct body not yet consumed; never exceeds buf.len.
+  uint32_t remaining = readU32(buf);
+  if (remaining > buf.len) {
+    // Data looks corrupt, we don't have that much, we will try to read what
+    // we can but be sure not to go over
+    remaining = buf.len;
+  }
+
+  // Field headers are 8 bytes, 4 byte fid + 4 byte length. The test is >= so
+  // that a final zero-length field, which is nothing but a header, is kept.
+  while (remaining >= 8) {
+    uint32_t fid = readU32(buf);
+    uint32_t flen = readU32(buf);
+    remaining -= 8;
+    if (flen > remaining) {
+      // flen corrupt, there isn't that much data left in this struct
+      break;
+    }
+    fieldMap.insert(make_pair(fid, TBuf(buf.data, flen)));
+    buf.data += flen;
+    buf.len -= flen;
+    remaining -= flen;
+  }
+
+  return fieldMap;
+}
+
+/**
+ * Serializes a struct from a map of field id to already-serialized field
+ * data. The output is a 4-byte total size followed by, for each field in
+ * ascending id order, the 4-byte id, the 4-byte data length and the data.
+ */
+string TBinaryProtocol::writeStruct(const map<uint32_t,string>& s) const {
+  string body;
+  for (map<uint32_t,string>::const_iterator it = s.begin();
+       it != s.end();
+       ++it) {
+    body += writeU32(it->first);
+    body += writeU32(static_cast<uint32_t>(it->second.size()));
+    body += it->second;
+  }
+  return writeU32(static_cast<uint32_t>(body.size())) + body;
+}
+
+/**
+ * Reads a 4-byte length followed by that many bytes of string data and
+ * advances buf past both. A length larger than what is left in the buffer is
+ * treated as corrupt and clamped so the read never runs past data+len.
+ */
+string TBinaryProtocol::readString(TBuf& buf) const {
+  uint32_t len = readU32(buf);
+  if (len > buf.len) {
+    len = buf.len;
+  }
+  if (len == 0) {
+    return "";
+  }
+  string result(reinterpret_cast<const char*>(buf.data), len);
+  buf.data += len;
+  buf.len -= len;
+  return result;
+}
+
+/**
+ * Fixed-width integer readers. Each one consumes exactly the width of its
+ * type from buf, or returns 0 without consuming anything if buf is shorter
+ * than that.
+ */
+uint8_t TBinaryProtocol::readByte(TBuf& buf) const {
+  return readRaw<uint8_t>(buf);
+}
+
+uint32_t TBinaryProtocol::readU32(TBuf& buf) const {
+  return readRaw<uint32_t>(buf);
+}
+
+int32_t TBinaryProtocol::readI32(TBuf& buf) const {
+  return readRaw<int32_t>(buf);
+}
+
+uint64_t TBinaryProtocol::readU64(TBuf& buf) const {
+  return readRaw<uint64_t>(buf);
+}
+
+int64_t TBinaryProtocol::readI64(TBuf& buf) const {
+  return readRaw<int64_t>(buf);
+}
+
+/**
+ * Writes a string as its 4-byte length followed by the raw bytes.
+ */
+string TBinaryProtocol::writeString(const string& str) const {
+  return writeU32(static_cast<uint32_t>(str.size())) + str;
+}
+
+/**
+ * Fixed-width integer writers. Each one emits the raw bytes of its argument
+ * in host byte order.
+ */
+string TBinaryProtocol::writeByte(const uint8_t byte) const {
+  return writeRaw(byte);
+}
+
+string TBinaryProtocol::writeU32(const uint32_t u32) const {
+  return writeRaw(u32);
+}
+
+string TBinaryProtocol::writeI32(int32_t i32) const {
+  return writeRaw(i32);
+}
+
+string TBinaryProtocol::writeU64(uint64_t u64) const {
+  return writeRaw(u64);
+}
+
+string TBinaryProtocol::writeI64(int64_t i64) const {
+  return writeRaw(i64);
+}
diff --git a/lib/cpp/protocol/TBinaryProtocol.h b/lib/cpp/protocol/TBinaryProtocol.h
new file mode 100644
index 0000000..976c383
--- /dev/null
+++ b/lib/cpp/protocol/TBinaryProtocol.h
@@ -0,0 +1,43 @@
+#ifndef T_BINARY_PROTOCOL_H
+#define T_BINARY_PROTOCOL_H
+
+#include "protocol/TProtocol.h"
+
+/**
+ * TBinaryProtocol is thrift's default protocol. It uses a very simple binary
+ * encoding that amounts to writing out the raw bytes of each value.
+ *
+ * @author Mark Slee <mcslee@facebook.com>
+ */
+class TBinaryProtocol : public TProtocol {
+ public:
+  TBinaryProtocol() {}
+  ~TBinaryProtocol() {}
+
+  /** Function call serialization. */
+  std::string readFunction(TBuf& buf) const;
+  std::string writeFunction(const std::string& name,
+                            const std::string& args) const;
+
+  /** Struct serialization. */
+  std::map<uint32_t, TBuf> readStruct(TBuf& buf) const;
+  std::string writeStruct(const std::map<uint32_t,std::string>& s) const;
+
+  /** Basic data type deserialization. */
+  std::string readString(TBuf& buf) const;
+  uint8_t readByte(TBuf& buf) const;
+  uint32_t readU32(TBuf& buf) const;
+  int32_t readI32(TBuf& buf) const;
+  uint64_t readU64(TBuf& buf) const;
+  int64_t readI64(TBuf& buf) const;
+
+  /** Basic data type serialization. */
+  std::string writeString(const std::string& str) const;
+  std::string writeByte(const uint8_t byte) const;
+  std::string writeU32(const uint32_t u32) const;
+  std::string writeI32(const int32_t i32) const;
+  std::string writeU64(const uint64_t u64) const;
+  std::string writeI64(const int64_t i64) const;
+};
+
+#endif // T_BINARY_PROTOCOL_H
diff --git a/lib/cpp/protocol/TProtocol.h b/lib/cpp/protocol/TProtocol.h
new file mode 100644
index 0000000..1f2e0c8
--- /dev/null
+++ b/lib/cpp/protocol/TProtocol.h
@@ -0,0 +1,89 @@
+#ifndef T_PROTOCOL_H
+#define T_PROTOCOL_H
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <string>
+#include <map>
+
+/** Forward declaration for TProtocol */
+struct TBuf;
+
+/**
+ * Abstract class for a thrift protocol driver. These are all the methods that
+ * a protocol must implement. Essentially, there must be some way of reading
+ * and writing all the base types, plus a mechanism for writing out structs
+ * with indexed fields. Also notice that all methods are strictly const. This
+ * is by design. Protocol implementations may NOT keep state, because the
+ * same TProtocol object may be used simultaneously by multiple threads. This
+ * theoretically introduces some limitations into the possible protocol
+ * formats, but with the benefit of performance, clarity, and simplicity.
+ *
+ * @author Mark Slee <mcslee@facebook.com>
+ */
+class TProtocol {
+ public:
+  virtual ~TProtocol() {}
+
+  /**
+   * Function call serialization.
+   */
+
+  virtual std::string
+    readFunction(TBuf& buf) const = 0;
+  virtual std::string
+    writeFunction(const std::string& name, const std::string& args) const = 0;
+
+  /**
+   * Struct serialization.
+   */
+
+  virtual std::map<uint32_t, TBuf>
+    readStruct(TBuf& buf) const = 0;
+  virtual std::string
+    writeStruct(const std::map<uint32_t,std::string>& s) const = 0;
+
+  /**
+   * Basic data type deserialization. Note that these read methods do not
+   * take a const reference to the TBuf object. They SHOULD change the TBuf
+   * object so that it reflects the buffer AFTER the basic data type has
+   * been consumed such that data may continue being read serially from the
+   * buffer.
+   */
+
+  virtual std::string readString  (TBuf& buf) const = 0;
+  virtual uint8_t     readByte    (TBuf& buf) const = 0;
+  virtual uint32_t    readU32     (TBuf& buf) const = 0;
+  virtual int32_t     readI32     (TBuf& buf) const = 0;
+  virtual uint64_t    readU64     (TBuf& buf) const = 0;
+  virtual int64_t     readI64     (TBuf& buf) const = 0;
+
+  virtual std::string writeString (const std::string& str) const = 0;
+  virtual std::string writeByte   (const uint8_t  byte)    const = 0;
+  virtual std::string writeU32    (const uint32_t u32)     const = 0;
+  virtual std::string writeI32    (const int32_t  i32)     const = 0;
+  virtual std::string writeU64    (const uint64_t u64)     const = 0;
+  virtual std::string writeI64    (const int64_t  i64)     const = 0;
+
+ protected:
+  TProtocol() {}
+};
+
+/**
+ * Wrapper around raw data that allows us to track the length of a data
+ * buffer. It is the responsibility of a robust TProtocol implementation
+ * to ensure that any reads that are done from data do NOT overrun the
+ * memory address at data+len. It is also a convention that TBuf objects
+ * do NOT own the memory pointed to by data. They are merely wrappers
+ * around buffers that have been allocated elsewhere. Therefore, the user
+ * should never allocate memory before putting it into a TBuf nor should
+ * they free the data pointed to by a TBuf.
+ */
+struct TBuf {
+  TBuf(const TBuf& that) : data(that.data), len(that.len) {}
+  TBuf(const uint8_t* d, uint32_t l) : data(d), len(l) {}
+  const uint8_t* data;
+  uint32_t len;
+};
+
+#endif