Rev 2 of Thrift, the Pillar successor

Summary: End-to-end communications and serialization in C++ is working

Reviewed By: aditya

Test Plan: See the new top-level test/ folder. It vaguely resembles a unit test, though it could be more automated.

Revert Plan: Revertible

Notes: Still a LOT of optimization work to be done on the generated C++ code, which should be using dynamic memory in a number of places. Next major task is writing the PHP/Java/Python generators.




git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@664712 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/cpp/protocol/TBinaryProtocol.cc b/lib/cpp/protocol/TBinaryProtocol.cc
new file mode 100644
index 0000000..4c10bab
--- /dev/null
+++ b/lib/cpp/protocol/TBinaryProtocol.cc
@@ -0,0 +1,140 @@
+#include "protocol/TBinaryProtocol.h"
+#include <cstring>
+using namespace std;
+string TBinaryProtocol::readFunction(TBuf& buf) const {
+  string functionName = readString(buf);  // readString also advances buf
+  return functionName;
+}
+
+string TBinaryProtocol::writeFunction(const string& name,
+                                      const string& args) const {
+  return writeString(name).append(args);  // encoded name, then args verbatim
+}
+
+map<uint32_t, TBuf> TBinaryProtocol::readStruct(TBuf& buf) const {
+  // Wire form: u32 size, then (u32 fid, u32 len, len bytes) per field.
+  // Returned TBufs alias buf's memory; buf is advanced past what is read.
+  map<uint32_t, TBuf> fieldMap;
+
+  if (buf.len < 4) return fieldMap;
+  uint32_t total_size = readU32(buf);
+  if (buf.len < total_size) {
+    // Data looks corrupt, we don't have that much, we will try to read what
+    // we can but be sure not to go over
+    total_size = buf.len;
+  }
+
+  // Field headers are 8 bytes, 4 byte fid + 4 byte length. total_size is
+  // <= buf.len here, so it alone bounds the loop and keeps us inside this
+  // struct; >= 8 also admits a final field with an empty payload.
+  while (total_size >= 8) {
+    uint32_t fid  = readU32(buf);
+    uint32_t flen = readU32(buf);
+    total_size -= 8;
+    if (flen > total_size) break;  // corrupt flen: would overrun the struct
+    fieldMap.insert(make_pair(fid, TBuf(buf.data, flen)));
+    buf.data += flen;
+    buf.len  -= flen;
+    total_size -= flen;
+  }
+
+  return fieldMap;
+}
+
+string TBinaryProtocol::writeStruct(const map<uint32_t,string>& s) const {
+  // Emit [fid][len][payload] per field in key order, prefixed by total size.
+  string body;
+  for (map<uint32_t,string>::const_iterator field = s.begin();
+       field != s.end(); ++field) {
+    const string& payload = field->second;
+    body += writeU32(field->first) + writeU32(payload.size()) + payload;
+  }
+  return writeU32(body.size()) + body;
+}
+
+string TBinaryProtocol::readString(TBuf& buf) const {
+  // Clamp a corrupt/oversized length prefix so we never read past data+len.
+  uint32_t len = readU32(buf);
+  if (len > buf.len) len = buf.len;
+  if (len == 0) return "";
+  string result((const char*)(buf.data), len);
+  buf.data += len;
+  buf.len  -= len;
+  return result;
+}
+
+uint8_t TBinaryProtocol::readByte(TBuf& buf) const {
+  // Consumes one byte from the front of buf. An exhausted buffer yields 0
+  // and is left untouched.
+  if (buf.len < 1) return 0;
+  const uint8_t value = *buf.data;
+  ++buf.data;
+  --buf.len;
+  return value;
+}
+
+uint32_t TBinaryProtocol::readU32(TBuf& buf) const {
+  // Host-byte-order read; on underrun returns 0 and leaves buf untouched.
+  if (buf.len < 4) return 0;
+  uint32_t result;
+  memcpy(&result, buf.data, sizeof(result));  // buf.data may be unaligned
+  buf.data += 4;
+  buf.len  -= 4;
+  return result;
+}
+
+int32_t TBinaryProtocol::readI32(TBuf& buf) const {
+  // Host-byte-order read; on underrun returns 0 and leaves buf untouched.
+  if (buf.len < 4) return 0;
+  int32_t result;
+  memcpy(&result, buf.data, sizeof(result));  // buf.data may be unaligned
+  buf.data += 4;
+  buf.len  -= 4;
+  return result;
+}
+
+uint64_t TBinaryProtocol::readU64(TBuf& buf) const {
+  // Host-byte-order read; on underrun returns 0 and leaves buf untouched.
+  if (buf.len < 8) return 0;
+  uint64_t result;
+  memcpy(&result, buf.data, sizeof(result));  // buf.data may be unaligned
+  buf.data += 8;
+  buf.len  -= 8;
+  return result;
+}
+
+int64_t TBinaryProtocol::readI64(TBuf& buf) const {
+  // Host-byte-order read; on underrun returns 0 and leaves buf untouched.
+  if (buf.len < 8) return 0;
+  int64_t result;
+  memcpy(&result, buf.data, sizeof(result));  // buf.data may be unaligned
+  buf.data += 8;
+  buf.len  -= 8;
+  return result;
+}
+
+string TBinaryProtocol::writeString(const string& str) const {
+  // Wire form: 4-byte length prefix (host byte order) followed by the bytes.
+  const uint32_t length = str.size();
+  return string(reinterpret_cast<const char*>(&length), sizeof(length)) + str;
+}
+
+string TBinaryProtocol::writeByte(const uint8_t byte) const {  // 1 raw byte
+  return string(reinterpret_cast<const char*>(&byte), sizeof(byte));
+}
+
+string TBinaryProtocol::writeU32(const uint32_t u32) const {  // host order
+  return string(reinterpret_cast<const char*>(&u32), sizeof(u32));
+}
+
+string TBinaryProtocol::writeI32(int32_t i32) const {  // host byte order
+  return string(reinterpret_cast<const char*>(&i32), sizeof(i32));
+}
+
+string TBinaryProtocol::writeU64(uint64_t u64) const {  // host byte order
+  return string(reinterpret_cast<const char*>(&u64), sizeof(u64));
+}
+
+string TBinaryProtocol::writeI64(int64_t i64) const {  // host byte order
+  return string(reinterpret_cast<const char*>(&i64), sizeof(i64));
+}
diff --git a/lib/cpp/protocol/TBinaryProtocol.h b/lib/cpp/protocol/TBinaryProtocol.h
new file mode 100644
index 0000000..976c383
--- /dev/null
+++ b/lib/cpp/protocol/TBinaryProtocol.h
@@ -0,0 +1,42 @@
+#ifndef T_BINARY_PROTOCOL_H
+#define T_BINARY_PROTOCOL_H
+
+#include "protocol/TProtocol.h"
+
+/**
+ * The default binary protocol for thrift. Writes all data in a very basic
+ * binary format, essentially just spitting out the raw bytes.
+ *
+ * @author Mark Slee <mcslee@facebook.com>
+ */
+class TBinaryProtocol : public TProtocol {
+ public:
+  TBinaryProtocol() {}
+  ~TBinaryProtocol() {}
+
+  // Function call and struct framing.
+  std::string readFunction(TBuf& buf) const;
+  std::string writeFunction(const std::string& name,
+                            const std::string& args) const;
+
+  std::map<uint32_t, TBuf> readStruct(TBuf& buf) const;
+  std::string writeStruct(const std::map<uint32_t, std::string>& s) const;
+
+  // Primitive readers; buf is a non-const reference so it can be advanced.
+  std::string readString(TBuf& buf) const;
+  uint8_t     readByte(TBuf& buf) const;
+  uint32_t    readU32(TBuf& buf) const;
+  int32_t     readI32(TBuf& buf) const;
+  uint64_t    readU64(TBuf& buf) const;
+  int64_t     readI64(TBuf& buf) const;
+
+  // Primitive writers; each returns the value's bytes as a string.
+  std::string writeString(const std::string& str) const;
+  std::string writeByte(const uint8_t byte) const;
+  std::string writeU32(const uint32_t u32) const;
+  std::string writeI32(const int32_t i32) const;
+  std::string writeU64(const uint64_t u64) const;
+  std::string writeI64(const int64_t i64) const;
+};
+
+#endif
diff --git a/lib/cpp/protocol/TProtocol.h b/lib/cpp/protocol/TProtocol.h
new file mode 100644
index 0000000..1f2e0c8
--- /dev/null
+++ b/lib/cpp/protocol/TProtocol.h
@@ -0,0 +1,88 @@
+#ifndef T_PROTOCOL_H
+#define T_PROTOCOL_H
+
+#include <sys/types.h>
+#include <string>
+#include <map>
+
+/** Forward declaration for TProtocol */
+struct TBuf;
+
+/**
+ * Abstract class for a thrift protocol driver. These are all the methods that
+ * a protocol must implement. Essentially, there must be some way of reading
+ * and writing all the base types, plus a mechanism for writing out structs
+ * with indexed fields. Also notice that all methods are strictly const. This
+ * is by design. Protocol implementations may NOT keep state, because the
+ * same TProtocol object may be used simultaneously by multiple threads. This
+ * theoretically introduces some limitations into the possible protocol
+ * formats, but with the benefit of performance, clarity, and simplicity.
+ *
+ * @author Mark Slee <mcslee@facebook.com>
+ */
+class TProtocol {
+ public:
+  virtual ~TProtocol() {}
+
+  /**
+   * Function call serialization.
+   */
+  virtual std::string readFunction(TBuf& buf) const = 0;
+  virtual std::string writeFunction(const std::string& name,
+                                    const std::string& args) const = 0;
+
+  /**
+   * Struct serialization. Fields are addressed by a numeric id: reading
+   * yields one TBuf per id, writing takes one string per id.
+   */
+  virtual std::map<uint32_t, TBuf> readStruct(TBuf& buf) const = 0;
+  virtual std::string writeStruct(
+      const std::map<uint32_t, std::string>& s) const = 0;
+
+  /**
+   * Basic data type deserialization. The TBuf is deliberately passed by
+   * non-const reference: a reader SHOULD update it to describe the buffer
+   * AFTER the value has been consumed, so that further values can be read
+   * serially from the same TBuf.
+   */
+  virtual std::string readString(TBuf& buf) const = 0;
+  virtual uint8_t     readByte(TBuf& buf) const = 0;
+  virtual uint32_t    readU32(TBuf& buf) const = 0;
+  virtual int32_t     readI32(TBuf& buf) const = 0;
+  virtual uint64_t    readU64(TBuf& buf) const = 0;
+  virtual int64_t     readI64(TBuf& buf) const = 0;
+
+  /**
+   * Basic data type serialization. Each writer returns the encoded value
+   * as a string.
+   */
+  virtual std::string writeString(const std::string& str) const = 0;
+  virtual std::string writeByte(const uint8_t byte) const = 0;
+  virtual std::string writeU32(const uint32_t u32) const = 0;
+  virtual std::string writeI32(const int32_t i32) const = 0;
+  virtual std::string writeU64(const uint64_t u64) const = 0;
+  virtual std::string writeI64(const int64_t i64) const = 0;
+
+ protected:
+  /** Protected so the base can only be constructed through a subclass. */
+  TProtocol() {}
+};
+
+/**
+ * Wrapper around raw data that allows us to track the length of a data
+ * buffer. It is the responsibility of a robust TProtocol implementation
+ * to ensure that any reads that are done from data do NOT overrun the
+ * memory address at data+len. It is also a convention that TBuf objects
+ * do NOT own the memory pointed to by data. They are merely wrappers
+ * around buffers that have been allocated elsewhere. Therefore, the user
+ * should never allocate memory before putting it into a TBuf nor should
+ * they free the data pointed to by a TBuf.
+ */
+struct TBuf {
+  const uint8_t* data;  // start of the readable bytes; not owned by the TBuf
+  uint32_t len;         // number of readable bytes at data
+  TBuf(const uint8_t* d, uint32_t l) : data(d), len(l) {}
+  TBuf(const TBuf& that) : data(that.data), len(that.len) {}
+};
+
+#endif