From: Bryan Duxbury Date: Tue, 19 Oct 2010 23:00:27 +0000 (+0000) Subject: THRIFT-947. java: Provide a helper method to determine the TProtocol used to serializ... X-Git-Tag: 0.6.0~65 X-Git-Url: https://source.supwisdom.com/gerrit/gitweb?a=commitdiff_plain;h=206c0dc1af226521eb7fccf3890182741fb56cc1;p=common%2Fthrift.git THRIFT-947. java: Provide a helper method to determine the TProtocol used to serialize some data. Patch: Mathias Herberts git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@1024455 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/lib/java/src/org/apache/thrift/protocol/TProtocolUtil.java b/lib/java/src/org/apache/thrift/protocol/TProtocolUtil.java index 9bf10f67..134eafc8 100644 --- a/lib/java/src/org/apache/thrift/protocol/TProtocolUtil.java +++ b/lib/java/src/org/apache/thrift/protocol/TProtocolUtil.java @@ -155,4 +155,77 @@ public class TProtocolUtil { break; } } + + /** + * Attempt to determine the protocol used to serialize some data. + * + * The guess is based on known specificities of supported protocols. + * In some cases, no guess can be done, in that case we return the + * fallback TProtocolFactory. + * To be certain to correctly detect the protocol, the first encoded + * field should have a field id < 256 + * + * @param data The serialized data to guess the protocol for. + * @param fallback The TProtocol to return if no guess can be made. + * @return a Class implementing TProtocolFactory which can be used to create a deserializer. + */ + public static TProtocolFactory guessProtocolFactory(byte[] data, TProtocolFactory fallback) { + // + // If the first and last bytes are opening/closing curly braces we guess the protocol as + // being TJSONProtocol. + // It could not be a TCompactBinary encoding for a field of type 0xb (Map) + // with delta id 7 as the last byte for TCompactBinary is always 0. + // + + if ('{' == data[0] && '}' == data[data.length - 1]) { + return new TJSONProtocol.Factory(); + } + + // + // If the last byte is not 0, then it cannot be TCompactProtocol, it must be + // TBinaryProtocol. + // + + if (data[data.length - 1] != 0) { + return new TBinaryProtocol.Factory(); + } + + // + // A first byte of value > 16 indicates TCompactProtocol was used, and the first byte + // encodes a delta field id (id <= 15) and a field type. + // + + if (data[0] > 0x10) { + return new TCompactProtocol.Factory(); + } + + // + // If the second byte is 0 then it is a field id < 256 encoded by TBinaryProtocol. + // It cannot possibly be TCompactProtocol since a value of 0 would imply a field id + // of 0 as the zig zag varint encoding would end. + // + + if (data.length > 1 && 0 == data[1]) { + return new TBinaryProtocol.Factory(); + } + + // + // If bit 7 of the first byte of the field id is set then we have two choices: + // 1. A field id > 63 was encoded with TCompactProtocol. + // 2. A field id > 0x7fff (32767) was encoded with TBinaryProtocol and the last byte of the + // serialized data is 0. + // Option 2 is impossible since field ids are short and thus limited to 32767. + // + + if (data.length > 1 && (data[1] & 0x80) != 0) { + return new TCompactProtocol.Factory(); + } + + // + // The remaining case is either a field id <= 63 encoded as TCompactProtocol, + // one >= 256 encoded with TBinaryProtocol with a last byte at 0, or an empty structure. + // As we cannot really decide, we return the fallback protocol. + // + return fallback; + } } diff --git a/lib/java/test/org/apache/thrift/protocol/TestTProtocolUtil.java b/lib/java/test/org/apache/thrift/protocol/TestTProtocolUtil.java new file mode 100644 index 00000000..199c7075 --- /dev/null +++ b/lib/java/test/org/apache/thrift/protocol/TestTProtocolUtil.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.thrift.protocol; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; + +import junit.framework.TestCase; + +import org.apache.thrift.Fixtures; +import org.apache.thrift.TBase; +import org.apache.thrift.TDeserializer; +import org.apache.thrift.TException; +import org.apache.thrift.TSerializer; +import org.apache.thrift.transport.TMemoryBuffer; + +import thrift.test.CompactProtoTestStruct; +import thrift.test.HolyMoley; +import thrift.test.Nesting; +import thrift.test.OneOfEach; +import thrift.test.Srv; +import thrift.test.GuessProtocolStruct; + +public class TestTProtocolUtil extends TestCase { + + public void testGuessProtocolFactory_JSON() throws Exception { + + byte[] data = "{foo}".getBytes(); + TProtocolFactory factory = TProtocolUtil.guessProtocolFactory(data, new TCompactProtocol.Factory()); + assertTrue(factory instanceof TJSONProtocol.Factory); + + // Make sure data serialized with TCompact and which starts with '{' + // is not mistakenly guessed as serialized with JSON. + + GuessProtocolStruct s = new GuessProtocolStruct(); + s.putToMap_field("}","}"); + byte[] ser = new TSerializer(new TCompactProtocol.Factory()).serialize(s); + factory = TProtocolUtil.guessProtocolFactory(ser, new TCompactProtocol.Factory()); + assertFalse(factory instanceof TJSONProtocol.Factory); + } + + public void testGuessProtocolFactory_Binary() throws Exception { + // Check that a last byte != 0 is correctly reported as Binary + + byte[] buf = new byte[1]; + for (int i = 1; i < 256; i++) { + buf[0] = (byte) i; + TProtocolFactory factory = TProtocolUtil.guessProtocolFactory(buf, new TCompactProtocol.Factory()); + assertTrue(factory instanceof TBinaryProtocol.Factory); + } + + // Check that a second byte set to 0 is reported as Binary + buf = new byte[2]; + TProtocolFactory factory = TProtocolUtil.guessProtocolFactory(buf, new TCompactProtocol.Factory()); + assertTrue(factory instanceof TBinaryProtocol.Factory); + } + + public void testGuessProtocolFactory_Compact() throws Exception { + // Check that a first byte > 0x10 is reported as Compact + byte[] buf = new byte[3]; + buf[0] = 0x11; + TProtocolFactory factory = TProtocolUtil.guessProtocolFactory(buf, new TBinaryProtocol.Factory()); + assertTrue(factory instanceof TCompactProtocol.Factory); + + // Check that second byte >= 0x80 is reported as Compact + buf[0] = 0; + for (int i = 0x80; i < 0x100; i++) { + buf[1] = (byte) i; + factory = TProtocolUtil.guessProtocolFactory(buf, new TBinaryProtocol.Factory()); + assertTrue(factory instanceof TCompactProtocol.Factory); + } + } + + public void testGuessProtocolFactory_Undecided() throws Exception { + byte[] buf = new byte[3]; + buf[1] = 0x7e; + TProtocolFactory factory = TProtocolUtil.guessProtocolFactory(buf, new TSimpleJSONProtocol.Factory()); + assertTrue(factory instanceof TSimpleJSONProtocol.Factory); + } +} diff --git a/test/ThriftTest.thrift b/test/ThriftTest.thrift index 66ec2b5a..ce324ef6 100644 --- a/test/ThriftTest.thrift +++ b/test/ThriftTest.thrift @@ -187,3 +187,7 @@ struct ListTypeVersioningV2 { 1: list strings; 2: string hello; } + +struct GuessProtocolStruct { + 7: map map_field, +}