From 06ad7218a9aeb05538d9ec09a6fed8e16b9aa1e1 Mon Sep 17 00:00:00 2001 From: Jens Geyer Date: Sun, 16 Feb 2014 15:48:57 +0100 Subject: [PATCH] THRIFT-2365 C# decodes too many binary bytes from JSON Patch: Jens Geyer --- lib/csharp/src/Protocol/TJSONProtocol.cs | 36 +++--- lib/csharp/test/JSON/Program.cs | 137 ++++++++++++++--------- 2 files changed, 103 insertions(+), 70 deletions(-) diff --git a/lib/csharp/src/Protocol/TJSONProtocol.cs b/lib/csharp/src/Protocol/TJSONProtocol.cs index 9c62becb..e1d0e788 100644 --- a/lib/csharp/src/Protocol/TJSONProtocol.cs +++ b/lib/csharp/src/Protocol/TJSONProtocol.cs @@ -737,7 +737,7 @@ namespace Thrift.Protocol // escaped? if (ch != ESCSEQ[0]) - { + { buffer.Write(new byte[] { (byte)ch }, 0, 1); continue; } @@ -752,20 +752,20 @@ namespace Thrift.Protocol throw new TProtocolException(TProtocolException.INVALID_DATA, "Expected control char"); } - ch = ESCAPE_CHAR_VALS[off]; - buffer.Write(new byte[] { (byte)ch }, 0, 1); + ch = ESCAPE_CHAR_VALS[off]; + buffer.Write(new byte[] { (byte)ch }, 0, 1); continue; - } - - - // it's \uXXXX - trans.ReadAll(tempBuffer, 0, 4); - var wch = (short)((HexVal((byte)tempBuffer[0]) << 12) + - (HexVal((byte)tempBuffer[1]) << 8) + - (HexVal((byte)tempBuffer[2]) << 4) + - HexVal(tempBuffer[3])); - var tmp = utf8Encoding.GetBytes(new char[] { (char)wch }); - buffer.Write(tmp, 0, tmp.Length); + } + + + // it's \uXXXX + trans.ReadAll(tempBuffer, 0, 4); + var wch = (short)((HexVal((byte)tempBuffer[0]) << 12) + + (HexVal((byte)tempBuffer[1]) << 8) + + (HexVal((byte)tempBuffer[2]) << 4) + + HexVal(tempBuffer[3])); + var tmp = utf8Encoding.GetBytes(new char[] { (char)wch }); + buffer.Write(tmp, 0, tmp.Length); } return buffer.ToArray(); } @@ -891,7 +891,13 @@ namespace Thrift.Protocol int len = b.Length; int off = 0; int size = 0; - while (len >= 4) + // reduce len to ignore fill bytes + while ((len > 0) && (b[len - 1] == '=')) + { + --len; + } + // read & decode full byte triplets = 4 source bytes + while (len > 4) { // Decode 4 bytes at a time TBase64Utils.decode(b, off, 4, b, size); // NB: decoded in place diff --git a/lib/csharp/test/JSON/Program.cs b/lib/csharp/test/JSON/Program.cs index 7bdb7f5f..d66c78a6 100644 --- a/lib/csharp/test/JSON/Program.cs +++ b/lib/csharp/test/JSON/Program.cs @@ -1,55 +1,82 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.IO; -using System.Linq; -using System.Text; -using Thrift.Protocol; -using Thrift.Transport; - -namespace JSONTest -{ - class Program - { - static void Main(string[] args) - { - TestThrift2336(); - } - - public static void TestThrift2336() - { - const string RUSSIAN_TEXT = "\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435"; - const string RUSSIAN_JSON = "\"\\u0420\\u0443\\u0441\\u0441\\u043a\\u043e\\u0435 \\u041d\\u0430\\u0437\\u0432\\u0430\\u043d\\u0438\\u0435\""; - - // prepare buffer with JOSN data - byte[] rawBytes = new byte[RUSSIAN_JSON.Length]; - for (var i = 0; i < RUSSIAN_JSON.Length; ++i) - rawBytes[i] = (byte)(RUSSIAN_JSON[i] & (char)0xFF); // only low bytes - - // parse and check - var stm = new MemoryStream(rawBytes); - var trans = new TStreamTransport(stm, null); - var prot = new TJSONProtocol(trans); - Debug.Assert(prot.ReadString() == RUSSIAN_TEXT, "reading JSON with hex-encoded chars > 8 bit"); - } - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Text; +using Thrift.Protocol; +using Thrift.Transport; + +namespace JSONTest +{ + class Program + { + static void Main(string[] args) + { + TestThrift2365(); // JSON binary decodes too much data + TestThrift2336(); // hex encoding using \uXXXX where 0xXXXX > 0xFF + } + + + public static void TestThrift2365() + { + var rnd = new Random(); + for (var len = 0; len < 10; ++len) + { + byte[] dataWritten = new byte[len]; + rnd.NextBytes(dataWritten); + + Stream stm = new MemoryStream(); + TTransport trans = new TStreamTransport(null, stm); + TProtocol prot = new TJSONProtocol(trans); + prot.WriteBinary(dataWritten); + + stm.Position = 0; + trans = new TStreamTransport(stm, null); + prot = new TJSONProtocol(trans); + byte[] dataRead = prot.ReadBinary(); + + Debug.Assert(dataRead.Length == dataWritten.Length); + for (var i = 0; i < dataRead.Length; ++i) + Debug.Assert(dataRead[i] == dataWritten[i]); + } + } + + + public static void TestThrift2336() + { + const string RUSSIAN_TEXT = "\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435"; + const string RUSSIAN_JSON = "\"\\u0420\\u0443\\u0441\\u0441\\u043a\\u043e\\u0435 \\u041d\\u0430\\u0437\\u0432\\u0430\\u043d\\u0438\\u0435\""; + + // prepare buffer with JSON data + byte[] rawBytes = new byte[RUSSIAN_JSON.Length]; + for (var i = 0; i < RUSSIAN_JSON.Length; ++i) + rawBytes[i] = (byte)(RUSSIAN_JSON[i] & (char)0xFF); // only low bytes + + // parse and check + var stm = new MemoryStream(rawBytes); + var trans = new TStreamTransport(stm, null); + var prot = new TJSONProtocol(trans); + Debug.Assert(prot.ReadString() == RUSSIAN_TEXT, "reading JSON with hex-encoded chars > 8 bit"); + } + } +} -- 2.17.1