THRIFT-2346 C#: UTF-8 sent by PHP as JSON is not understood by TJsonProtocol
author Jens Geyer <jensg@apache.org>
Fri, 7 Feb 2014 21:22:36 +0000 (22:22 +0100)
committer Jens Geyer <jensg@apache.org>
Fri, 7 Feb 2014 21:22:36 +0000 (22:22 +0100)
Patch: Jens Geyer

lib/csharp/src/Protocol/TJSONProtocol.cs
lib/csharp/test/JSON/JSONTest.csproj [new file with mode: 0644]
lib/csharp/test/JSON/Program.cs [new file with mode: 0644]
lib/csharp/test/JSON/Properties/AssemblyInfo.cs [new file with mode: 0644]
lib/csharp/test/JSON/app.config [new file with mode: 0644]

index 14db9cc..9c62bec 100644 (file)
@@ -58,7 +58,6 @@ namespace Thrift.Protocol
                private static byte[] RBRACKET = new byte[] { (byte)']' };
                private static byte[] QUOTE = new byte[] { (byte)'"' };
                private static byte[] BACKSLASH = new byte[] { (byte)'\\' };
-               private static byte[] ZERO = new byte[] { (byte)'0' };
 
                private byte[] ESCSEQ = new byte[] { (byte)'\\', (byte)'u', (byte)'0', (byte)'0' };
 
@@ -735,28 +734,38 @@ namespace Thrift.Protocol
                                {
                                        break;
                                }
-                               if (ch == ESCSEQ[0])
+
+                               // escaped?
+                               if (ch != ESCSEQ[0])
+                               {\r
+                                       buffer.Write(new byte[] { (byte)ch }, 0, 1);
+                                       continue;
+                               }
+
+                               // distinguish between \uXXXX and \?
+                               ch = reader.Read();
+                               if (ch != ESCSEQ[1])  // control chars like \n
                                {
-                                       ch = reader.Read();
-                                       if (ch == ESCSEQ[1])
+                                       int off = Array.IndexOf(ESCAPE_CHARS, (char)ch);
+                                       if (off == -1)
                                        {
-                                               ReadJSONSyntaxChar(ZERO);
-                                               ReadJSONSyntaxChar(ZERO);
-                                               trans.ReadAll(tempBuffer, 0, 2);
-                                               ch = (byte)((HexVal((byte)tempBuffer[0]) << 4) + HexVal(tempBuffer[1]));
+                                               throw new TProtocolException(TProtocolException.INVALID_DATA,
+                                                                                                               "Expected control char");
                                        }
-                                       else
-                                       {
-                                               int off = Array.IndexOf(ESCAPE_CHARS, (char)ch);
-                                               if (off == -1)
-                                               {
-                                                       throw new TProtocolException(TProtocolException.INVALID_DATA,
-                                                                                                                "Expected control char");
-                                               }
-                                               ch = ESCAPE_CHAR_VALS[off];
-                                       }
-                               }
-                               buffer.Write(new byte[] { (byte)ch }, 0, 1);
+                                       ch = ESCAPE_CHAR_VALS[off];\r
+                                       buffer.Write(new byte[] { (byte)ch }, 0, 1);\r
+                                       continue;
+                               }\r
+\r
+\r
+                               // it's \uXXXX\r
+                               trans.ReadAll(tempBuffer, 0, 4);\r
+                               var wch = (short)((HexVal((byte)tempBuffer[0]) << 12) +\r
+                                                                 (HexVal((byte)tempBuffer[1]) << 8) +\r
+                                                                 (HexVal((byte)tempBuffer[2]) << 4) + \r
+                                                                  HexVal(tempBuffer[3]));\r
+                               var tmp = utf8Encoding.GetBytes(new char[] { (char)wch });\r
+                               buffer.Write(tmp, 0, tmp.Length);\r
                        }
                        return buffer.ToArray();
                }
diff --git a/lib/csharp/test/JSON/JSONTest.csproj b/lib/csharp/test/JSON/JSONTest.csproj
new file mode 100644 (file)
index 0000000..73303b8
--- /dev/null
@@ -0,0 +1,67 @@
+\feff<?xml version="1.0" encoding="utf-8"?>\r
+<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">\r
+  <PropertyGroup>\r
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>\r
+    <Platform Condition=" '$(Platform)' == '' ">x86</Platform>\r
+    <ProductVersion>8.0.30703</ProductVersion>\r
+    <SchemaVersion>2.0</SchemaVersion>\r
+    <ProjectGuid>{E37A0034-DCBF-4886-A0DA-25A03D12D975}</ProjectGuid>\r
+    <OutputType>Exe</OutputType>\r
+    <AppDesignerFolder>Properties</AppDesignerFolder>\r
+    <RootNamespace>JSONTest</RootNamespace>\r
+    <AssemblyName>JSONTest</AssemblyName>\r
+    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>\r
+    <TargetFrameworkProfile>\r
+    </TargetFrameworkProfile>\r
+    <FileAlignment>512</FileAlignment>\r
+  </PropertyGroup>\r
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">\r
+    <PlatformTarget>x86</PlatformTarget>\r
+    <DebugSymbols>true</DebugSymbols>\r
+    <DebugType>full</DebugType>\r
+    <Optimize>false</Optimize>\r
+    <OutputPath>bin\Debug\</OutputPath>\r
+    <DefineConstants>DEBUG;TRACE</DefineConstants>\r
+    <ErrorReport>prompt</ErrorReport>\r
+    <WarningLevel>4</WarningLevel>\r
+  </PropertyGroup>\r
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">\r
+    <PlatformTarget>x86</PlatformTarget>\r
+    <DebugType>pdbonly</DebugType>\r
+    <Optimize>true</Optimize>\r
+    <OutputPath>bin\Release\</OutputPath>\r
+    <DefineConstants>TRACE</DefineConstants>\r
+    <ErrorReport>prompt</ErrorReport>\r
+    <WarningLevel>4</WarningLevel>\r
+  </PropertyGroup>\r
+  <ItemGroup>\r
+    <Reference Include="System" />\r
+    <Reference Include="System.Core" />\r
+    <Reference Include="System.Xml.Linq" />\r
+    <Reference Include="System.Data.DataSetExtensions" />\r
+    <Reference Include="Microsoft.CSharp" />\r
+    <Reference Include="System.Data" />\r
+    <Reference Include="System.Xml" />\r
+  </ItemGroup>\r
+  <ItemGroup>\r
+    <Compile Include="Program.cs" />\r
+    <Compile Include="Properties\AssemblyInfo.cs" />\r
+  </ItemGroup>\r
+  <ItemGroup>\r
+    <None Include="app.config" />\r
+  </ItemGroup>\r
+  <ItemGroup>\r
+    <ProjectReference Include="..\..\src\Thrift.csproj">\r
+      <Project>{499EB63C-D74C-47E8-AE48-A2FC94538E9D}</Project>\r
+      <Name>Thrift</Name>\r
+    </ProjectReference>\r
+  </ItemGroup>\r
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />\r
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. \r
+       Other similar extension points exist, see Microsoft.Common.targets.\r
+  <Target Name="BeforeBuild">\r
+  </Target>\r
+  <Target Name="AfterBuild">\r
+  </Target>\r
+  -->\r
+</Project>
\ No newline at end of file
diff --git a/lib/csharp/test/JSON/Program.cs b/lib/csharp/test/JSON/Program.cs
new file mode 100644 (file)
index 0000000..7bdb7f5
--- /dev/null
@@ -0,0 +1,55 @@
+\feff/**\r
+ * Licensed to the Apache Software Foundation (ASF) under one\r
+ * or more contributor license agreements. See the NOTICE file\r
+ * distributed with this work for additional information\r
+ * regarding copyright ownership. The ASF licenses this file\r
+ * to you under the Apache License, Version 2.0 (the\r
+ * "License"); you may not use this file except in compliance\r
+ * with the License. You may obtain a copy of the License at\r
+ *\r
+ *   http://www.apache.org/licenses/LICENSE-2.0\r
+ *\r
+ * Unless required by applicable law or agreed to in writing,\r
+ * software distributed under the License is distributed on an\r
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\r
+ * KIND, either express or implied. See the License for the\r
+ * specific language governing permissions and limitations\r
+ * under the License.\r
+ */\r
+\r
+using System;\r
+using System.Collections.Generic;\r
+using System.Diagnostics;\r
+using System.IO;\r
+using System.Linq;\r
+using System.Text;\r
+using Thrift.Protocol;\r
+using Thrift.Transport;\r
+\r
+namespace JSONTest\r
+{\r
+    class Program\r
+    {\r
+        static void Main(string[] args)\r
+        {\r
+            TestThrift2336();\r
+        }\r
+\r
+        public static void TestThrift2336()\r
+        {\r
+            const string RUSSIAN_TEXT = "\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435";\r
+            const string RUSSIAN_JSON = "\"\\u0420\\u0443\\u0441\\u0441\\u043a\\u043e\\u0435 \\u041d\\u0430\\u0437\\u0432\\u0430\\u043d\\u0438\\u0435\"";\r
+            \r
+            // prepare buffer with JSON data\r
+            byte[] rawBytes = new byte[RUSSIAN_JSON.Length];\r
+            for (var i = 0; i < RUSSIAN_JSON.Length; ++i)\r
+                rawBytes[i] = (byte)(RUSSIAN_JSON[i] & (char)0xFF);  // only low bytes\r
+\r
+            // parse and check\r
+            var stm = new MemoryStream(rawBytes);\r
+            var trans = new TStreamTransport(stm, null);\r
+            var prot = new TJSONProtocol(trans);\r
+            Debug.Assert(prot.ReadString() == RUSSIAN_TEXT, "reading JSON with hex-encoded chars > 8 bit");\r
+        }\r
+    }\r
+}\r
diff --git a/lib/csharp/test/JSON/Properties/AssemblyInfo.cs b/lib/csharp/test/JSON/Properties/AssemblyInfo.cs
new file mode 100644 (file)
index 0000000..a60ebc1
--- /dev/null
@@ -0,0 +1,36 @@
+\feffusing System.Reflection;\r
+using System.Runtime.CompilerServices;\r
+using System.Runtime.InteropServices;\r
+\r
+// General information about an assembly is controlled through the following \r
+// attributes. Change these attribute values to modify the information\r
+// associated with an assembly.\r
+[assembly: AssemblyTitle("JSONTest")]\r
+[assembly: AssemblyDescription("")]\r
+[assembly: AssemblyConfiguration("")]\r
+[assembly: AssemblyCompany("")]\r
+[assembly: AssemblyProduct("JSONTest")]\r
+[assembly: AssemblyCopyright("Copyright ©  2014")]\r
+[assembly: AssemblyTrademark("")]\r
+[assembly: AssemblyCulture("")]\r
+\r
+// Setting ComVisible to "false" makes the types in this assembly invisible \r
+// to COM components. If you need to access a type in this assembly from \r
+// COM, set the ComVisible attribute to "true" on that type.\r
+[assembly: ComVisible(false)]\r
+\r
+// The following GUID is the ID of the type library if this project is exposed to COM\r
+[assembly: Guid("2b2e7d56-3e65-4368-92d7-e34d56b7105e")]\r
+\r
+// Version information for an assembly consists of the following four values:\r
+//\r
+//      Major version\r
+//      Minor version \r
+//      Build number\r
+//      Revision\r
+//\r
+// You can specify all the values, or accept the default build and revision numbers \r
+// by entering "*":\r
+// [assembly: AssemblyVersion("1.0.*")]\r
+[assembly: AssemblyVersion("1.0.0.0")]\r
+[assembly: AssemblyFileVersion("1.0.0.0")]\r
diff --git a/lib/csharp/test/JSON/app.config b/lib/csharp/test/JSON/app.config
new file mode 100644 (file)
index 0000000..cb2586b
--- /dev/null
@@ -0,0 +1,3 @@
+<?xml version="1.0"?>\r
+<configuration>\r
+<startup><supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.0"/></startup></configuration>\r