From 7bb44a33bf2225d6c2220e4c5166669f7975f561 Mon Sep 17 00:00:00 2001 From: Jens Geyer Date: Fri, 7 Feb 2014 22:24:37 +0100 Subject: [PATCH] THRIFT-2345 Delphi: UTF-8 sent by PHP as JSON is not understood by TJsonProtocol Patch: Jens Geyer --- lib/delphi/src/Thrift.Protocol.JSON.pas | 48 ++++++++++++++----------- lib/delphi/test/TestClient.pas | 34 ++++++++++++++++-- 2 files changed, 60 insertions(+), 22 deletions(-) diff --git a/lib/delphi/src/Thrift.Protocol.JSON.pas b/lib/delphi/src/Thrift.Protocol.JSON.pas index cce6c3cd..6d305d82 100644 --- a/lib/delphi/src/Thrift.Protocol.JSON.pas +++ b/lib/delphi/src/Thrift.Protocol.JSON.pas @@ -254,7 +254,6 @@ var RBRACKET : TBytes; QUOTE : TBytes; BACKSLASH : TBytes; - ZERO : TBytes; ESCSEQ : TBytes; const @@ -815,7 +814,8 @@ end; function TJSONProtocolImpl.ReadJSONString( skipContext : Boolean) : TBytes; var buffer : TMemoryStream; - ch : Byte; + ch : Byte; + wch : Word; off : Integer; tmp : TBytes; begin @@ -832,25 +832,34 @@ begin if (ch = QUOTE[0]) then Break; - if (ch = ESCSEQ[0]) + // check for escapes + if (ch <> ESCSEQ[0]) then begin + buffer.Write( ch, 1); + Continue; + end; + + // distinguish between \uNNNN and \? 
+ ch := FReader.Read; + if (ch <> ESCSEQ[1]) then begin - ch := FReader.Read; - if (ch = ESCSEQ[1]) - then begin - ReadJSONSyntaxChar( ZERO[0]); - ReadJSONSyntaxChar( ZERO[0]); - SetLength( tmp, 2); - Transport.ReadAll( tmp, 0, 2); - ch := (HexVal(tmp[0]) shl 4) + HexVal(tmp[1]); - end - else begin - off := Pos( Char(ch), ESCAPE_CHARS); - if off < 1 - then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected control char'); - ch := Byte( ESCAPE_CHAR_VALS[off]); - end; + off := Pos( Char(ch), ESCAPE_CHARS); + if off < 1 + then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected control char'); + ch := Byte( ESCAPE_CHAR_VALS[off]); + buffer.Write( ch, 1); + Continue; end; - buffer.Write( ch, 1); + + // it is \uXXXX + SetLength( tmp, 4); + Transport.ReadAll( tmp, 0, 4); + wch := (HexVal(tmp[0]) shl 12) + + (HexVal(tmp[1]) shl 8) + + (HexVal(tmp[2]) shl 4) + + HexVal(tmp[3]); + // we need to make UTF8 bytes from it, to be decoded later + tmp := SysUtils.TEncoding.UTF8.GetBytes(Char(wch)); + buffer.Write( tmp[0], length(tmp)); end; SetLength( result, buffer.Size); @@ -1174,6 +1183,5 @@ initialization InitBytes( RBRACKET, [Byte(']')]); InitBytes( QUOTE, [Byte('"')]); InitBytes( BACKSLASH, [Byte('\')]); - InitBytes( ZERO, [Byte('0')]); InitBytes( ESCSEQ, [Byte('\'),Byte('u'),Byte('0'),Byte('0')]); end. diff --git a/lib/delphi/test/TestClient.pas b/lib/delphi/test/TestClient.pas index 0f09489a..9fb0b7a1 100644 --- a/lib/delphi/test/TestClient.pas +++ b/lib/delphi/test/TestClient.pas @@ -1,4 +1,4 @@ -(* +(* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -898,6 +898,9 @@ const TEST_DOUBLE = -1.234e-56; DELTA_DOUBLE = TEST_DOUBLE * 1e-14; TEST_STRING = 'abc-'#$00E4#$00f6#$00fc; // german umlauts (en-us: "funny chars") + // Test THRIFT-2336 with 'Русское Название'; + RUSSIAN_TEXT = #$0420#$0443#$0441#$0441#$043a#$043e#$0435' '#$041d#$0430#$0437#$0432#$0430#$043d#$0438#$0435; + RUSSIAN_JSON = '"\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435"'; // test both possible solidus encodings SOLIDUS_JSON_DATA = '"one/two\/three"'; SOLIDUS_EXCPECTED = 'one/two/three'; @@ -965,6 +968,7 @@ begin Expect( stm.Position = stm.Size, 'Stream position after read'); + // Solidus can be encoded in two ways. Make sure we can read both stm.Position := 0; stm.Size := 0; @@ -976,6 +980,32 @@ begin Expect( prot.ReadString = SOLIDUS_EXCPECTED, 'Solidus encoding'); + // Widechars should work too. Do they? + // After writing, we ensure that we are able to read it back + // We can't assume hex-encoding, since (nearly) any Unicode char is valid JSON + stm.Position := 0; + stm.Size := 0; + prot := TJSONProtocolImpl.Create( + TStreamTransportImpl.Create( + nil, TThriftStreamAdapterDelphi.Create( stm, FALSE))); + prot.WriteString( RUSSIAN_TEXT); + stm.Position := 0; + prot := TJSONProtocolImpl.Create( + TStreamTransportImpl.Create( + TThriftStreamAdapterDelphi.Create( stm, FALSE), nil)); + Expect( prot.ReadString = RUSSIAN_TEXT, 'Writing JSON with chars > 8 bit'); + + // Widechars should work with hex-encoding too. Do they? 
+ stm.Position := 0; + stm.Size := 0; + stm.WriteString( RUSSIAN_JSON); + stm.Position := 0; + prot := TJSONProtocolImpl.Create( + TStreamTransportImpl.Create( + TThriftStreamAdapterDelphi.Create( stm, FALSE), nil)); + Expect( prot.ReadString = RUSSIAN_TEXT, 'Reading JSON with chars > 8 bit'); + + finally stm.Free; prot := nil; //-> Release @@ -1068,10 +1098,10 @@ var begin // perform all tests try + JSONProtocolReadWriteTest; for i := 0 to FNumIteration - 1 do begin ClientTest; - JSONProtocolReadWriteTest; end; except on e:Exception do Expect( FALSE, 'unexpected exception: "'+e.message+'"'); -- 2.17.1