From: Bryan Duxbury Date: Sat, 1 May 2010 13:45:38 +0000 (+0000) Subject: THRIFT-765. java: Revert the changes applied by THRIFT-765, as they appear to be... X-Git-Tag: 0.3.0~21 X-Git-Url: https://source.supwisdom.com/gerrit/gitweb?a=commitdiff_plain;h=06491d7bb591f8b55d4be6b116b246c56af66511;p=common%2Fthrift.git THRIFT-765. java: Revert the changes applied by THRIFT-765, as they appear to be unstable git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@940013 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/lib/java/src/org/apache/thrift/Utf8Helper.java b/lib/java/src/org/apache/thrift/Utf8Helper.java deleted file mode 100644 index 2d3fd267..00000000 --- a/lib/java/src/org/apache/thrift/Utf8Helper.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.apache.thrift; - -public final class Utf8Helper { - private Utf8Helper() {} - - public static final int getByteLength(final String s) { - int byteLength = 0; - int codePoint; - for (int i = 0; i < s.length(); i++) { - codePoint = s.charAt(i); - if (codePoint >= 0x07FF) { - codePoint = s.codePointAt(i); - if (Character.isSupplementaryCodePoint(codePoint)) { - i++; - } - } - if (codePoint >= 0 && codePoint <= 0x007F) { - byteLength++; - } else if (codePoint >= 0x80 && codePoint <= 0x07FF) { - byteLength += 2; - } else if ((codePoint >= 0x0800 && codePoint < 0xD800) || (codePoint > 0xDFFF && codePoint <= 0xFFFD)) { - byteLength+=3; - } else if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) { - byteLength+=4; - } else { - throw new RuntimeException("Unknown unicode codepoint in string! " - + Integer.toHexString(codePoint)); - } - } - return byteLength; - } - - public static byte[] encode(String s) { - byte[] buf = new byte[getByteLength(s)]; - encode(s, buf, 0); - return buf; - } - - public static void encode(final String s, final byte[] buf, final int offset) { - int nextByte = 0; - int codePoint; - final int strLen = s.length(); - for (int i = 0; i < strLen; i++) { - codePoint = s.charAt(i); - if (codePoint >= 0x07FF) { - codePoint = s.codePointAt(i); - if (Character.isSupplementaryCodePoint(codePoint)) { - i++; - } - } - if (codePoint <= 0x007F) { - buf[offset + nextByte] = (byte)codePoint; - nextByte++; - } else if (codePoint <= 0x7FF) { - buf[offset + nextByte ] = (byte)(0xC0 | ((codePoint >> 6) & 0x1F)); - buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >> 0) & 0x3F)); - nextByte+=2; - } else if ((codePoint < 0xD800) || (codePoint > 0xDFFF && codePoint <= 0xFFFD)) { - buf[offset + nextByte ] = (byte)(0xE0 | ((codePoint >> 12) & 0x0F)); - buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >> 6) & 0x3F)); - buf[offset + nextByte + 2] = (byte)(0x80 | ((codePoint >> 0) & 0x3F)); - nextByte+=3; - } else if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) { - buf[offset + nextByte ] = (byte)(0xF0 | ((codePoint >> 18) & 0x07)); - buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >> 12) & 0x3F)); - buf[offset + nextByte + 2] = (byte)(0x80 | ((codePoint >> 6) & 0x3F)); - buf[offset + nextByte + 3] = (byte)(0x80 | ((codePoint >> 0) & 0x3F)); - nextByte+=4; - } else { - throw new RuntimeException("Unknown unicode codepoint in string! " - + Integer.toHexString(codePoint)); - } - } - } - - public static String decode(byte[] buf) { - char[] charBuf = new char[buf.length]; - int charsDecoded = decode(buf, 0, buf.length, charBuf); - return new String(charBuf, 0, charsDecoded); - } - - public static final int UNI_SUR_HIGH_START = 0xD800; - public static final int UNI_SUR_HIGH_END = 0xDBFF; - public static final int UNI_SUR_LOW_START = 0xDC00; - public static final int UNI_SUR_LOW_END = 0xDFFF; - public static final int UNI_REPLACEMENT_CHAR = 0xFFFD; - - private static final int HALF_BASE = 0x0010000; - private static final long HALF_SHIFT = 10; - private static final long HALF_MASK = 0x3FFL; - - public static int decode(final byte[] buf, final int offset, final int byteLength, final char[] charBuf) { - int curByteIdx = offset; - int endByteIdx = offset + byteLength; - - int curCharIdx = 0; - - while (curByteIdx < endByteIdx) { - final int b = buf[curByteIdx++]&0xff; - final int ch; - - if (b < 0xC0) { - ch = b; - } else if (b < 0xE0) { - ch = ((b & 0x1F) << 6) + (buf[curByteIdx++] & 0x3F); - } else if (b < 0xf0) { - ch = ((b & 0xF) << 12) + ((buf[curByteIdx++] & 0x3F) << 6) + (buf[curByteIdx++] & 0x3F); - } else { - ch = ((b & 0x7) << 18) + ((buf[curByteIdx++]& 0x3F) << 12) + ((buf[curByteIdx++] & 0x3F) << 6) + (buf[curByteIdx++] & 0x3F); - } - - if (ch <= 0xFFFF) { - // target is a character <= 0xFFFF - charBuf[curCharIdx++] = (char) ch; - } else { - // target is a character in range 0xFFFF - 0x10FFFF - final int chHalf = ch - HALF_BASE; - charBuf[curCharIdx++] = (char) ((chHalf >> HALF_SHIFT) + UNI_SUR_HIGH_START); - charBuf[curCharIdx++] = (char) ((chHalf & HALF_MASK) + UNI_SUR_LOW_START); - } - } - return curCharIdx; - } -} diff --git a/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java b/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java index 9e763480..1cfa69dc 100644 --- a/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java +++ b/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java @@ -19,8 +19,9 @@ package org.apache.thrift.protocol; +import java.io.UnsupportedEncodingException; + import org.apache.thrift.TException; -import org.apache.thrift.Utf8Helper; import org.apache.thrift.transport.TTransport; /** @@ -179,9 +180,13 @@ public class TBinaryProtocol extends TProtocol { } public void writeString(String str) throws TException { - byte[] dat = Utf8Helper.encode(str); - writeI32(dat.length); - trans_.write(dat, 0, dat.length); + try { + byte[] dat = str.getBytes("UTF-8"); + writeI32(dat.length); + trans_.write(dat, 0, dat.length); + } catch (UnsupportedEncodingException uex) { + throw new TException("JVM DOES NOT SUPPORT UTF-8"); + } } public void writeBinary(byte[] bin) throws TException { @@ -328,20 +333,27 @@ public class TBinaryProtocol extends TProtocol { int size = readI32(); if (trans_.getBytesRemainingInBuffer() >= size) { - char[] charBuf = new char[size]; - int charsDecoded = Utf8Helper.decode(trans_.getBuffer(), trans_.getBufferPosition(), size, charBuf); - trans_.consumeBuffer(size); - return new String(charBuf, 0, charsDecoded); + try { + String s = new String(trans_.getBuffer(), trans_.getBufferPosition(), size, "UTF-8"); + trans_.consumeBuffer(size); + return s; + } catch (UnsupportedEncodingException e) { + throw new TException("JVM DOES NOT SUPPORT UTF-8"); + } } return readStringBody(size); } public String readStringBody(int size) throws TException { - checkReadLength(size); - byte[] buf = new byte[size]; - trans_.readAll(buf, 0, size); - return Utf8Helper.decode(buf); + try { + checkReadLength(size); + byte[] buf = new byte[size]; + trans_.readAll(buf, 0, size); + return new String(buf, "UTF-8"); + } catch (UnsupportedEncodingException uex) { + throw new TException("JVM DOES NOT SUPPORT UTF-8"); + } } public byte[] readBinary() throws TException { diff --git a/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java b/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java index e81ed828..f4979423 100755 --- a/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java +++ b/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java @@ -20,9 +20,10 @@ package org.apache.thrift.protocol; +import java.io.UnsupportedEncodingException; + import org.apache.thrift.ShortStack; import org.apache.thrift.TException; -import org.apache.thrift.Utf8Helper; import org.apache.thrift.transport.TTransport; /** @@ -292,7 +293,11 @@ public final class TCompactProtocol extends TProtocol { * Write a string to the wire with a varint size preceeding. */ public void writeString(String str) throws TException { - writeBinary(Utf8Helper.encode(str)); + try { + writeBinary(str.getBytes("UTF-8")); + } catch (UnsupportedEncodingException e) { + throw new TException("UTF-8 not supported!"); + } } /** @@ -605,13 +610,16 @@ public final class TCompactProtocol extends TProtocol { return ""; } - if (trans_.getBytesRemainingInBuffer() >= length) { - char[] charBuf = new char[length]; - int charsDecoded = Utf8Helper.decode(trans_.getBuffer(), trans_.getBufferPosition(), length, charBuf); - trans_.consumeBuffer(length); - return new String(charBuf, 0, charsDecoded); - } else { - return Utf8Helper.decode(readBinary(length)); + try { + if (trans_.getBytesRemainingInBuffer() >= length) { + String str = new String(trans_.getBuffer(), trans_.getBufferPosition(), length, "UTF-8"); + trans_.consumeBuffer(length); + return str; + } else { + return new String(readBinary(length), "UTF-8"); + } + } catch (UnsupportedEncodingException e) { + throw new TException("UTF-8 not supported!"); } } diff --git a/lib/java/test/org/apache/thrift/BenchStringEncoding.java b/lib/java/test/org/apache/thrift/BenchStringEncoding.java deleted file mode 100644 index 3ae22c77..00000000 --- a/lib/java/test/org/apache/thrift/BenchStringEncoding.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.apache.thrift; - -import java.io.UnsupportedEncodingException; - -public class BenchStringEncoding { - private static final String STRING = "a moderately long (but not overly long) string"; - private static final int HOW_MANY = 100000; - private static final byte[] BYTES; - static { - try { - BYTES = STRING.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - - public static void main(String[] args) throws UnsupportedEncodingException { - for (int trial = 0; trial < 5; trial++) { - benchGetBytes(); - benchFromBytes(); - benchEncode(); - benchDecode(); - } - } - - private static void benchDecode() { - char[] charBuf = new char[256]; - long start = System.currentTimeMillis(); - for (int i = 0; i < HOW_MANY; i++) { - Utf8Helper.decode(BYTES, 0, BYTES.length, charBuf); - } - long end = System.currentTimeMillis(); - System.out.println("decode: decode: " + (end-start) + "ms"); - } - - private static void benchFromBytes() { - long start = System.currentTimeMillis(); - for (int i = 0; i < HOW_MANY; i++) { - try { - new String(BYTES, "UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - long end = System.currentTimeMillis(); - System.out.println("decode: fromBytes: " + (end-start) + "ms"); - } - - private static void benchEncode() { - long start = System.currentTimeMillis(); - byte[] outbuf = new byte[256]; - for (int i = 0; i < HOW_MANY; i++) { - Utf8Helper.encode(STRING, outbuf, 0); - } - long end = System.currentTimeMillis(); - System.out.println("encode: directEncode: " + (end-start) + "ms"); - } - - private static void benchGetBytes() throws UnsupportedEncodingException { - long start = System.currentTimeMillis(); - for (int i = 0; i < HOW_MANY; i++) { - STRING.getBytes("UTF-8"); - } - long end = System.currentTimeMillis(); - System.out.println("encode: getBytes(UTF-8): " + (end-start) + "ms"); - } -} diff --git a/lib/java/test/org/apache/thrift/TestUtf8Helper.java b/lib/java/test/org/apache/thrift/TestUtf8Helper.java deleted file mode 100644 index bdfd35a4..00000000 --- a/lib/java/test/org/apache/thrift/TestUtf8Helper.java +++ /dev/null @@ -1,74 +0,0 @@ -package org.apache.thrift; - -import java.io.UnsupportedEncodingException; -import java.util.Arrays; - -import junit.framework.TestCase; - -public class TestUtf8Helper extends TestCase { - private static final String NON_UNICODE_STRING = "here's some text"; - - private static final byte[] kUnicodeBytes = { - (byte)0xd3, (byte)0x80, (byte)0xe2, (byte)0x85, (byte)0xae, (byte)0xce, - (byte)0x9d, (byte)0x20, (byte)0xd0, (byte)0x9d, (byte)0xce, (byte)0xbf, - (byte)0xe2, (byte)0x85, (byte)0xbf, (byte)0xd0, (byte)0xbe, (byte)0xc9, - (byte)0xa1, (byte)0xd0, (byte)0xb3, (byte)0xd0, (byte)0xb0, (byte)0xcf, - (byte)0x81, (byte)0xe2, (byte)0x84, (byte)0x8e, (byte)0x20, (byte)0xce, - (byte)0x91, (byte)0x74, (byte)0x74, (byte)0xce, (byte)0xb1, (byte)0xe2, - (byte)0x85, (byte)0xbd, (byte)0xce, (byte)0xba, (byte)0x83, (byte)0xe2, - (byte)0x80, (byte)0xbc - }; - - private static final String UNICODE_STRING = "abc\u5639\u563b"; - private static final byte[] UNICODE_STRING_BYTES; - - private static final String UNICODE_STRING_2; - private static final byte[] UNICODE_STRING_BYTES_2; - - private static final String REALLY_WHACKY_ONE = "\uD841\uDC91"; - private static final byte[] REALLY_WHACKY_ONE_BYTES; - - private static final String TWO_CHAR_CHAR = "\uD801\uDC00"; - private static final byte[] TWO_CHAR_CHAR_BYTES; - - static { - try { - UNICODE_STRING_BYTES = UNICODE_STRING.getBytes("UTF-8"); - UNICODE_STRING_2 = new String(kUnicodeBytes, "UTF-8"); - UNICODE_STRING_BYTES_2 = UNICODE_STRING_2.getBytes("UTF-8"); - REALLY_WHACKY_ONE_BYTES = REALLY_WHACKY_ONE.getBytes("UTF-8"); - TWO_CHAR_CHAR_BYTES = TWO_CHAR_CHAR.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - - - public void testEncode() throws Exception { - byte[] bytes = NON_UNICODE_STRING.getBytes("UTF-8"); - byte[] otherBytes = Utf8Helper.encode(NON_UNICODE_STRING); - assertTrue(Arrays.equals(bytes, otherBytes)); - - otherBytes = Utf8Helper.encode(UNICODE_STRING); - assertTrue(Arrays.equals(UNICODE_STRING_BYTES, otherBytes)); - - otherBytes = Utf8Helper.encode(UNICODE_STRING_2); - assertTrue(Arrays.equals(UNICODE_STRING_BYTES_2, otherBytes)); - - otherBytes = Utf8Helper.encode(REALLY_WHACKY_ONE); - assertTrue(Arrays.equals(REALLY_WHACKY_ONE_BYTES, otherBytes)); - - otherBytes = Utf8Helper.encode(TWO_CHAR_CHAR); - assertTrue(Arrays.equals(TWO_CHAR_CHAR_BYTES, otherBytes)); - } - - public void testDecode() throws Exception { - byte[] bytes = NON_UNICODE_STRING.getBytes("UTF-8"); - assertEquals(NON_UNICODE_STRING, Utf8Helper.decode(bytes)); - - assertEquals(UNICODE_STRING, Utf8Helper.decode(UNICODE_STRING_BYTES)); - assertEquals(UNICODE_STRING_2, Utf8Helper.decode(UNICODE_STRING_BYTES_2)); - assertEquals(REALLY_WHACKY_ONE, Utf8Helper.decode(REALLY_WHACKY_ONE_BYTES)); - assertEquals(TWO_CHAR_CHAR, Utf8Helper.decode(TWO_CHAR_CHAR_BYTES)); - } -}