From c707855048258ab60e78d602509513b1394ec962 Mon Sep 17 00:00:00 2001 From: Bryan Duxbury Date: Tue, 10 Feb 2009 18:10:57 +0000 Subject: [PATCH] THRIFT-318. java: Performance of HashSet for enumeration VALID_VALUES seems poor Instead of a HashSet, enums will now use the special IntRangeSet implementation. git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@743037 13f79535-47bb-0310-9956-ffa450edef68 --- compiler/cpp/src/generate/t_java_generator.cc | 16 +- .../src/org/apache/thrift/IntRangeSet.java | 152 ++++++++++++++++++ 2 files changed, 162 insertions(+), 6 deletions(-) create mode 100644 lib/java/src/org/apache/thrift/IntRangeSet.java diff --git a/compiler/cpp/src/generate/t_java_generator.cc b/compiler/cpp/src/generate/t_java_generator.cc index d6d7aea9..a9ad54f3 100644 --- a/compiler/cpp/src/generate/t_java_generator.cc +++ b/compiler/cpp/src/generate/t_java_generator.cc @@ -314,7 +314,8 @@ void t_java_generator::generate_enum(t_enum* tenum) { f_enum << string() + "import java.util.Set;\n" + "import java.util.HashSet;\n" + - "import java.util.Collections;\n" << endl; + "import java.util.Collections;\n" + + "import org.apache.thrift.IntRangeSet;\n"<< endl; f_enum << "public class " << tenum->get_name() << " "; @@ -337,15 +338,18 @@ void t_java_generator::generate_enum(t_enum* tenum) { // Create a static Set with all valid values for this enum f_enum << endl; - indent(f_enum) << "public static final Set VALID_VALUES = Collections.unmodifiableSet(new HashSet(){{" << endl; + indent(f_enum) << "public static final IntRangeSet VALID_VALUES = new IntRangeSet("; indent_up(); + bool first = true; for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) { - // populate set - if ((*c_iter)->has_value()) - indent(f_enum) << "add(" << (*c_iter)->get_value() << ");" << endl; + // populate set + if ((*c_iter)->has_value()) { + f_enum << (first ? "" : ", ") << (*c_iter)->get_name(); + first = false; + } } indent_down(); - indent(f_enum) << "}});" << endl; + f_enum << ");" << endl; scope_down(f_enum); f_enum.close(); diff --git a/lib/java/src/org/apache/thrift/IntRangeSet.java b/lib/java/src/org/apache/thrift/IntRangeSet.java new file mode 100644 index 00000000..a12a6271 --- /dev/null +++ b/lib/java/src/org/apache/thrift/IntRangeSet.java @@ -0,0 +1,152 @@ +package org.apache.thrift; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +/** + * IntRangeSet is a specialized Set implementation designed + * specifically to make the generated validate() method calls faster. It groups + * the set values into ranges, and in the contains() call, it does + * num ranges * 2 comparisons max. For the common case, which is a single, + * contiguous range, this approach is about 60% faster than using a HashSet. If + * you had a very ragged value set, like all the odd numbers, for instance, + * then you would end up with pretty poor running time. + */ +public class IntRangeSet implements Set { + /** + * This array keeps the bounds of each extent in alternating cells, always + * increasing. Example: [0,5,10,15], which corresponds to 0-5, 10-15. + */ + private int[] extents; + + /** + * We'll keep a duplicate, real HashSet around internally to satisfy some of + * the other set operations. + */ + private Set realSet = new HashSet(); + + public IntRangeSet(int... values) { + Arrays.sort(values); + + List extent_list = new ArrayList(); + + int ext_start = values[0]; + int ext_end_so_far = values[0]; + for (int i = 1; i < values.length; i++) { + realSet.add(values[i]); + + if (values[i] == ext_end_so_far + 1) { + // advance the end so far + ext_end_so_far = values[i]; + } else { + // create an extent for everything we saw so far, move on to the next one + extent_list.add(ext_start); + extent_list.add(ext_end_so_far); + ext_start = values[i]; + ext_end_so_far = values[i]; + } + } + extent_list.add(ext_start); + extent_list.add(ext_end_so_far); + + extents = new int[extent_list.size()]; + for (int i = 0; i < extent_list.size(); i++) { + extents[i] = extent_list.get(i); + } + } + + public boolean add(Integer i) { + throw new UnsupportedOperationException(); + } + + public void clear() { + throw new UnsupportedOperationException(); + } + + public boolean addAll(Collection arg0) { + throw new UnsupportedOperationException(); + } + + /** + * While this method is here for Set interface compatibility, you should avoid + * using it. It incurs boxing overhead! Use the int method directly, instead. + */ + public boolean contains(Object arg0) { + return contains(((Integer)arg0).intValue()); + } + + /** + * This is much faster, since it doesn't stop at Integer on the way through. + * @param val the value you want to check set membership for + * @return true if val was found, false otherwise + */ + public boolean contains(int val) { + for (int i = 0; i < extents.length / 2; i++) { + if (val < extents[i*2]) { + return false; + } else if (val <= extents[i*2+1]) { + return true; + } + } + + return false; + } + + public boolean containsAll(Collection arg0) { + for (Object o : arg0) { + if (!contains(o)) { + return false; + } + } + return true; + } + + public boolean isEmpty() { + return realSet.isEmpty(); + } + + public Iterator iterator() { + return realSet.iterator(); + } + + public boolean remove(Object arg0) { + throw new UnsupportedOperationException(); + } + + public boolean removeAll(Collection arg0) { + throw new UnsupportedOperationException(); + } + + public boolean retainAll(Collection arg0) { + throw new UnsupportedOperationException(); + } + + public int size() { + return realSet.size(); + } + + public Object[] toArray() { + return realSet.toArray(); + } + + public T[] toArray(T[] arg0) { + return realSet.toArray(arg0); + } + + @Override + public String toString() { + String buf = ""; + for (int i = 0; i < extents.length / 2; i++) { + if (i != 0) { + buf += ", "; + } + buf += "[" + extents[i*2] + "," + extents[i*2+1] + "]"; + } + return buf; + } +} -- 2.17.1