2010-04-29

Histogram :: array based

For any reasonably sized Histogram, you probably want to look at this Histogram class based on a HashMap. However, if your histograms are tiny, say, less than 8 elements occur frequently, this array based histogram is an order of magnitude faster.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;

public class TinyHistogram<T>
{
   private T[]   items;
   private int[] usage;
   private int   size;

   public TinyHistogram()
   {
      this.items = (T[]) new Object[4];
      this.usage = new int[4];
      this.size = 0;
   }

   public int put(T item)
   {
      if (item == null)
         throw new IllegalArgumentException();

      int io = this.indexOfItem(item);

      if (io != -1)
      {
         int result = ++this.usage[io];
         this.swapIncreasedValue(io);
         return result;
      }

      // add item to the end
      if (this.size == this.items.length)
         this.grow();
      this.items[this.size] = item;
      this.usage[this.size] = 1;
      this.size += 1;
      return 1;
   }

   public int get(T item)
   {
      if (item == null)
         throw new IllegalArgumentException();

      int io = this.indexOfItem(item);
      if (io == -1)
         return 0; // not -1, as the object occurred zero times
      return this.usage[io];
   }

   public List<T> topKeys()
   {
      return this.topKeys(Integer.MAX_VALUE);
   }

   public List<T> topKeys(int amount)
   {
      int end = Math.min(this.size, amount);

      // make a copy, the items are already sorted
      List<T> tops = new ArrayList<T>();
      for (int i = 0; i < end; i++)
         tops.add(this.items[i]);
      return tops;
   }

   public List<Pair<T, Integer>> topEntries()
   {
      return this.topEntries(Integer.MAX_VALUE);
   }

   public List<Pair<T, Integer>> topEntries(int amount)
   {
      int end = Math.min(this.size, amount);

      // make a copy, the items are already sorted
      List<Pair<T, Integer>> tops = new ArrayList<Pair<T, Integer>>();
      for (int i = 0; i < end; i++)
         tops.add(new Pair<T, Integer>(this.items[i], Integer.valueOf(this.usage[i])));
      return tops;
   }

   public int remove(T item)
   {
      int io = this.indexOfItem(item);
      if (io == -1)
         throw new NoSuchElementException(String.valueOf(item));

      int result = --this.usage[io];
      if (result == 0)
         return this.removeIndex(io);

      this.swapDecreasedValue(io);
      return result;
   }

   public int reset(T item)
   {
      int io = this.indexOfItem(item);
      if (io == -1)
         throw new NoSuchElementException(String.valueOf(item));
      return this.removeIndex(io);
   }

   //

   private final void swapDecreasedValue(int index)
   {
      while ((index + 1) < size && usage[index + 0] < usage[index + 1])
      {
         int a = index + 0;
         int b = index + 1;

         swapObj(items, a, b);
         swapInt(usage, a, b);

         index++;
      }
   }

   private final void swapIncreasedValue(int index)
   {
      while (index > 0 && usage[index + 0] > usage[index - 1])
      {
         int a = index - 0;
         int b = index - 1;

         swapObj(items, a, b);
         swapInt(usage, a, b);

         index--;
      }
   }

   private static final void swapInt(int[] array, int a, int b)
   {
      int temp = array[a];
      array[a] = array[b];
      array[b] = temp;
   }

   private static final void swapObj(Object[] array, int a, int b)
   {
      Object temp = array[a];
      array[a] = array[b];
      array[b] = temp;
   }

   private void grow()
   {
      this.items = Arrays.copyOf(this.items, this.items.length * 2);
      this.usage = Arrays.copyOf(this.usage, this.usage.length * 2);
   }

   private int removeIndex(int index)
   {
      int count = this.usage[index];
      int shift = --this.size - index;
      System.arraycopy(this.items, index + 1, this.items, index, shift);
      System.arraycopy(this.usage, index + 1, this.usage, index, shift);
      return count;
   }

   private int indexOfItem(T item)
   {
      for (int i = 0; i < this.size; i++)
         if (this.items[i] == item || this.items[i].equals(item))
            return i;
      return -1;
   }
}

Slicing direct buffers to workaround 4K overhead

Direct ByteBuffers can have an unexpected massive overhead. For every allocation, a number of bytes equal to the system pagesize (typically 4K) is added. The reason for this is that MappedByteBuffers must be page-aligned. The code used behind the scenes in ByteBuffer.allocateDirect(int size) looks a little something like this:

private static ByteBuffer malloc(int size)
{
   int pageSize = unsafe.getPageSize(); // typically 4096
   long pointer = sun.misc.Unsafe.malloc(size + pageSize);
   long base = (pointer + (pageSize-1)) / pageSize * pageSize;
   ByteBuffer buffer = unsafe.createBufferAt(base);
   return buffer;
}

Code like this will have a massive overhead:
for(int i=0; i<count; i++)
   buffers[i] = ByteBuffer.allocateDirect(64);

Instead use an approach like below:
public class DirectBufferProvider
{
   private static final int ALLOCATION_SIZE = 1024*1024;

   private ByteBuffer currentBuffer = null;

   public ByteBuffer allocate(int size)
   {
      if(size >= ALLOCATION_SIZE)
         return ByteBuffer.allocateDirect(size);

      if(currentBuffer == null || size > currentBuffer.remaining())
         currentBuffer = ByteBuffer.allocateDirect(ALLOCATION_SIZE);

      currentBuffer.limit(currentBuffer.position() + size);
      ByteBuffer result = currentBuffer.slice();
      currentBuffer.position(currentBuffer.limit());
      currentBuffer.limit(currentBuffer.capacity());
      return result;
   }



   private static DirectBufferProvider global_synced = new DirectBufferProvider();

   public static ByteBuffer allocateDirect(int size)
   {
      synchronized(global_synced)
      {
         return global_synced.allocate(size);
      }
   }
}

Note that unlike ByteBuffer.allocateDirect(), the code in DirectBufferProvider.allocate() is not threadsafe. The static (convenience) method is synchronized and should thus not be used in heavily multithreaded code. Using ThreadLocals is even worse, performance wise, so just make new DirectBufferProvider instances when you need them.