getCurrentStats();
+
+ /**
+ * Update the current statistics with the next {@link KeyValue} to be written
+ * @param kv next {@link KeyValue} to be written
+ */
+ public void updateStatistic(KeyValue kv);
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/StatisticValue.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/StatisticValue.java
new file mode 100644
index 00000000..009f9ccb
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/StatisticValue.java
@@ -0,0 +1,43 @@
+package com.salesforce.hbase.stats;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Simple holder class for a single statistic on a column in a region.
+ *
+ * If you are building a histogram, you should use a {@link HistogramStatisticValue} instead, which
+ * internally uses a collection of {@link StatisticValue}s to build the larger histogram
+ */
+public class StatisticValue {
+
+ protected byte[] name;
+ protected byte[] info;
+ protected byte[] value;
+
+ public StatisticValue(byte[] name, byte[] info, byte[] value) {
+ this.name = name;
+ this.info = info;
+ this.value = value;
+ }
+
+ public byte[] getType() {
+ return name;
+ }
+
+ public byte[] getInfo() {
+ return info;
+ }
+
+ public byte[] getValue() {
+ return value;
+ }
+
+ protected void setValue(byte[] value) {
+ this.value = value;
+ }
+
+ @Override
+ public String toString() {
+ return "stat " + Bytes.toString(name) + ": [info:" + Bytes.toString(info) + ", value:"
+ + Bytes.toString(value) + "]";
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/StatisticsTable.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/StatisticsTable.java
new file mode 100644
index 00000000..069fdd52
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/StatisticsTable.java
@@ -0,0 +1,160 @@
+package com.salesforce.hbase.stats;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.PrefixFilter;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.salesforce.hbase.stats.serialization.IndividualStatisticWriter;
+import com.salesforce.hbase.stats.util.Constants;
+
+
+/**
+ * Wrapper to access the statistics table for an HTable.
+ *
+ * Each {@link StatisticsTable} is bound to access the statistics for a single 'primary' table. This
+ * helps decrease the chances of reading/writing the wrong statistic for the source table.
+ *
+ * Each statistic is prefixed with the table name and region from which it came.
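+ *
+ * A sketch of typical client-side usage (conf, primary, regionName, family and stats are all
+ * illustrative names for the configuration, the primary table descriptor, the region name, the
+ * column family and the collected {@link StatisticValue}s, respectively):
+ *
+ * <pre>
+ *   StatisticsTable statTable = new StatisticsTable(conf, primary);
+ *   statTable.updateStats(new IndividualStatisticWriter(primary.getName(), regionName, family), stats);
+ *   statTable.close();
+ * </pre>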
+ */
+public class StatisticsTable implements Closeable {
+
+ private static final Log LOG = LogFactory.getLog(StatisticsTable.class);
+ /** Map of the currently open statistics tables */
+ private static final Map<String, StatisticsTable> tableMap = new HashMap<String, StatisticsTable>();
+
+ /**
+ * @param env Environment wherein the coprocessor is attempting to update the stats table.
+ * @param primaryTableName name of the primary table on which we should collect stats
+ * @return the {@link StatisticsTable} for the given primary table.
+ * @throws IOException if the table cannot be created due to an underlying HTable creation error
+ */
+ public synchronized static StatisticsTable getStatisticsTableForCoprocessor(
+ CoprocessorEnvironment env, byte[] primaryTableName) throws IOException {
+ StatisticsTable table = tableMap.get(Bytes.toString(primaryTableName));
+ if (table == null) {
+ table = new StatisticsTable(env.getTable(Constants.STATS_TABLE_NAME_BYTES), primaryTableName);
+ tableMap.put(Bytes.toString(primaryTableName), table);
+ }
+ return table;
+ }
+
+ private final HTableInterface target;
+ private final byte[] sourceTableName;
+
+ private StatisticsTable(HTableInterface target, byte[] sourceTableName) {
+ this.target = target;
+ this.sourceTableName = sourceTableName;
+ }
+
+ public StatisticsTable(Configuration conf, HTableDescriptor source) throws IOException {
+ this(new HTable(conf, Constants.STATS_TABLE_NAME), source.getName());
+ }
+
+ /**
+ * Close the connection to the table
+ */
+ @Override
+ public void close() throws IOException {
+ target.close();
+ }
+
+ public void removeStats() throws IOException {
+ removeRowsForPrefix(sourceTableName);
+ }
+
+ public void removeStatsForRegion(HRegionInfo region) throws IOException {
+ removeRowsForPrefix(sourceTableName, region.getRegionName());
+ }
+
+ private void removeRowsForPrefix(byte[]... arrays) throws IOException {
+ byte[] row = null;
+ for (byte[] array : arrays) {
+ row = ArrayUtils.addAll(row, array);
+ }
+ Scan scan = new Scan(row);
+ scan.setFilter(new PrefixFilter(row));
+ cleanupRows(scan);
+ }
+
+ /**
+ * Delete all the rows that we find from the scanner
+ * @param scan scan used on the statistics table to determine which keys need to be deleted
+ * @throws IOException if we fail to communicate with the HTable
+ */
+ private void cleanupRows(Scan scan) throws IOException {
+ // Because each region has, potentially, a bunch of different statistics, we need to go through
+ // and delete each of them as we find them
+
+ // TODO switch this to a CP that lets us just do a filtered delete
+
+ // first we have to scan the table to find the rows to delete
+ ResultScanner scanner = target.getScanner(scan);
+ Delete d = null;
+ // XXX possible memory issues here - we could be loading a LOT of stuff as we are doing a
+ // copy for each result
+ for (Result r : scanner) {
+ // create a delete for each result
+ d = new Delete(r.getRow());
+ // let the table figure out when it wants to flush that stuff
+ target.delete(d);
+ }
+ }
+
+ /**
+ * Update a list of statistics for the given region
+ * @param serializer to convert the actual statistics to puts in the statistics table
+ * @param data Statistics for the region that we should update. The type of the
+ * {@link StatisticValue} ({@link StatisticValue#getType()}) is used as a suffix on the row key;
+ * this groups different types of metrics together on a per-region basis. Then
+ * {@link StatisticValue#getInfo()} is used as the column qualifier. Finally,
+ * {@link StatisticValue#getValue()} is used for the value of the {@link Put}. This
+ * can be null or empty.
+ * @throws IOException if we fail to do any of the puts. Any single failure will prevent any
+ * future attempts for the remaining list of stats to update
+ */
+ public void updateStats(IndividualStatisticWriter serializer, List<StatisticValue> data)
+ throws IOException {
+ // short circuit if we have nothing to write
+ if (data == null || data.size() == 0) {
+ return;
+ }
+
+ // serialize each of the metrics with the associated serializer
+ for (StatisticValue metric : data) {
+ LOG.info("Writing statistic: " + metric);
+ target.put(serializer.serialize(metric));
+ }
+ // make sure it all reaches the target table when we are done
+ target.flushCommits();
+ }
+
+ /**
+ * @return the underlying {@link HTableInterface} to which this table is writing
+ */
+ HTableInterface getUnderlyingTable() {
+ return target;
+ }
+
+ byte[] getSourceTableName() {
+ return this.sourceTableName;
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/CleanupStatistics.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/CleanupStatistics.java
new file mode 100644
index 00000000..a4135a68
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/CleanupStatistics.java
@@ -0,0 +1,74 @@
+package com.salesforce.hbase.stats.cleanup;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Wrapper class around the necessary cleanup coprocessors.
+ *
+ * We clean up stats for a table in a couple of different situations:
+ *
+ * - On table delete
+ *
+ * - This requires adding a coprocessor on the HMaster and must occur before HMaster startup. Use
+ * {@link #setupClusterConfiguration(Configuration)} to ensure this coprocessor is enabled.
+ *
+ *
+ * - On region split
+ *
+ * - The stats for the parent region of the split are removed from the stats
+ * - This is via a region coprocessor, so it is merely added to the table descriptor via
+ * {@link #setupTable(HTableDescriptor)}
+ *
+ *
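+ * A minimal sketch of wiring both pieces up (conf and desc are illustrative names for the cluster
+ * {@link Configuration} and the primary table's descriptor):
+ *
+ * <pre>
+ *   // before the HMaster starts, register the master observer that cleans up on table delete
+ *   CleanupStatistics.setupClusterConfiguration(conf);
+ *   // when creating the primary table, attach the region observer that cleans up on region split
+ *   CleanupStatistics.setupTable(desc);
+ * </pre>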
+ */
+public class CleanupStatistics {
+ private static final Log LOG = LogFactory.getLog(CleanupStatistics.class);
+
+ public static void verifyConfiguration(Configuration conf) {
+ String[] classes = conf.getStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
+ List<String> contains = Lists.newArrayList(classes);
+ String removeTableCleanupClassName = RemoveTableOnDelete.class.getName();
+ if (!contains.contains(removeTableCleanupClassName)) {
+ throw new IllegalArgumentException(
+ removeTableCleanupClassName
+ + " must be specified as a master observer to cleanup table statistics, but its missing from the configuration! We only found: "
+ + classes);
+ }
+ }
+
+ public static void setupClusterConfiguration(Configuration conf) {
+ String[] classes = conf.getStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
+ List<String> toAdd = classes == null ? new ArrayList<String>() : Lists.newArrayList(classes);
+ String removeTableCleanupClassName = RemoveTableOnDelete.class.getName();
+ if (!toAdd.contains(removeTableCleanupClassName)) {
+ toAdd.add(removeTableCleanupClassName);
+ conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, toAdd.toArray(new String[0]));
+ }
+
+ // make sure we didn't screw anything up
+ verifyConfiguration(conf);
+ }
+
+ /**
+ * Add all the necessary cleanup coprocessors to the table
+ * @param desc primary table for which we should cleanup
+ */
+ public static void setupTable(HTableDescriptor desc) {
+ String clazz = RemoveRegionOnSplit.class.getName();
+ try {
+ desc.addCoprocessor(clazz);
+ } catch (IOException e) {
+ LOG.info(clazz + " already added to table, not adding again.");
+ }
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/RemoveRegionOnSplit.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/RemoveRegionOnSplit.java
new file mode 100644
index 00000000..ee43615c
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/RemoveRegionOnSplit.java
@@ -0,0 +1,68 @@
+package com.salesforce.hbase.stats.cleanup;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
+import org.apache.hadoop.hbase.regionserver.ScanType;
+import org.apache.hadoop.hbase.regionserver.Store;
+
+import com.salesforce.hbase.stats.StatisticsTable;
+import com.salesforce.hbase.stats.util.SetupTableUtil;
+
+/**
+ * Cleanup the stats for the parent region on region split
+ */
+public class RemoveRegionOnSplit extends BaseRegionObserver {
+
+ protected StatisticsTable stats;
+
+ @Override
+ public void start(CoprocessorEnvironment e) throws IOException {
+ HTableDescriptor desc = ((RegionCoprocessorEnvironment) e).getRegion().getTableDesc();
+ if (SetupTableUtil.getStatsEnabled(desc)) {
+ stats = StatisticsTable.getStatisticsTableForCoprocessor(e, desc.getName());
+ }
+ }
+
+ @Override
+ public void stop(CoprocessorEnvironment e) throws IOException {
+ if (stats != null) {
+ stats.close();
+ }
+ }
+
+ @Override
+ public void postSplit(ObserverContext<RegionCoprocessorEnvironment> e, HRegion l, HRegion r)
+ throws IOException {
+ // stats aren't enabled on the table, so we are done
+ if (stats == null) {
+ return;
+ }
+ // get the parent
+ HRegion parent = e.getEnvironment().getRegion();
+ // and remove it from the stats
+ stats.removeStatsForRegion(parent.getRegionInfo());
+ }
+
+ /**
+ * We override this method to ensure that any scanner from a previous coprocessor is returned. The
+ * default behavior is to return null, which completely hoses any coprocessors set up before this
+ * one, making the ordering of coprocessors very important. By returning the passed scanner, we
+ * avoid easy-to-make configuration errors.
+ */
+ @Override
+ public InternalScanner preCompactScannerOpen(ObserverContext<RegionCoprocessorEnvironment> c,
+ Store store, List<? extends KeyValueScanner> scanners, ScanType scanType, long earliestPutTs,
+ InternalScanner s) throws IOException {
+
+ return s;
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/RemoveTableOnDelete.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/RemoveTableOnDelete.java
new file mode 100644
index 00000000..53482444
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/cleanup/RemoveTableOnDelete.java
@@ -0,0 +1,33 @@
+package com.salesforce.hbase.stats.cleanup;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.salesforce.hbase.stats.StatisticsTable;
+import com.salesforce.hbase.stats.util.SetupTableUtil;
+
+public class RemoveTableOnDelete extends BaseMasterObserver {
+
+ @Override
+ public void preDeleteTable(ObserverContext<MasterCoprocessorEnvironment> ctx, byte[] tableName)
+ throws IOException {
+ HTableDescriptor desc = ctx.getEnvironment().getMasterServices().getTableDescriptors()
+ .get(tableName);
+ if (desc == null) {
+ throw new IOException("Can't find table descriptor for table '" + Bytes.toString(tableName)
+ + "' that is about to be deleted!");
+ }
+ // if we have turned on stats for this table
+ if (SetupTableUtil.getStatsEnabled(desc)) {
+ StatisticsTable stats = StatisticsTable.getStatisticsTableForCoprocessor(
+ ctx.getEnvironment(), desc.getName());
+ stats.removeStats();
+ stats.close();
+ }
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/EqualByteDepthHistogramStatisticTracker.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/EqualByteDepthHistogramStatisticTracker.java
new file mode 100644
index 00000000..83e77203
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/EqualByteDepthHistogramStatisticTracker.java
@@ -0,0 +1,96 @@
+package com.salesforce.hbase.stats.impl;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.Coprocessor;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.google.protobuf.ByteString;
+import com.salesforce.hbase.stats.BaseStatistic;
+import com.salesforce.hbase.stats.HistogramStatisticValue;
+import com.salesforce.hbase.stats.StatisticReader;
+import com.salesforce.hbase.stats.StatisticTracker;
+import com.salesforce.hbase.stats.StatisticValue;
+import com.salesforce.hbase.stats.StatisticsTable;
+import com.salesforce.hbase.stats.serialization.HistogramStatisticReader;
+
+/**
+ * {@link StatisticTracker} that keeps track of an equal depth histogram.
+ *
+ * This is different from a traditional histogram in that we just keep track of the key at every 'n'
+ * bytes; another name for this is region "guide posts".
+ *
+ * When using this statistic, be very careful when selecting the guidepost depth - too small a
+ * depth could lead to an incredibly large histogram, which could crash the region server.
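+ *
+ * A rough sketch of attaching the tracker to a table (the table name and 1024-byte depth are
+ * purely illustrative):
+ *
+ * <pre>
+ *   HTableDescriptor desc = new HTableDescriptor("my_table");
+ *   // record a guidepost roughly every 1024 bytes of KeyValue data
+ *   EqualByteDepthHistogramStatisticTracker.addToTable(desc, 1024);
+ * </pre>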
+ */
+public class EqualByteDepthHistogramStatisticTracker extends BaseStatistic {
+
+ public static final String BYTE_DEPTH_CONF_KEY = "com.salesforce.guidepost.width";
+
+ private final static byte[] NAME = Bytes.toBytes("equal_depth_histogram");
+
+ private static final long DEFAULT_BYTE_DEPTH = 100;
+
+ private long guidepostDepth;
+ private long byteCount = 0;
+ private HistogramStatisticValue histogram;
+
+ public static void addToTable(HTableDescriptor desc, long depth) throws IOException {
+ Map<String, String> props = Collections.singletonMap(BYTE_DEPTH_CONF_KEY, Long.toString(depth));
+ desc.addCoprocessor(EqualByteDepthHistogramStatisticTracker.class.getName(), null,
+ Coprocessor.PRIORITY_USER, props);
+ }
+
+ /**
+ * Get a reader for the statistic
+ * @param stats statistics table from which you want to read the stats
+ * @return a {@link StatisticReader} to get the raw Histogram stats.
+ */
+ public static StatisticReader<HistogramStatisticValue> getStatistcReader(StatisticsTable stats) {
+ return new StatisticReader<HistogramStatisticValue>(stats,
+ new HistogramStatisticReader(), NAME);
+ }
+
+ @Override
+ public void start(CoprocessorEnvironment e) throws IOException {
+ super.start(e);
+ //get the byte depth for this histogram
+ guidepostDepth = e.getConfiguration().getLong(BYTE_DEPTH_CONF_KEY, DEFAULT_BYTE_DEPTH);
+ this.histogram = newHistogram();
+ }
+
+ private HistogramStatisticValue newHistogram() {
+ return new HistogramStatisticValue(NAME, Bytes.toBytes("equal_width_histogram_"
+ + guidepostDepth + "bytes"), guidepostDepth);
+ }
+
+ @Override
+ public List<StatisticValue> getCurrentStats() {
+ return Collections.singletonList((StatisticValue) histogram);
+ }
+
+ @Override
+ public void clear() {
+ this.histogram = newHistogram();
+ this.byteCount = 0;
+ }
+
+ @Override
+ public void updateStatistic(KeyValue kv) {
+ byteCount += kv.getLength();
+ // if we are at the next guide-post, add it to the histogram
+ if (byteCount >= guidepostDepth) {
+ // update the histogram
+ this.histogram.addColumn(ByteString.copyFrom(kv.getBuffer(), kv.getOffset(), kv.getLength()));
+
+ //reset the count for the next key
+ byteCount = 0;
+ }
+ }
+}
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/MinMaxKey.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/MinMaxKey.java
new file mode 100644
index 00000000..76cebe7f
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/MinMaxKey.java
@@ -0,0 +1,121 @@
+package com.salesforce.hbase.stats.impl;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.salesforce.hbase.stats.BaseStatistic;
+import com.salesforce.hbase.stats.ColumnFamilyStatistic;
+import com.salesforce.hbase.stats.StatisticReader;
+import com.salesforce.hbase.stats.StatisticValue;
+import com.salesforce.hbase.stats.StatisticsTable;
+import com.salesforce.hbase.stats.serialization.PointStatisticReader;
+
+/**
+ * Coprocessor that just keeps track of the min/max key on a per-column family basis.
+ *
+ * This can then also be used to find the per-table min/max key for the table.
+ */
+public class MinMaxKey extends BaseStatistic {
+
+ public static void addToTable(HTableDescriptor desc) throws IOException {
+ desc.addCoprocessor(MinMaxKey.class.getName());
+ }
+
+ private static final byte[] MAX_SUFFIX = Bytes.toBytes("max_region_key");
+ private static final byte[] MIN_SUFFIX = Bytes.toBytes("min_region_key");
+ private final static byte[] NAME = Bytes.toBytes("min_max_stat");
+
+ private byte[] min;
+ private byte[] max;
+
+ @Override
+ public List<StatisticValue> getCurrentStats() {
+ List<StatisticValue> data = new ArrayList<StatisticValue>(2);
+ data.add(new StatisticValue(NAME, MIN_SUFFIX, min));
+ data.add(new StatisticValue(NAME, MAX_SUFFIX, max));
+ return data;
+ }
+
+ @Override
+ public void clear() {
+ this.max = null;
+ this.min = null;
+ }
+
+ @Override
+ public void updateStatistic(KeyValue kv) {
+ // first time through, so both are null
+ if (min == null) {
+ min = TrackerUtil.copyRow(kv);
+ max = TrackerUtil.copyRow(kv);
+ return;
+ }
+ if (Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(), min, 0, min.length) < 0) {
+ min = TrackerUtil.copyRow(kv);
+ }
+ if (Bytes.compareTo(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(), max, 0, max.length) > 0) {
+ max = TrackerUtil.copyRow(kv);
+ }
+ }
+
+ /**
+ * Find a reader for the the min/max key based on the type of serialization of the key.
+ * @param stats table from which you want to read the stats
+ * @return a {@link StatisticReader} to get the raw Min/Max stats. Use {@link #interpret(List)} to
+ * get a list of the most recent min/max values on a per-column, per-region basis.
+ */
+ public static StatisticReader<StatisticValue> getStatistcReader(StatisticsTable stats) {
+ return new StatisticReader<StatisticValue>(stats,
+ new PointStatisticReader(), NAME);
+ }
+
+ /**
+ * Combine the results from {@link #getStatistcReader(StatisticsTable)} into {@link MinMaxStat}
+ * results for easy digestion
+ * @param stat statistics from {@link #getStatistcReader(StatisticsTable)}.
+ * @return the min/max per column family per region
+ */
+ public static List<MinMaxStat> interpret(List<ColumnFamilyStatistic<StatisticValue>> stat) {
+ List<MinMaxStat> stats = new ArrayList<MinMaxStat>();
+ for (int i = 0; i < stat.size(); i++) {
+ // every two column family statistic is actually one statistic, so we need to combine them
+ ColumnFamilyStatistic<StatisticValue> minmax = stat.get(i++);
+ StatisticValue max = minmax.getValues().get(0);
+ StatisticValue min = minmax.getValues().get(1);
+ // we only return the most recent min/max combination for the column family/region
+ stats.add(new MinMaxStat(minmax.getRegion(), minmax.getColumnfamily(), max, min));
+ }
+ return stats;
+
+ }
+
+ /**
+ * Abstraction of a statistic that combines two {@link StatisticValue}s to generate a single
+ * min/max stat for a single column family of a region.
+ */
+ public static class MinMaxStat {
+
+ public final byte[] region;
+ public final byte[] family;
+ public final byte[] max;
+ public final byte[] min;
+
+ /**
+ * @param region region where the stat was obtained
+ * @param columnfamily column family for which the stat was calculated
+ * @param min the min key as a {@link StatisticValue}
+ * @param max the max key as a {@link StatisticValue}
+ */
+ public MinMaxStat(byte[] region, byte[] columnfamily, StatisticValue max, StatisticValue min) {
+ this.region = region;
+ this.family = columnfamily;
+ this.max = max.getValue();
+ this.min = min.getValue();
+ }
+ }
+}
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/TrackerUtil.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/TrackerUtil.java
new file mode 100644
index 00000000..b4e8ba8e
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/impl/TrackerUtil.java
@@ -0,0 +1,23 @@
+package com.salesforce.hbase.stats.impl;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hbase.KeyValue;
+
+import com.salesforce.hbase.stats.StatisticTracker;
+
+/**
+ * Utilities for {@link StatisticTracker}s.
+ */
+public class TrackerUtil {
+
+ private TrackerUtil() {
+ // private ctor for utils
+ }
+
+ public static byte[] copyRow(KeyValue kv) {
+ return Arrays.copyOfRange(kv.getBuffer(), kv.getRowOffset(),
+ kv.getRowOffset() + kv.getRowLength());
+ }
+
+}
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/HistogramStatisticReader.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/HistogramStatisticReader.java
new file mode 100644
index 00000000..17c82130
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/HistogramStatisticReader.java
@@ -0,0 +1,37 @@
+package com.salesforce.hbase.stats.serialization;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.client.Result;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.salesforce.hbase.stats.ColumnFamilyStatistic;
+import com.salesforce.hbase.stats.HistogramStatisticValue;
+import com.salesforce.hbase.stats.StatisticValue;
+
+/**
+ * Get {@link HistogramStatisticValue}s from the underlying bytes. Expects serialization with the
+ * {@link IndividualStatisticWriter}.
+ */
+public class HistogramStatisticReader implements IndividualStatisticReader<HistogramStatisticValue> {
+ private final PointStatisticReader delegate;
+
+ public HistogramStatisticReader() {
+ delegate = new PointStatisticReader();
+ }
+
+ public ColumnFamilyStatistic<HistogramStatisticValue> deserialize(Result r) throws IOException {
+ ColumnFamilyStatistic<StatisticValue> raw = delegate.deserialize(r);
+ // then re-wrap the results so we can read histograms
+ ColumnFamilyStatistic<HistogramStatisticValue> ret =
+ new ColumnFamilyStatistic<HistogramStatisticValue>(raw.getRegion(), raw.getColumnfamily());
+ for (StatisticValue value : raw.getValues()) {
+ try {
+ ret.add(new HistogramStatisticValue(value));
+ } catch (InvalidProtocolBufferException e) {
+ throw new IOException(e);
+ }
+ }
+ return ret;
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/IndividualStatisticReader.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/IndividualStatisticReader.java
new file mode 100644
index 00000000..9e56d3bd
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/IndividualStatisticReader.java
@@ -0,0 +1,17 @@
+package com.salesforce.hbase.stats.serialization;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.client.Result;
+
+import com.salesforce.hbase.stats.ColumnFamilyStatistic;
+import com.salesforce.hbase.stats.StatisticValue;
+
+/**
+ * Deserializer for a {@link StatisticValue} from the raw {@link Result}. This is the complement
+ * to the {@link IndividualStatisticWriter}.
+ * @param <S> the type of statistic value to deserialize
+ */
+public interface IndividualStatisticReader<S extends StatisticValue> {
+ public ColumnFamilyStatistic<S> deserialize(Result r) throws IOException;
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/IndividualStatisticWriter.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/IndividualStatisticWriter.java
new file mode 100644
index 00000000..ca02d6e7
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/IndividualStatisticWriter.java
@@ -0,0 +1,31 @@
+package com.salesforce.hbase.stats.serialization;
+
+import org.apache.hadoop.hbase.client.Put;
+
+import com.salesforce.hbase.stats.StatisticValue;
+import com.salesforce.hbase.stats.util.Constants;
+
+/**
+ * Simple serializer that always puts generates the same formatted key for an individual
+ * statistic. This writer is used to write a single {@link StatisticValue} to the statistics
+ * table. They should be read back via an {@link IndividualStatisticReader}.
+ */
+public class IndividualStatisticWriter {
+ private final byte[] source;
+ private byte[] region;
+ private byte[] column;
+
+ public IndividualStatisticWriter(byte[] sourcetable, byte[] region, byte[] column) {
+ this.source = sourcetable;
+ this.region = region;
+ this.column = column;
+ }
+
+ public Put serialize(StatisticValue value) {
+ byte[] prefix = StatisticSerDe.getRowKey(source, region, column, value.getType());
+ Put put = new Put(prefix);
+ put.add(Constants.STATS_DATA_COLUMN_FAMILY, value.getInfo(), value.getValue());
+ return put;
+ }
+
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/PointStatisticReader.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/PointStatisticReader.java
new file mode 100644
index 00000000..0b7400bc
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/PointStatisticReader.java
@@ -0,0 +1,52 @@
+package com.salesforce.hbase.stats.serialization;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.salesforce.hbase.stats.ColumnFamilyStatistic;
+import com.salesforce.hbase.stats.StatisticValue;
+
+/**
+ * Read simple {@link StatisticValue}s from raw {@link Result}s. Expects serialization with the
+ * {@link IndividualStatisticWriter}.
+ */
+public class PointStatisticReader implements IndividualStatisticReader<StatisticValue> {
+
+ public ColumnFamilyStatistic<StatisticValue> deserialize(Result r) {
+ // break out the key based on its parts
+ // 1. start with getting the lengths of the key parts
+ byte[] row = r.getRow();
+ int sizes[] = new int[StatisticSerDe.NUM_KEY_PARTS];
+ int start = row.length - Bytes.SIZEOF_INT;
+ for (int i = StatisticSerDe.NUM_KEY_PARTS - 1; i >= 0; i--) {
+ sizes[i] = Bytes.toInt(row, start, Bytes.SIZEOF_INT);
+ start -= Bytes.SIZEOF_INT;
+ }
+
+ // 1b. break out each part of the key so we can rebuild the statistic
+ start = sizes[0]; // this is the end of the table name, so we can just skip it immediately
+ int end = start + sizes[1];
+ // for right now, we just copy the array over - its a bit inefficient, but we can always go to
+ // ByteBuffers later.
+ byte[] statname = Arrays.copyOfRange(row, start, end);
+ start += sizes[1];
+ end = start + sizes[2];
+ byte[] region = Arrays.copyOfRange(row, start, end);
+ start += sizes[2];
+ end = start + sizes[3];
+ byte[] family = Arrays.copyOfRange(row, start, end);
+ ColumnFamilyStatistic<StatisticValue> stat =
+ new ColumnFamilyStatistic<StatisticValue>(region, family);
+ for (KeyValue kv : r.list()) {
+ byte[] info = Arrays.copyOfRange(kv.getBuffer(), kv.getQualifierOffset(),
+ kv.getQualifierOffset() + kv.getQualifierLength());
+ byte[] value = Arrays.copyOfRange(kv.getBuffer(), kv.getValueOffset(), kv.getValueOffset()
+ + kv.getValueLength());
+ stat.add(new StatisticValue(statname, info, value));
+ }
+ return stat;
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/StatisticSerDe.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/StatisticSerDe.java
new file mode 100644
index 00000000..15ca6e21
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/StatisticSerDe.java
@@ -0,0 +1,70 @@
+package com.salesforce.hbase.stats.serialization;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+
+/**
+ * Simple utility class for managing multiple key parts of the statistic
+ */
+public class StatisticSerDe {
+
+ private StatisticSerDe() {
+ // private ctor for utility classes
+ }
+
+ /** Number of parts in our complex key */
+ protected static final int NUM_KEY_PARTS = 4;
+
+ /**
+ * Get the row key prefix based on the source table and the name of the statistic
+ * @param table name of the source table
+ * @param statName name of the statistic
+ * @return the row key prefix that should be used for this statistic
+ */
+ public static byte[] getRowPrefix(byte[] table, byte[] statName) {
+ byte[] prefix = table;
+ prefix = Bytes.add(prefix, statName);
+ return prefix;
+ }
+
+ /**
+ * Get the row key prefix based on the source table, region and name of the statistic
+ * @param table name of the source table
+ * @param regionname name of the region where the statistic was gathered
+ * @param statName name of the statistic
+ * @return the row key prefix that should be used for this statistic
+ */
+ public static byte[] getRowPrefix(byte[] table, byte[] regionname, byte[] statName) {
+ byte[] prefix = table;
+ prefix = Bytes.add(prefix, statName);
+ prefix = Bytes.add(prefix, regionname);
+ return prefix;
+ }
+
+ /**
+ * Get the full row key based on the source table, region, column and name of the statistic
+ * @param table name of the source table
+ * @param region name of the region where the statistic was gathered
+ * @param column column for which the statistic was gathered
+ * @param statName name of the statistic
+ * @return the row key that should be used for this statistic
+ */
+ public static byte[] getRowKey(byte[] table, byte[] region, byte[] column, byte[] statName) {
+ // always starts with the source table
+ byte[] prefix = new byte[0];
+ // then append each part of the key and track its length
+ byte[][] parts = new byte[][] { table, statName, region, column };
+ int[] sizes = new int[NUM_KEY_PARTS];
+ // XXX - this is where we would use orderly to get the sorting consistent
+ for (int i = 0; i < NUM_KEY_PARTS; i++) {
+ prefix = Bytes.add(prefix, parts[i]);
+ sizes[i] = parts[i].length;
+ }
+ // then we add on the sizes to the end of the key
+ for (int size : sizes) {
+ prefix = Bytes.add(prefix, Bytes.toBytes(size));
+ }
+
+ return prefix;
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/package-info.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/package-info.java
new file mode 100644
index 00000000..f74245b1
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/serialization/package-info.java
@@ -0,0 +1,51 @@
+package com.salesforce.hbase.stats.serialization;
+
+
+/**
+Overview of Statistics Serialization
+
+
+Contents:
+- Overview
+- Reading the StatisticsTable
+
+Overview
+
+These are the various pieces necessary to serialize a
+statistic to a list of {@link KeyValue}s. Right now, this is implemented with a single
+serialization order in the row key:
+
+
+table | statistic name | region | column family
+
+
+where table, region and column family are in reference to the source table.
+
+This lets you easily aggregate a single statistic over a given region or quickly access a
+single statistic for a given column in a given region.
+
+For cases where you want to know a statistic for a single family, but across all regions, you would
+need to do the same scan as in the above case, but filter out other columns, which can be
+inefficient, but isn't a killer because we won't have that many stores (perhaps on the
+order of several thousand across all regions).
+
+We could extend this serialization to be more flexible (different key-part ordering for different
+statistics based on desired access patterns), but this is orders of magnitude simpler.
+
+Reading the StatisticsTable
+
+Some statistics can be read directly
+from the statistics table since they are just simple point values. For instance, the
+{@link com.salesforce.hbase.stats.impl.EqualByteDepthHistogramStatisticTracker} can be read using a
+simple
+{@link com.salesforce.hbase.stats.serialization.HistogramStatisticReader}, like this:
+
+
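+(a sketch - statTable here is an illustrative {@link com.salesforce.hbase.stats.StatisticsTable}
+wrapping the statistics for the primary table, and reader.read() mirrors the access pattern used by
+the tests in this module):
+
+ StatisticReader<HistogramStatisticValue> reader =
+     EqualByteDepthHistogramStatisticTracker.getStatistcReader(statTable);
+ List<ColumnFamilyStatistic<HistogramStatisticValue>> stats = reader.read();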
+
+Other statistics have a slightly more complicated internal structure - i.e. they use multiple
+column qualifiers - and should provide a special reader. For instance,
+{@link com.salesforce.hbase.stats.impl.MinMaxKey} provides a custom reader that can be used like:
+
+
+ StatisticReader<StatisticValue> reader = MinMaxKey.getStatistcReader(primary);
+ StatisticsTable statTable = new StatisticsTable(UTIL.getConfiguration(), primary);
+ List<MinMaxStat> results = MinMaxKey.interpret(statTable.read(reader));
+
+ */
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/util/Constants.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/util/Constants.java
new file mode 100644
index 00000000..46765219
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/util/Constants.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.salesforce.hbase.stats.util;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * General constants for hbase-stat
+ */
+public class Constants {
+
+
+ private Constants() {
+ // private ctor for utility class
+ }
+
+
+ /** Name of the column family to store all the statistics data */
+ public static final byte[] STATS_DATA_COLUMN_FAMILY = Bytes.toBytes("STAT");
+
+ /** Name of the statistics table */
+ public static final String STATS_TABLE_NAME = "_stats_";
+
+ public static final byte[] STATS_TABLE_NAME_BYTES = Bytes.toBytes(STATS_TABLE_NAME);
+}
diff --git a/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/util/SetupTableUtil.java b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/util/SetupTableUtil.java
new file mode 100644
index 00000000..62aaf958
--- /dev/null
+++ b/contrib/hbase-stat/src/main/java/com/salesforce/hbase/stats/util/SetupTableUtil.java
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.salesforce.hbase.stats.util;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.google.common.base.Preconditions;
+import com.salesforce.hbase.stats.cleanup.CleanupStatistics;
+
+import static com.salesforce.hbase.stats.util.Constants.STATS_TABLE_NAME;
+
+/**
+ * Utility helper class to ensure that your primary and statistics tables are set up correctly
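+ *
+ * A minimal sketch of the expected flow (conf, admin and primary are illustrative names for the
+ * cluster configuration, an {@link HBaseAdmin} and the primary table's descriptor):
+ *
+ * <pre>
+ *   SetupTableUtil.setupCluster(conf);                      // before starting the cluster
+ *   SetupTableUtil.setupTable(admin, primary, true, true);  // enable stats and create the stats table
+ * </pre>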
+ */
+public class SetupTableUtil {
+
+ private static final String TABLE_STATS_ENABLED_DESC_KEY = "com.salesforce.hbase.stats.cleanup";
+
+ private SetupTableUtil() {
+ // private ctor for util classes
+ }
+
+
+ /**
+ * Ensure all the necessary coprocessors are added to a cluster's configuration
+ * @param conf {@link Configuration} to update
+ */
+ public static void setupCluster(Configuration conf){
+ CleanupStatistics.setupClusterConfiguration(conf);
+ }
+
+ public static void setupTable(HBaseAdmin admin, HTableDescriptor primaryTable,
+ boolean ensureStatTable, boolean createStatTable)
+ throws IOException {
+ // add the right keys to the primary table
+ primaryTable.setValue(TABLE_STATS_ENABLED_DESC_KEY, "true");
+ CleanupStatistics.setupTable(primaryTable);
+
+ if (!ensureStatTable) {
+ return;
+ }
+
+ // ensure that the stats table is setup correctly
+ boolean exists = admin.tableExists(STATS_TABLE_NAME);
+ HTableDescriptor statDesc = null;
+ if (exists) {
+ if (createStatTable) {
+ throw new IllegalStateException("Statistics table '" + STATS_TABLE_NAME
+ + " was requested to be created, but already exists!");
+ } else {
+ // get the descriptor so we can verify it has the right properties
+ statDesc = admin.getTableDescriptor(Bytes.toBytes(STATS_TABLE_NAME));
+ }
+ } else {
+ if (createStatTable) {
+ statDesc = createStatsTable(admin);
+ }
+ }
+ verifyStatsTable(statDesc);
+ }
+
+ public static HTableDescriptor createStatsTable(HBaseAdmin admin) throws IOException {
+ HTableDescriptor statDesc = new HTableDescriptor(STATS_TABLE_NAME);
+ HColumnDescriptor col = new HColumnDescriptor(Constants.STATS_DATA_COLUMN_FAMILY);
+ col.setMaxVersions(1);
+ statDesc.addFamily(col);
+ admin.createTable(statDesc);
+ return statDesc;
+ }
+
+ /**
+ * @param desc {@link HTableDescriptor} of the statistics table to verify
+ */
+ public static void verifyStatsTable(HTableDescriptor desc) {
+ if (!desc.hasFamily(Constants.STATS_DATA_COLUMN_FAMILY)) {
+ throw new IllegalStateException("Statistics table '" + desc
+ + "' doesn't have expected column family: " + Bytes.toString(Constants.STATS_DATA_COLUMN_FAMILY));
+ }
+ // only keep around a single version
+ int versions = desc.getFamily(Constants.STATS_DATA_COLUMN_FAMILY).getMaxVersions();
+ Preconditions.checkState(versions == 1,
+ "Stats rows should only have a single version, but set to: " + versions);
+ }
+
+
+ /**
+ * @param desc {@link HTableDescriptor} to check
+ * @return true if statistics have been turned on for the table
+ */
+ public static boolean getStatsEnabled(HTableDescriptor desc) {
+ String hasStats = desc.getValue(TABLE_STATS_ENABLED_DESC_KEY);
+ if (hasStats != null && hasStats.equals("true")) {
+ return true;
+ }
+ return false;
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/protobuf/README.txt b/contrib/hbase-stat/src/main/protobuf/README.txt
new file mode 100644
index 00000000..de45ecc8
--- /dev/null
+++ b/contrib/hbase-stat/src/main/protobuf/README.txt
@@ -0,0 +1,26 @@
+These are the protobuf definition files used by hbase-stat. The produced java
+classes are generated into src/main/java/com/salesforce/hbase/protobuf/generated
+and then checked in. The reasoning is that they change infrequently.
+
+To regenerate the classes after making definition file changes, ensure first that
+the protobuf protoc tool is in your $PATH (you may need to download it and build
+it first; it's part of the protobuf package obtainable from here:
+http://code.google.com/p/protobuf/downloads/list). Then run the following from
+contrib/hbase-stat (You should be able to just copy and paste the below into a
+terminal and hit return -- the protoc compiler runs fast):
+
+ UNIX_PROTO_DIR=src/main/protobuf
+ JAVA_DIR=src/main/java/
+ mkdir -p $JAVA_DIR 2> /dev/null
+ if which cygpath 2> /dev/null; then
+ PROTO_DIR=`cygpath --windows $UNIX_PROTO_DIR`
+ JAVA_DIR=`cygpath --windows $JAVA_DIR`
+ else
+ PROTO_DIR=$UNIX_PROTO_DIR
+ fi
+ for PROTO_FILE in $UNIX_PROTO_DIR/*.proto
+ do
+ protoc -I$PROTO_DIR --java_out=$JAVA_DIR $PROTO_FILE
+ done
+
+After you've done the above, check it in.
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/main/protobuf/stats.proto b/contrib/hbase-stat/src/main/protobuf/stats.proto
new file mode 100644
index 00000000..7677fb63
--- /dev/null
+++ b/contrib/hbase-stat/src/main/protobuf/stats.proto
@@ -0,0 +1,14 @@
+// Statistics protobufs - used for compatibility with HBase 0.96
+
+option java_package = "com.salesforce.hbase.protobuf.generated";
+option java_outer_classname = "StatisticProtos";
+option java_generate_equals_and_hash = true;
+option optimize_for = SPEED;
+
+message Histogram {
+  // all histograms have either a fixed depth or width
+  required int64 depthOrWidth = 1;
+  // fixed depth will have different values as row keys
+  // fixed width will just have counts
+ repeated bytes value = 2;
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestEqualWidthHistogramOnTable.java b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestEqualWidthHistogramOnTable.java
new file mode 100644
index 00000000..7132f151
--- /dev/null
+++ b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestEqualWidthHistogramOnTable.java
@@ -0,0 +1,91 @@
+package com.salesforce.hbase.stats;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.google.protobuf.ByteString;
+import com.salesforce.hbase.protobuf.generated.StatisticProtos.Histogram;
+import com.salesforce.hbase.stats.impl.EqualByteDepthHistogramStatisticTracker;
+import com.salesforce.hbase.stats.util.Constants;
+import com.salesforce.hbase.stats.util.StatsTestUtil;
+
+/**
+ * A full, real table test of the {@link EqualByteDepthHistogramStatisticTracker}. This is the
+ * complement to {@link TestEqualWidthHistogramStat}.
+ */
+public class TestEqualWidthHistogramOnTable extends TestTrackerImpl {
+
+ // number of keys in each column
+ private final int columnWidth = 676;
+ // depth is the width (count of keys) times the number of bytes of each key, which in this case is
+ // fixed to 32 bytes, so we know the depth in all cases
+ private final int columnDepth = columnWidth * 32;
+
+ @Override
+ protected void preparePrimaryTableDescriptor(HTableDescriptor primary) throws Exception {
+ EqualByteDepthHistogramStatisticTracker.addToTable(primary, columnDepth);
+ }
+
+ @Override
+ protected void verifyStatistics(HTableDescriptor primary) throws Exception {
+ // scan the stats table for a raw count
+ HTable statTable = new HTable(UTIL.getConfiguration(), Constants.STATS_TABLE_NAME);
+ int count = StatsTestUtil.getKeyValueCount(statTable);
+
+ // we should have just 1 stat - our histogram
+ assertEquals("Got an unexpected amount of stats!", 1, count);
+ StatisticsTable table = new StatisticsTable(UTIL.getConfiguration(), primary);
+
+ // now get a custom reader to interpret the results
+ StatisticReader<HistogramStatisticValue> reader = EqualByteDepthHistogramStatisticTracker
+ .getStatistcReader(table);
+ List<ColumnFamilyStatistic<HistogramStatisticValue>> stats = reader.read();
+
+ // should only have a single column family
+ assertEquals("More than one column family has statistics!", 1, stats.size());
+ List<HistogramStatisticValue> values = stats.get(0).getValues();
+ assertEquals("Wrong number of histograms for the column family/region", 1, values.size());
+ Histogram histogram = values.get(0).getHistogram();
+ assertEquals("Got an incorrect number of guideposts! Got: " + toStringFixedDepth(histogram),
+ 26, histogram.getValueList().size());
+
+ // make sure we got the correct guideposts
+ byte counter = 'a';
+ for (ByteString column : histogram.getValueList()) {
+ byte[] guidepost = new byte[] { counter, 'z', 'z' };
+ byte[] data = column.toByteArray();
+ // row key is actually stored flipped, so we flip it here
+ byte[] actual = new byte[] { data[data.length - 3], data[data.length - 2],
+ data[data.length - 1] };
+ assertArrayEquals(
+ "Guidepost should be:" + Bytes.toString(guidepost) + " , but was: "
+ + Bytes.toString(actual), guidepost, actual);
+ counter++;
+ }
+
+ // cleanup
+ statTable.close();
+ table.close();
+ }
+
+ /**
+ * @param histogram to print
+ * @return a string representation of the fixed depth histogram which stores keyvalues
+ */
+ private String toStringFixedDepth(Histogram histogram) {
+ StringBuilder sb = new StringBuilder("histogram: " + histogram.getDepthOrWidth() + " depth, ");
+ for (ByteString bs : histogram.getValueList()) {
+ sb.append(new KeyValue(bs.toByteArray()).toString());
+ sb.append(",");
+ }
+ return sb.toString();
+ }
+
+}
diff --git a/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestEqualWidthHistogramStat.java b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestEqualWidthHistogramStat.java
new file mode 100644
index 00000000..33b40f93
--- /dev/null
+++ b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestEqualWidthHistogramStat.java
@@ -0,0 +1,105 @@
+package com.salesforce.hbase.stats;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.salesforce.hbase.protobuf.generated.StatisticProtos.Histogram;
+import com.salesforce.hbase.stats.impl.EqualByteDepthHistogramStatisticTracker;
+
+/**
+ * Simple unit test of equal width histograms. Doesn't test against a full cluster, but rather is
+ * just simple interface testing.
+ */
+public class TestEqualWidthHistogramStat {
+
+ // number of keys in each column
+ private final int columnWidth = 676;
+ // depth is the width (count of keys) times the number of bytes of each key, which in this case is
+ // fixed to 3 bytes, so we know the depth in all cases
+ private final int columnDepth = columnWidth * 3;
+
+ @Test
+ public void testSimpleStat() throws IOException {
+ EqualByteDepthHistogramStatisticTracker tracker = new EqualByteDepthHistogramStatisticTracker();
+ // unfortunately, need to mock a lot here
+ RegionCoprocessorEnvironment env = Mockito.mock(RegionCoprocessorEnvironment.class);
+ HRegion mockRegion = Mockito.mock(HRegion.class);
+ String tableName = "testSimpleStatPrimary";
+ HTableDescriptor primary = new HTableDescriptor(tableName);
+ Mockito.when(env.getRegion()).thenReturn(mockRegion);
+ Mockito.when(mockRegion.getTableDesc()).thenReturn(primary);
+ HTableInterface mockTable = Mockito.mock(HTableInterface.class);
+ Mockito.when(env.getTable((byte[]) Mockito.any())).thenReturn(mockTable);
+
+ // setup the actual configuration that we care about
+ Configuration conf = new Configuration(false);
+ // set the guidepost depth so each guidepost lands on a [letter]zz key
+ conf.setLong(EqualByteDepthHistogramStatisticTracker.BYTE_DEPTH_CONF_KEY, columnDepth);
+ Mockito.when(env.getConfiguration()).thenReturn(conf);
+
+ // setup the tracker
+ tracker.start(env);
+
+ // put some data in the tracker and check the histograms that come out
+ loadAndVerifyTracker(tracker);
+
+ // should be able to clear it and get the exact same results
+ tracker.clear();
+ loadAndVerifyTracker(tracker);
+ }
+
+ /**
+ * @param tracker tracker to load with data and then validate
+ * @throws InvalidProtocolBufferException if protobufs are broken - should not be thrown since we
+ * are not serializing information
+ */
+ private void loadAndVerifyTracker(EqualByteDepthHistogramStatisticTracker tracker)
+ throws InvalidProtocolBufferException {
+ // now feed the tracker a bunch of bytes
+ KeyValue kv = new KeyValue();
+ byte[] k = new byte[3];
+ for (byte b1 = 'a'; b1 <= 'z'; b1++) {
+ for (byte b2 = 'a'; b2 <= 'z'; b2++) {
+ for (byte b3 = 'a'; b3 <= 'z'; b3++) {
+ k[0] = b1;
+ k[1] = b2;
+ k[2] = b3;
+ kv = new KeyValue(k, 0, 3);
+ tracker.updateStatistic(kv);
+ }
+ }
+ }
+
+ List<StatisticValue> stats = tracker.getCurrentStats();
+ assertEquals("Got more than one histogram!", 1, stats.size());
+ HistogramStatisticValue stat = (HistogramStatisticValue) stats.get(0);
+ Histogram histogram = stat.getHistogram();
+ assertEquals("Got an incorrect number of guideposts!", 26, histogram.getValueList().size());
+
+ // make sure we got the correct guideposts
+ byte counter = 'a';
+ for (ByteString column : histogram.getValueList()) {
+ byte[] guidepost = new byte[] { counter, 'z', 'z' };
+ byte[] actual = column.toByteArray();
+ assertArrayEquals(
+ "Guidepost should be:" + Bytes.toString(guidepost) + " , but was: "
+ + Bytes.toString(actual), guidepost, actual);
+ counter++;
+ }
+ }
+}
\ No newline at end of file
diff --git a/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestMinMaxKeyStats.java b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestMinMaxKeyStats.java
new file mode 100644
index 00000000..e573da22
--- /dev/null
+++ b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestMinMaxKeyStats.java
@@ -0,0 +1,53 @@
+package com.salesforce.hbase.stats;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HTable;
+
+import com.salesforce.hbase.stats.impl.MinMaxKey;
+import com.salesforce.hbase.stats.impl.MinMaxKey.MinMaxStat;
+import com.salesforce.hbase.stats.util.Constants;
+import com.salesforce.hbase.stats.util.StatsTestUtil;
+
+/**
+ * Test the min/max key on a real table
+ */
+public class TestMinMaxKeyStats extends TestTrackerImpl {
+
+ @Override
+ protected void preparePrimaryTableDescriptor(HTableDescriptor primary) throws IOException {
+ // just track the Min/Max Key
+ MinMaxKey.addToTable(primary);
+ }
+
+ @Override
+ protected void verifyStatistics(HTableDescriptor primary) throws IOException {
+ // scan the stats table for a raw count
+ HTable stats = new HTable(UTIL.getConfiguration(), Constants.STATS_TABLE_NAME);
+ int count = StatsTestUtil.getKeyValueCount(stats);
+
+ // we should have 2 stats - a min and a max for the one column of the one region of the table
+ assertEquals("Got an unexpected amount of stats!", 2, count);
+
+ // then do a read with the actual statistics
+ // we know we are going to collect MinMaxKey so reading ensures we are collecting correctly
+ StatisticsTable statTable = new StatisticsTable(UTIL.getConfiguration(), primary);
+ StatisticReader<StatisticValue> reader = MinMaxKey.getStatistcReader(statTable);
+ List<MinMaxStat> results = MinMaxKey.interpret(reader.read());
+ assertEquals("Unexpected number of min/max results!", 1, results.size());
+ assertArrayEquals("Unexpected number of min result!", new byte[] { 'a', 'a', 'a' },
+ results.get(0).min);
+ assertArrayEquals("Unexpected number of min result!", new byte[] { 'z', 'z', 'z' },
+ results.get(0).max);
+
+ // cleanup after ourselves
+ stats.close();
+ statTable.close();
+ }
+
+}
diff --git a/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestTrackerImpl.java b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestTrackerImpl.java
new file mode 100644
index 00000000..d292e2ed
--- /dev/null
+++ b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/TestTrackerImpl.java
@@ -0,0 +1,145 @@
+package com.salesforce.hbase.stats;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.salesforce.hbase.stats.util.Constants;
+import com.salesforce.hbase.stats.util.SetupTableUtil;
+import com.salesforce.hbase.stats.util.StatsTestUtil;
+
+/**
+ * Helper test for testing an implementation of a statistic.
+ *
+ * Uses the {@link HBaseTestingUtility#loadTable(HTable, byte[])} to load the {@link #FAM} column
+ * family with data. The table is then flushed and compacted, ensuring statistics are gathered
+ * through the normal mechanisms.
+ *
+ * Use {@link #preparePrimaryTableDescriptor(HTableDescriptor)} to add your custom
+ * {@link StatisticTracker} to the table.
+ *
+ * Use {@link #verifyStatistics(HTableDescriptor)} to verify that all the correct statistics have
+ * been collected on the table, after it has been loaded, flushed and compacted.
+ */
+@SuppressWarnings("javadoc")
+public abstract class TestTrackerImpl {
+ protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ public static final byte[] FAM = Bytes.toBytes("FAMILY");
+ public static final Log LOG = LogFactory.getLog(TestTrackerImpl.class);
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ SetupTableUtil.setupCluster(UTIL.getConfiguration());
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void teardownCluster() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void setupTables() throws Exception {
+ HBaseAdmin admin = UTIL.getHBaseAdmin();
+ // setup the stats table
+ SetupTableUtil.createStatsTable(admin);
+ // make sure the stats table got created
+ assertTrue("Stats table didn't get created!", admin.tableExists(Constants.STATS_TABLE_NAME));
+ }
+
+ @After
+ public void cleanupTables() throws Exception {
+ HBaseAdmin admin = UTIL.getHBaseAdmin();
+ admin.disableTable(Constants.STATS_TABLE_NAME);
+ admin.deleteTable(Constants.STATS_TABLE_NAME);
+ admin.close();
+ }
+
+ /**
+ * Goes through a full end-to-end test of gathering statistics on a table.
+ *
+ * First, we create and verify the statistics table. Then we write some data to the primary table
+ * and flush and compact it so that statistics are collected. Next, we check that the given
+ * statistic was gathered correctly by reading the stats table. Finally, we delete the primary
+ * table and make sure its statistics are removed as well.
+ * @throws Exception on failure
+ */
+ @Test
+ public void testSimplePrimaryAndStatsTables() throws Exception {
+ HBaseAdmin admin = UTIL.getHBaseAdmin();
+
+ // setup our primary table
+ HTableDescriptor primary = new HTableDescriptor("testSimplePrimaryAndStatsTables");
+ primary.addFamily(new HColumnDescriptor(FAM));
+
+ // make sure stats are enabled on the table
+ SetupTableUtil.setupTable(UTIL.getHBaseAdmin(), primary, false, false);
+
+ // do any further setup on the table
+ preparePrimaryTableDescriptor(primary);
+
+ // create the primary table
+ admin.createTable(primary);
+
+ // load some data into our primary table
+ HTable primaryTable = new HTable(UTIL.getConfiguration(), primary.getName());
+ UTIL.loadTable(primaryTable, FAM);
+
+ // now flush and compact our table
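+ // (flushing and compacting is what causes statistics to be gathered - see the class javadoc)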
+ HRegionServer server = UTIL.getRSForFirstRegionInTable(primary.getName());
+ List<HRegion> regions = server.getOnlineRegions(primary.getName());
+ assertTrue("Didn't find any regions for primary table!", regions.size() > 0);
+ // flush and compact all the regions of the primary table
+ for (HRegion region : regions) {
+ region.flushcache();
+ region.compactStores(true);
+ }
+
+ // make sure all the stats that we expect got written
+ verifyStatistics(primary);
+
+ // then delete the table and make sure we don't have any more stats in our table
+ admin.disableTable(primary.getName());
+ admin.deleteTable(primary.getName());
+
+ // make sure that we cleanup the stats on table delete
+ HTable stats = new HTable(UTIL.getConfiguration(), Constants.STATS_TABLE_NAME);
+ assertEquals("Stats table still has values after primary table delete", 0,
+ StatsTestUtil.getKeyValueCount(stats));
+
+ // and cleanup after ourselves
+ stats.close();
+ primaryTable.close();
+ }
+
+ /**
+ * Prepare the primary table descriptor for the test, for instance by adding the
+ * {@link StatisticTracker} under test to the table. This is called before the primary table is
+ * created.
+ * @param primary descriptor for the primary table; it has not yet been created
+ * @throws Exception on failure
+ */
+ protected abstract void preparePrimaryTableDescriptor(HTableDescriptor primary) throws Exception;
+
+ /**
+ * Verify the statistics on the given primary table after the table has been loaded, flushed, and
+ * compacted.
+ * @param primary {@link HTableDescriptor} for the primary table for which we were collecting
+ * statistics
+ * @throws Exception on failure
+ */
+ protected abstract void verifyStatistics(HTableDescriptor primary) throws Exception;
+}
diff --git a/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/util/StatsTestUtil.java b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/util/StatsTestUtil.java
new file mode 100644
index 00000000..cb3c634c
--- /dev/null
+++ b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/util/StatsTestUtil.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.salesforce.hbase.stats.util;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+
+import com.salesforce.hbase.stats.TestTrackerImpl;
+
+/**
+ * Helper utility for testing
+ */
+public class StatsTestUtil {
+
+ /**
+ * @return a valid {@link HTableDescriptor} for the primary table on which we want to collect
+ * statistics
+ */
+ public static HTableDescriptor getValidPrimaryTableDescriptor() {
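+ // just a named, empty descriptor - tests add column families and stats setup as needed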
+ HTableDescriptor table = new HTableDescriptor("primary_table_for_test");
+ return table;
+ }
+
+ /**
+ * Count the total number of {@link KeyValue}s in the table
+ * @param table the table to count
+ * @return the number of {@link KeyValue}s in the table
+ * @throws IOException if the table has an error while reading
+ */
+ public static int getKeyValueCount(HTable table) throws IOException {
+ Scan scan = new Scan();
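+ // request (effectively) all versions so every KeyValue is counted, not just the latest per cell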
+ scan.setMaxVersions(Integer.MAX_VALUE - 1);
+
+ ResultScanner results = table.getScanner(scan);
+ int count = 0;
+ for (Result res : results) {
+ count += res.list().size();
+ TestTrackerImpl.LOG.info(count + ") " + res);
+ }
+ results.close();
+
+ return count;
+ }
+
+}
diff --git a/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/util/TestSetupTable.java b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/util/TestSetupTable.java
new file mode 100644
index 00000000..b9075dd5
--- /dev/null
+++ b/contrib/hbase-stat/src/test/java/com/salesforce/hbase/stats/util/TestSetupTable.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package com.salesforce.hbase.stats.util;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Ensure that the statistics table is created and set up correctly
+ */
+public class TestSetupTable {
+
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void teardownCluster() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testCreatesStatTable() throws Exception {
+ HTableDescriptor primary = StatsTestUtil.getValidPrimaryTableDescriptor();
+ HBaseAdmin admin = UTIL.getHBaseAdmin();
+ SetupTableUtil.setupTable(admin, primary, true, true);
+
+ assertTrue("Statistics table didn't get created!", admin.tableExists(Constants.STATS_TABLE_NAME_BYTES));
+ // make sure it is a valid table
+ HTableDescriptor statDesc = admin.getTableDescriptor(Constants.STATS_TABLE_NAME_BYTES);
+ try {
+ SetupTableUtil.verifyStatsTable(statDesc);
+ } catch (Exception e) {
+ fail("Created statistics table isn't considered valid! Maybe missing a check in the creation?");
+ }
+
+ // cleanup after ourselves
+ admin.disableTable(Constants.STATS_TABLE_NAME_BYTES);
+ admin.deleteTable(Constants.STATS_TABLE_NAME_BYTES);
+ }
+}
\ No newline at end of file