From 83db224e1f5476a3ebebaaa1d92ece95a821de81 Mon Sep 17 00:00:00 2001 From: Haoqiong Bian Date: Tue, 23 Jul 2019 02:35:47 +0800 Subject: [PATCH] add comments --- .../ruc/iir/pixels/cache/PixelsCacheReader.java | 4 ++++ .../cn/edu/ruc/iir/pixels/cache/PixelsRadix.java | 14 ++++++++------ .../iir/pixels/hive/mapred/PixelsInputFormat.java | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pixels-cache/src/main/java/cn/edu/ruc/iir/pixels/cache/PixelsCacheReader.java b/pixels-cache/src/main/java/cn/edu/ruc/iir/pixels/cache/PixelsCacheReader.java index 3eac38efe..d56665316 100644 --- a/pixels-cache/src/main/java/cn/edu/ruc/iir/pixels/cache/PixelsCacheReader.java +++ b/pixels-cache/src/main/java/cn/edu/ruc/iir/pixels/cache/PixelsCacheReader.java @@ -137,6 +137,7 @@ private PixelsCacheIdx search(ByteBuffer keyBuffer) int bytesMatchedInNodeFound = 0; // get root + // TODO: does root node have an edge? int currentNodeHeader = indexFile.getInt(currentNodeOffset); dramAccessCounter++; int currentNodeChildrenNum = currentNodeHeader & 0x000001FF; @@ -179,10 +180,13 @@ private PixelsCacheIdx search(ByteBuffer keyBuffer) dramAccessCounter++; currentNodeChildrenNum = currentNodeHeader & 0x000001FF; currentNodeEdgeSize = (currentNodeHeader & 0x7FFFFE00) >>> 9; + // TODO: does max length of edge = 12? can we move currentNodeEdge allocation out before this loop? byte[] currentNodeEdge = new byte[currentNodeEdgeSize]; + // TODO: can we get header, edge and children of a node in one memory access? indexFile.getBytes(currentNodeOffset + 4 + currentNodeChildrenNum * 8, currentNodeEdge, 0, currentNodeEdgeSize); dramAccessCounter++; + // TODO: numEdgeBytes seems redundant. 
for (int i = 0, numEdgeBytes = currentNodeEdgeSize; i < numEdgeBytes && bytesMatched < keyLen; i++) { if (currentNodeEdge[i] != keyBuffer.get(bytesMatched)) diff --git a/pixels-cache/src/main/java/cn/edu/ruc/iir/pixels/cache/PixelsRadix.java b/pixels-cache/src/main/java/cn/edu/ruc/iir/pixels/cache/PixelsRadix.java index 5610151d5..1e7ff0429 100644 --- a/pixels-cache/src/main/java/cn/edu/ruc/iir/pixels/cache/PixelsRadix.java +++ b/pixels-cache/src/main/java/cn/edu/ruc/iir/pixels/cache/PixelsRadix.java @@ -47,30 +47,32 @@ public void putIfAbsent(long blockId, short rowGroupId, short columnId, PixelsCa * |root| -> |te| * -> |am| * -> |st| + *

* 1. If we put into the original tree, it's a EXACT_MATCH with the |st| node. * Then we add the new_value into the node or replace the node. - *

+ *

* 2. If we put into the original tree, it's a KEY_ENDS_AT_MID_EDGE with the |am| node. * Then we split the |am| node into two. * |root| -> |te| * -> |a| * -> |m| * -> |st| - *

+ *

* 3. If we put into the original tree, it's a MATCH_END_AT_MID_EDGE with the |am| node. * Then we split the |am| node into three. * |root| -> |te| * -> |a| - * -> |m| - * -> |k| + * -> |m| + * -> |k| * -> |st| - *

+ *

* 4. If we put into the original tree, it's a MATCH_END_AT_END_EDGE with the |am| node. * Then we add a new node containing the trailing bytes from the key, and append it to the |am| node. * |root| -> |te| * -> |am| - * -> |ster| + * -> |ster| * -> |st| + *

*/ private void putInternal(long blockId, short rowGroupId, short columnId, PixelsCacheIdx cacheIdx, boolean overwrite) { diff --git a/pixels-hive/src/main/java/cn/edu/ruc/iir/pixels/hive/mapred/PixelsInputFormat.java b/pixels-hive/src/main/java/cn/edu/ruc/iir/pixels/hive/mapred/PixelsInputFormat.java index eafe077e6..813f08438 100644 --- a/pixels-hive/src/main/java/cn/edu/ruc/iir/pixels/hive/mapred/PixelsInputFormat.java +++ b/pixels-hive/src/main/java/cn/edu/ruc/iir/pixels/hive/mapred/PixelsInputFormat.java @@ -354,7 +354,7 @@ private List getLayouts(SchemaTableName st) } /** - * + * TODO: reload input paths so that LOCATION in a hive table can be empty or any path. * @param job */ protected void init(JobConf job) throws IOException