This speeds up grouping by bytes-valued fields (keyword, text, ip, and wildcard) when the input is an ordinal block:

```
bytes_refs   22.213 ± 0.322 -> 19.848 ± 0.205 ns/op (*maybe* real, maybe noise. still good)
ordinals     didn't exist   ->  2.988 ± 0.011 ns/op
```

I see this as 20ns -> 3ns, an 85% speedup. We never had the ordinals branch before, so I'm expecting the old code was at about the same performance there - about 20ns per op. This also speeds up grouping by a pair of bytes-valued fields:

```
two_bytes_refs   83.112 ± 42.348 -> 46.521 ± 0.386 ns/op
two_ordinals     83.531 ± 23.473 ->  8.617 ± 0.105 ns/op
```

The speedup is much better when the fields are ordinals because hashing bytes is comparatively slow. I believe the ordinals case is quite common; I've run into it in quite a few profiles. A sketch of the idea follows the commit metadata below.
This commit is contained in:
parent 0e2f832516
commit 1212dee8b4
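The gist of the win, as a minimal standalone sketch: when a column arrives dictionary-encoded, hash each distinct dictionary entry once and remap each row's small integer ordinal through the result, instead of hashing every row's bytes. All class and method names below are invented for illustration; the real implementation is `BytesRefBlockHash#addOrdinalsVector` in the diff that follows.

```java
import java.util.HashMap;
import java.util.Map;

class OrdinalGroupingSketch {
    // The old path: one hash probe per row, and hashing the bytes dominates.
    static int[] groupIdsByHashing(String[] rowValues, Map<String, Integer> groupIds) {
        int[] out = new int[rowValues.length];
        for (int p = 0; p < rowValues.length; p++) {
            out[p] = groupIds.computeIfAbsent(rowValues[p], k -> groupIds.size());
        }
        return out;
    }

    // The new path: hash each distinct value once, then each row is an array read.
    static int[] groupIdsByOrdinals(int[] rowOrdinals, String[] dictionary, Map<String, Integer> groupIds) {
        int[] dictToGroup = new int[dictionary.length];
        for (int i = 0; i < dictionary.length; i++) {
            dictToGroup[i] = groupIds.computeIfAbsent(dictionary[i], k -> groupIds.size());
        }
        int[] out = new int[rowOrdinals.length];
        for (int p = 0; p < rowOrdinals.length; p++) {
            out[p] = dictToGroup[rowOrdinals[p]];
        }
        return out;
    }
}
```

With a dictionary of, say, ten distinct keys and thousands of rows per block, the hashing work drops from once per row to once per dictionary entry, which is where the 20ns -> 3ns per-op difference comes from.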
AggregatorBenchmark.java
@@ -30,10 +30,13 @@ import org.elasticsearch.compute.data.BlockFactory;
 import org.elasticsearch.compute.data.BooleanBlock;
 import org.elasticsearch.compute.data.BooleanVector;
 import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
 import org.elasticsearch.compute.data.DoubleBlock;
 import org.elasticsearch.compute.data.ElementType;
 import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
 import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.OrdinalBytesRefVector;
 import org.elasticsearch.compute.data.Page;
 import org.elasticsearch.compute.operator.AggregationOperator;
 import org.elasticsearch.compute.operator.DriverContext;
@@ -78,7 +81,10 @@ public class AggregatorBenchmark {
     private static final String DOUBLES = "doubles";
     private static final String BOOLEANS = "booleans";
     private static final String BYTES_REFS = "bytes_refs";
+    private static final String ORDINALS = "ordinals";
     private static final String TWO_LONGS = "two_" + LONGS;
+    private static final String TWO_BYTES_REFS = "two_" + BYTES_REFS;
+    private static final String TWO_ORDINALS = "two_" + ORDINALS;
     private static final String LONGS_AND_BYTES_REFS = LONGS + "_and_" + BYTES_REFS;
     private static final String TWO_LONGS_AND_BYTES_REFS = "two_" + LONGS + "_and_" + BYTES_REFS;

@@ -119,7 +125,21 @@ public class AggregatorBenchmark {
         }
     }

-    @Param({ NONE, LONGS, INTS, DOUBLES, BOOLEANS, BYTES_REFS, TWO_LONGS, LONGS_AND_BYTES_REFS, TWO_LONGS_AND_BYTES_REFS })
+    @Param(
+        {
+            NONE,
+            LONGS,
+            INTS,
+            DOUBLES,
+            BOOLEANS,
+            BYTES_REFS,
+            ORDINALS,
+            TWO_LONGS,
+            TWO_BYTES_REFS,
+            TWO_ORDINALS,
+            LONGS_AND_BYTES_REFS,
+            TWO_LONGS_AND_BYTES_REFS }
+    )
     public String grouping;

     @Param({ COUNT, COUNT_DISTINCT, MIN, MAX, SUM })
@@ -144,8 +164,12 @@ public class AggregatorBenchmark {
             case INTS -> List.of(new BlockHash.GroupSpec(0, ElementType.INT));
             case DOUBLES -> List.of(new BlockHash.GroupSpec(0, ElementType.DOUBLE));
             case BOOLEANS -> List.of(new BlockHash.GroupSpec(0, ElementType.BOOLEAN));
-            case BYTES_REFS -> List.of(new BlockHash.GroupSpec(0, ElementType.BYTES_REF));
+            case BYTES_REFS, ORDINALS -> List.of(new BlockHash.GroupSpec(0, ElementType.BYTES_REF));
             case TWO_LONGS -> List.of(new BlockHash.GroupSpec(0, ElementType.LONG), new BlockHash.GroupSpec(1, ElementType.LONG));
+            case TWO_BYTES_REFS, TWO_ORDINALS -> List.of(
+                new BlockHash.GroupSpec(0, ElementType.BYTES_REF),
+                new BlockHash.GroupSpec(1, ElementType.BYTES_REF)
+            );
             case LONGS_AND_BYTES_REFS -> List.of(
                 new BlockHash.GroupSpec(0, ElementType.LONG),
                 new BlockHash.GroupSpec(1, ElementType.BYTES_REF)
@@ -218,6 +242,10 @@ public class AggregatorBenchmark {
                 checkGroupingBlock(prefix, LONGS, page.getBlock(0));
                 checkGroupingBlock(prefix, LONGS, page.getBlock(1));
             }
+            case TWO_BYTES_REFS, TWO_ORDINALS -> {
+                checkGroupingBlock(prefix, BYTES_REFS, page.getBlock(0));
+                checkGroupingBlock(prefix, BYTES_REFS, page.getBlock(1));
+            }
             case LONGS_AND_BYTES_REFS -> {
                 checkGroupingBlock(prefix, LONGS, page.getBlock(0));
                 checkGroupingBlock(prefix, BYTES_REFS, page.getBlock(1));
@@ -379,7 +407,7 @@ public class AggregatorBenchmark {
                     throw new AssertionError(prefix + "bad group expected [true] but was [" + groups.getBoolean(1) + "]");
                 }
             }
-            case BYTES_REFS -> {
+            case BYTES_REFS, ORDINALS -> {
                 BytesRefBlock groups = (BytesRefBlock) block;
                 for (int g = 0; g < GROUPS; g++) {
                     if (false == groups.getBytesRef(g, new BytesRef()).equals(bytesGroup(g))) {
@@ -508,6 +536,8 @@ public class AggregatorBenchmark {
     private static List<Block> groupingBlocks(String grouping, String blockType) {
         return switch (grouping) {
             case TWO_LONGS -> List.of(groupingBlock(LONGS, blockType), groupingBlock(LONGS, blockType));
+            case TWO_BYTES_REFS -> List.of(groupingBlock(BYTES_REFS, blockType), groupingBlock(BYTES_REFS, blockType));
+            case TWO_ORDINALS -> List.of(groupingBlock(ORDINALS, blockType), groupingBlock(ORDINALS, blockType));
             case LONGS_AND_BYTES_REFS -> List.of(groupingBlock(LONGS, blockType), groupingBlock(BYTES_REFS, blockType));
             case TWO_LONGS_AND_BYTES_REFS -> List.of(
                 groupingBlock(LONGS, blockType),
@@ -570,6 +600,19 @@ public class AggregatorBenchmark {
                 }
                 yield builder.build();
             }
+            case ORDINALS -> {
+                IntVector.Builder ordinals = blockFactory.newIntVectorBuilder(BLOCK_LENGTH * valuesPerGroup);
+                for (int i = 0; i < BLOCK_LENGTH; i++) {
+                    for (int v = 0; v < valuesPerGroup; v++) {
+                        ordinals.appendInt(i % GROUPS);
+                    }
+                }
+                BytesRefVector.Builder bytes = blockFactory.newBytesRefVectorBuilder(BLOCK_LENGTH * valuesPerGroup);
+                for (int i = 0; i < GROUPS; i++) {
+                    bytes.appendBytesRef(bytesGroup(i));
+                }
+                yield new OrdinalBytesRefVector(ordinals.build(), bytes.build()).asBlock();
+            }
             default -> throw new UnsupportedOperationException("unsupported grouping [" + grouping + "]");
         };
     }
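For readers unfamiliar with the layout the new `ORDINALS` benchmark case builds: an ordinal block stores one small int per position plus a dictionary of distinct values, so `BLOCK_LENGTH` positions cycling through `GROUPS` keys carry only `GROUPS` copies of the actual bytes. A rough standalone model of that shape; the record below is illustrative, not the real `OrdinalBytesRefVector`:

```java
// One int per position pointing into a small dictionary of distinct values.
record OrdinalVectorModel(int[] ordinals, String[] dictionary) {
    String get(int position) {
        return dictionary[ordinals[position]];
    }
}

class OrdinalVectorModelDemo {
    public static void main(String[] args) {
        // Eight positions cycling through a four-entry dictionary,
        // like the `i % GROUPS` loop in the benchmark above.
        OrdinalVectorModel v = new OrdinalVectorModel(
            new int[] { 0, 1, 2, 3, 0, 1, 2, 3 },
            new String[] { "group-0", "group-1", "group-2", "group-3" }
        );
        System.out.println(v.get(5)); // prints group-1
    }
}
```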
docs/changelog/114021.yaml
@@ -0,0 +1,5 @@
+pr: 114021
+summary: "ESQL: Speed up grouping by bytes"
+area: ES|QL
+type: enhancement
+issues: []
BytesRefBlockHash.java
@@ -23,15 +23,18 @@ import org.elasticsearch.compute.data.BytesRefVector;
 import org.elasticsearch.compute.data.IntBlock;
 import org.elasticsearch.compute.data.IntVector;
 import org.elasticsearch.compute.data.OrdinalBytesRefBlock;
+import org.elasticsearch.compute.data.OrdinalBytesRefVector;
 import org.elasticsearch.compute.data.Page;
 import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupe;
 import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupeBytesRef;
+import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupeInt;
 import org.elasticsearch.core.ReleasableIterator;

 import java.io.IOException;

 /**
  * Maps a {@link BytesRefBlock} column to group ids.
+ * This class is generated. Do not edit it.
  */
 final class BytesRefBlockHash extends BlockHash {
     private final int channel;
@@ -54,6 +57,7 @@ final class BytesRefBlockHash extends BlockHash {

     @Override
     public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
+        // TODO track raw counts and which implementation we pick for the profiler - #114008
         var block = page.getBlock(channel);
         if (block.areAllValuesNull()) {
             seenNull = true;
@@ -76,6 +80,10 @@ final class BytesRefBlockHash extends BlockHash {
     }

     IntVector add(BytesRefVector vector) {
+        var ordinals = vector.asOrdinals();
+        if (ordinals != null) {
+            return addOrdinalsVector(ordinals);
+        }
         BytesRef scratch = new BytesRef();
         int positions = vector.getPositionCount();
         try (var builder = blockFactory.newIntVectorFixedBuilder(positions)) {
@@ -113,15 +121,29 @@ final class BytesRefBlockHash extends BlockHash {
         return ReleasableIterator.single(lookup(vector));
     }

-    private IntBlock addOrdinalsBlock(OrdinalBytesRefBlock inputBlock) {
-        var inputOrds = inputBlock.getOrdinalsBlock();
+    private IntVector addOrdinalsVector(OrdinalBytesRefVector inputBlock) {
+        IntVector inputOrds = inputBlock.getOrdinalsVector();
         try (
-            var builder = blockFactory.newIntBlockBuilder(inputOrds.getPositionCount());
+            var builder = blockFactory.newIntVectorBuilder(inputOrds.getPositionCount());
             var hashOrds = add(inputBlock.getDictionaryVector())
         ) {
-            for (int i = 0; i < inputOrds.getPositionCount(); i++) {
-                int valueCount = inputOrds.getValueCount(i);
-                int firstIndex = inputOrds.getFirstValueIndex(i);
+            for (int p = 0; p < inputOrds.getPositionCount(); p++) {
+                int ord = hashOrds.getInt(inputOrds.getInt(p));
+                builder.appendInt(ord);
+            }
+            return builder.build();
+        }
+    }
+
+    private IntBlock addOrdinalsBlock(OrdinalBytesRefBlock inputBlock) {
+        try (
+            IntBlock inputOrds = new MultivalueDedupeInt(inputBlock.getOrdinalsBlock()).dedupeToBlockAdaptive(blockFactory);
+            IntBlock.Builder builder = blockFactory.newIntBlockBuilder(inputOrds.getPositionCount());
+            IntVector hashOrds = add(inputBlock.getDictionaryVector())
+        ) {
+            for (int p = 0; p < inputOrds.getPositionCount(); p++) {
+                int valueCount = inputOrds.getValueCount(p);
+                int firstIndex = inputOrds.getFirstValueIndex(p);
                 switch (valueCount) {
                     case 0 -> {
                         builder.appendInt(0);
@@ -132,9 +154,11 @@ final class BytesRefBlockHash extends BlockHash {
                         builder.appendInt(ord);
                     }
                     default -> {
+                        int start = firstIndex;
+                        int end = firstIndex + valueCount;
                         builder.beginPositionEntry();
-                        for (int v = 0; v < valueCount; v++) {
-                            int ord = hashOrds.getInt(inputOrds.getInt(firstIndex + i));
+                        for (int i = start; i < end; i++) {
+                            int ord = hashOrds.getInt(inputOrds.getInt(i));
                             builder.appendInt(ord);
                         }
                         builder.endPositionEntry();
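To make the single-valued remap in `addOrdinalsVector` concrete, here is a hedged trace with plain arrays standing in for `IntVector` and the builders. Group ids start at 1 because `BytesRefBlockHash` reserves ord 0 for null, which matches the expectations in the new tests near the bottom of this diff:

```java
class AddOrdinalsTrace {
    public static void main(String[] args) {
        // hashOrds: the group id assigned to each dictionary entry by
        // add(inputBlock.getDictionaryVector()); a three-entry dictionary
        // hashed in order gets ids 1, 2, 3.
        int[] hashOrds = { 1, 2, 3 };
        // inputOrds: each row's index into the dictionary.
        int[] inputOrds = { 2, 0, 0, 1 };
        int[] groupIds = new int[inputOrds.length];
        for (int p = 0; p < inputOrds.length; p++) {
            groupIds[p] = hashOrds[inputOrds[p]]; // one array read per row
        }
        // Prints [3, 1, 1, 2]: no byte hashing anywhere on the per-row path.
        System.out.println(java.util.Arrays.toString(groupIds));
    }
}
```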
DoubleBlockHash.java
@@ -28,6 +28,7 @@ import java.util.BitSet;

 /**
  * Maps a {@link DoubleBlock} column to group ids.
+ * This class is generated. Do not edit it.
  */
 final class DoubleBlockHash extends BlockHash {
     private final int channel;
@@ -50,6 +51,7 @@ final class DoubleBlockHash extends BlockHash {

     @Override
     public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
+        // TODO track raw counts and which implementation we pick for the profiler - #114008
         var block = page.getBlock(channel);
         if (block.areAllValuesNull()) {
             seenNull = true;
IntBlockHash.java
@@ -26,6 +26,7 @@ import java.util.BitSet;

 /**
  * Maps a {@link IntBlock} column to group ids.
+ * This class is generated. Do not edit it.
  */
 final class IntBlockHash extends BlockHash {
     private final int channel;
@@ -48,6 +49,7 @@ final class IntBlockHash extends BlockHash {

     @Override
     public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
+        // TODO track raw counts and which implementation we pick for the profiler - #114008
         var block = page.getBlock(channel);
         if (block.areAllValuesNull()) {
             seenNull = true;
LongBlockHash.java
@@ -28,6 +28,7 @@ import java.util.BitSet;

 /**
  * Maps a {@link LongBlock} column to group ids.
+ * This class is generated. Do not edit it.
  */
 final class LongBlockHash extends BlockHash {
     private final int channel;
@@ -50,6 +51,7 @@ final class LongBlockHash extends BlockHash {

     @Override
     public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
+        // TODO track raw counts and which implementation we pick for the profiler - #114008
         var block = page.getBlock(channel);
         if (block.areAllValuesNull()) {
             seenNull = true;
BlockHash.java
@@ -11,6 +11,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.BitArray;
 import org.elasticsearch.common.util.BytesRefHash;
+import org.elasticsearch.common.util.Int3Hash;
 import org.elasticsearch.common.util.LongHash;
 import org.elasticsearch.common.util.LongLongHash;
 import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction;
@@ -28,14 +29,37 @@ import java.util.Iterator;
 import java.util.List;

 /**
- * A specialized hash table implementation maps values of a {@link Block} to ids (in longs).
- * This class delegates to {@link LongHash} or {@link BytesRefHash}.
- *
- * @see LongHash
- * @see BytesRefHash
+ * Specialized hash table implementations that map rows to a <strong>set</strong>
+ * of bucket IDs to which they belong to implement {@code GROUP BY} expressions.
+ * <p>
+ * A row is always in at least one bucket so the results are never {@code null}.
+ * {@code null} valued key columns will map to some integer bucket id.
+ * If none of key columns are multivalued then the output is always an
+ * {@link IntVector}. If any of the key are multivalued then a row is
+ * in a bucket for each value. If more than one key is multivalued then
+ * the row is in the combinatorial explosion of all value combinations.
+ * Luckily for the number of values rows can only be in each bucket once.
+ * Unluckily, it's the responsibility of {@link BlockHash} to remove those
+ * duplicates.
+ * </p>
+ * <p>
+ * These classes typically delegate to some combination of {@link BytesRefHash},
+ * {@link LongHash}, {@link LongLongHash}, {@link Int3Hash}. They don't
+ * <strong>technically</strong> have to be hash tables, so long as they
+ * implement the deduplication semantics above and vend integer ids.
+ * </p>
+ * <p>
+ * The integer ids are assigned to offsets into arrays of aggregation states
+ * so its permissible to have gaps in the ints. But large gaps are a bad
+ * idea because they'll waste space in the aggregations that use these
+ * positions. For example, {@link BooleanBlockHash} assigns {@code 0} to
+ * {@code null}, {@code 1} to {@code false}, and {@code 2} to {@code true}
+ * and that's <strong>fine</strong> and simple and good because it'll never
+ * leave a big gap, even if we never see {@code null}.
+ * </p>
  */
 public abstract sealed class BlockHash implements Releasable, SeenGroupIds //
-    permits BooleanBlockHash, BytesRefBlockHash, DoubleBlockHash, IntBlockHash, LongBlockHash, BytesRef3BlockHash, //
+    permits BooleanBlockHash, BytesRefBlockHash, DoubleBlockHash, IntBlockHash, LongBlockHash, BytesRef2BlockHash, BytesRef3BlockHash, //
     NullBlockHash, PackedValuesBlockHash, BytesRefLongBlockHash, LongLongBlockHash, TimeSeriesBlockHash {

     protected final BlockFactory blockFactory;
@@ -98,8 +122,19 @@ public abstract sealed class BlockHash implements Releasable, SeenGroupIds //
         if (groups.size() == 1) {
             return newForElementType(groups.get(0).channel(), groups.get(0).elementType(), blockFactory);
         }
-        if (groups.size() == 3 && groups.stream().allMatch(g -> g.elementType == ElementType.BYTES_REF)) {
-            return new BytesRef3BlockHash(blockFactory, groups.get(0).channel, groups.get(1).channel, groups.get(2).channel, emitBatchSize);
+        if (groups.stream().allMatch(g -> g.elementType == ElementType.BYTES_REF)) {
+            switch (groups.size()) {
+                case 2:
+                    return new BytesRef2BlockHash(blockFactory, groups.get(0).channel, groups.get(1).channel, emitBatchSize);
+                case 3:
+                    return new BytesRef3BlockHash(
+                        blockFactory,
+                        groups.get(0).channel,
+                        groups.get(1).channel,
+                        groups.get(2).channel,
+                        emitBatchSize
+                    );
+            }
         }
         if (allowBrokenOptimizations && groups.size() == 2) {
             var g1 = groups.get(0);
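The combinatorial-explosion rule in the new javadoc is easy to see with a tiny example: a row whose two key columns hold `[a, b]` and `[x, y]` belongs to four buckets, and repeated values must not produce repeated memberships; that dedup is the responsibility the javadoc assigns to `BlockHash`. A plain-Java sketch of the semantics, not the ES implementation:

```java
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

class MultivaluedBucketsDemo {
    public static void main(String[] args) {
        List<String> key1 = List.of("a", "b");
        List<String> key2 = List.of("x", "y", "x"); // note the duplicate "x"
        // The set models the dedup: the row lands in each bucket only once.
        Set<List<String>> buckets = new LinkedHashSet<>();
        for (String v1 : key1) {
            for (String v2 : key2) {
                buckets.add(List.of(v1, v2));
            }
        }
        System.out.println(buckets); // [[a, x], [a, y], [b, x], [b, y]]
    }
}
```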
BooleanBlockHash.java
@@ -25,8 +25,9 @@ import static org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupeBoolea
 import static org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupeBoolean.TRUE_ORD;

 /**
- * Maps a {@link BooleanBlock} column to group ids. Assigns group
- * {@code 0} to {@code false} and group {@code 1} to {@code true}.
+ * Maps a {@link BooleanBlock} column to group ids. Assigns
+ * {@code 0} to {@code null}, {@code 1} to {@code false}, and
+ * {@code 2} to {@code true}.
  */
 final class BooleanBlockHash extends BlockHash {
     private final int channel;
BytesRef2BlockHash.java
@@ -0,0 +1,196 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation.blockhash;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.LongHash;
+import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
+
+import java.util.Locale;
+
+/**
+ * Maps two {@link BytesRefBlock}s to group ids.
+ */
+final class BytesRef2BlockHash extends BlockHash {
+    private final int emitBatchSize;
+    private final int channel1;
+    private final int channel2;
+    private final BytesRefBlockHash hash1;
+    private final BytesRefBlockHash hash2;
+    private final LongHash finalHash;
+
+    BytesRef2BlockHash(BlockFactory blockFactory, int channel1, int channel2, int emitBatchSize) {
+        super(blockFactory);
+        this.emitBatchSize = emitBatchSize;
+        this.channel1 = channel1;
+        this.channel2 = channel2;
+        boolean success = false;
+        try {
+            this.hash1 = new BytesRefBlockHash(channel1, blockFactory);
+            this.hash2 = new BytesRefBlockHash(channel2, blockFactory);
+            this.finalHash = new LongHash(1, blockFactory.bigArrays());
+            success = true;
+        } finally {
+            if (success == false) {
+                close();
+            }
+        }
+    }
+
+    @Override
+    public void close() {
+        Releasables.close(hash1, hash2, finalHash);
+    }
+
+    @Override
+    public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
+        BytesRefBlock b1 = page.getBlock(channel1);
+        BytesRefBlock b2 = page.getBlock(channel2);
+        BytesRefVector v1 = b1.asVector();
+        BytesRefVector v2 = b2.asVector();
+        if (v1 != null && v2 != null) {
+            addVectors(v1, v2, addInput);
+        } else {
+            try (IntBlock k1 = hash1.add(b1); IntBlock k2 = hash2.add(b2)) {
+                try (AddWork work = new AddWork(k1, k2, addInput)) {
+                    work.add();
+                }
+            }
+        }
+    }
+
+    private void addVectors(BytesRefVector v1, BytesRefVector v2, GroupingAggregatorFunction.AddInput addInput) {
+        final int positionCount = v1.getPositionCount();
+        try (IntVector.FixedBuilder ordsBuilder = blockFactory.newIntVectorFixedBuilder(positionCount)) {
+            try (IntVector k1 = hash1.add(v1); IntVector k2 = hash2.add(v2)) {
+                for (int p = 0; p < positionCount; p++) {
+                    long ord = ord(k1.getInt(p), k2.getInt(p));
+                    ordsBuilder.appendInt(p, Math.toIntExact(ord));
+                }
+            }
+            try (IntVector ords = ordsBuilder.build()) {
+                addInput.add(0, ords);
+            }
+        }
+    }
+
+    private class AddWork extends AddPage {
+        final IntBlock b1;
+        final IntBlock b2;
+
+        AddWork(IntBlock b1, IntBlock b2, GroupingAggregatorFunction.AddInput addInput) {
+            super(blockFactory, emitBatchSize, addInput);
+            this.b1 = b1;
+            this.b2 = b2;
+        }
+
+        void add() {
+            int positionCount = b1.getPositionCount();
+            for (int i = 0; i < positionCount; i++) {
+                int v1 = b1.getValueCount(i);
+                int v2 = b2.getValueCount(i);
+                int first1 = b1.getFirstValueIndex(i);
+                int first2 = b2.getFirstValueIndex(i);
+                if (v1 == 1 && v2 == 1) {
+                    long ord = ord(b1.getInt(first1), b2.getInt(first2));
+                    appendOrdSv(i, Math.toIntExact(ord));
+                    continue;
+                }
+                for (int i1 = 0; i1 < v1; i1++) {
+                    int k1 = b1.getInt(first1 + i1);
+                    for (int i2 = 0; i2 < v2; i2++) {
+                        int k2 = b2.getInt(first2 + i2);
+                        long ord = ord(k1, k2);
+                        appendOrdInMv(i, Math.toIntExact(ord));
+                    }
+                }
+                finishMv();
+            }
+            flushRemaining();
+        }
+    }
+
+    private long ord(int k1, int k2) {
+        return hashOrdToGroup(finalHash.add((long) k2 << 32 | k1));
+    }
+
+    @Override
+    public ReleasableIterator<IntBlock> lookup(Page page, ByteSizeValue targetBlockSize) {
+        throw new UnsupportedOperationException("TODO");
+    }
+
+    @Override
+    public Block[] getKeys() {
+        // TODO Build Ordinals blocks #114010
+        final int positions = (int) finalHash.size();
+        final BytesRef scratch = new BytesRef();
+        final BytesRefBlock[] outputBlocks = new BytesRefBlock[2];
+        try {
+            try (BytesRefBlock.Builder b1 = blockFactory.newBytesRefBlockBuilder(positions)) {
+                for (int i = 0; i < positions; i++) {
+                    int k1 = (int) (finalHash.get(i) & 0xffffL);
+                    if (k1 == 0) {
+                        b1.appendNull();
+                    } else {
+                        b1.appendBytesRef(hash1.hash.get(k1 - 1, scratch));
+                    }
+                }
+                outputBlocks[0] = b1.build();
+            }
+            try (BytesRefBlock.Builder b2 = blockFactory.newBytesRefBlockBuilder(positions)) {
+                for (int i = 0; i < positions; i++) {
+                    int k2 = (int) (finalHash.get(i) >>> 32);
+                    if (k2 == 0) {
+                        b2.appendNull();
+                    } else {
+                        b2.appendBytesRef(hash2.hash.get(k2 - 1, scratch));
+                    }
+                }
+                outputBlocks[1] = b2.build();
+            }
+            return outputBlocks;
+        } finally {
+            if (outputBlocks[outputBlocks.length - 1] == null) {
+                Releasables.close(outputBlocks);
+            }
+        }
+    }
+
+    @Override
+    public BitArray seenGroupIds(BigArrays bigArrays) {
+        return new Range(0, Math.toIntExact(finalHash.size())).seenGroupIds(bigArrays);
+    }
+
+    @Override
+    public IntVector nonEmpty() {
+        return IntVector.range(0, Math.toIntExact(finalHash.size()), blockFactory);
+    }
+
+    @Override
+    public String toString() {
+        return String.format(
+            Locale.ROOT,
+            "BytesRef2BlockHash{keys=[channel1=%d, channel2=%d], entries=%d}",
+            channel1,
+            channel2,
+            finalHash.size()
+        );
+    }
+}
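The `LongHash` key used by `ord(int k1, int k2)` above packs the two per-column ords into a single long, `k2` in the high 32 bits and `k1` in the low 32 bits. A standalone arithmetic check of that layout; the unpacking mask and shift here are mine, shown only to illustrate how the halves come back apart:

```java
class OrdPackingDemo {
    static long pack(int k1, int k2) {
        // Same expression as BytesRef2BlockHash#ord above.
        return (long) k2 << 32 | k1;
    }

    public static void main(String[] args) {
        long packed = pack(7, 3);
        int k1 = (int) (packed & 0xffffffffL); // low 32 bits  -> 7
        int k2 = (int) (packed >>> 32);        // high 32 bits -> 3
        System.out.println(k1 + " " + k2);     // prints 7 3
    }
}
```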
BytesRef3BlockHash.java
@@ -85,7 +85,6 @@ final class BytesRef3BlockHash extends BlockHash {
     private void addVectors(BytesRefVector v1, BytesRefVector v2, BytesRefVector v3, GroupingAggregatorFunction.AddInput addInput) {
         final int positionCount = v1.getPositionCount();
         try (IntVector.FixedBuilder ordsBuilder = blockFactory.newIntVectorFixedBuilder(positionCount)) {
-            // TODO: enable ordinal vectors in BytesRefBlockHash
             try (IntVector k1 = hash1.add(v1); IntVector k2 = hash2.add(v2); IntVector k3 = hash3.add(v3)) {
                 for (int p = 0; p < positionCount; p++) {
                     long ord = hashOrdToGroup(finalHash.add(k1.getInt(p), k2.getInt(p), k3.getInt(p)));
@@ -148,6 +147,7 @@ final class BytesRef3BlockHash extends BlockHash {

     @Override
     public Block[] getKeys() {
+        // TODO Build Ordinals blocks #114010
         final int positions = (int) finalHash.size();
         final BytesRef scratch = new BytesRef();
         final BytesRefBlock[] outputBlocks = new BytesRefBlock[3];
X-BlockHash.java.st
@@ -28,6 +28,7 @@ import org.elasticsearch.compute.data.BytesRefVector;
 import org.elasticsearch.compute.data.IntBlock;
 import org.elasticsearch.compute.data.IntVector;
 import org.elasticsearch.compute.data.OrdinalBytesRefBlock;
+import org.elasticsearch.compute.data.OrdinalBytesRefVector;
 $elseif(double)$
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.BlockFactory;
@@ -51,6 +52,9 @@ $endif$
 import org.elasticsearch.compute.data.Page;
 import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupe;
 import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupe$Type$;
+$if(BytesRef)$
+import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupeInt;
+$endif$
 import org.elasticsearch.core.ReleasableIterator;

 $if(BytesRef)$
@@ -62,6 +66,7 @@ import java.util.BitSet;
 $endif$
 /**
  * Maps a {@link $Type$Block} column to group ids.
+ * This class is generated. Do not edit it.
  */
 final class $Type$BlockHash extends BlockHash {
     private final int channel;
@@ -84,6 +89,7 @@ final class $Type$BlockHash extends BlockHash {

     @Override
     public void add(Page page, GroupingAggregatorFunction.AddInput addInput) {
+        // TODO track raw counts and which implementation we pick for the profiler - #114008
         var block = page.getBlock(channel);
         if (block.areAllValuesNull()) {
             seenNull = true;
@@ -107,6 +113,10 @@ final class $Type$BlockHash extends BlockHash {

     IntVector add($Type$Vector vector) {
 $if(BytesRef)$
+        var ordinals = vector.asOrdinals();
+        if (ordinals != null) {
+            return addOrdinalsVector(ordinals);
+        }
         BytesRef scratch = new BytesRef();
 $endif$
         int positions = vector.getPositionCount();
@@ -154,15 +164,29 @@ $endif$
     }

 $if(BytesRef)$
-    private IntBlock addOrdinalsBlock(OrdinalBytesRefBlock inputBlock) {
-        var inputOrds = inputBlock.getOrdinalsBlock();
+    private IntVector addOrdinalsVector(OrdinalBytesRefVector inputBlock) {
+        IntVector inputOrds = inputBlock.getOrdinalsVector();
         try (
-            var builder = blockFactory.newIntBlockBuilder(inputOrds.getPositionCount());
+            var builder = blockFactory.newIntVectorBuilder(inputOrds.getPositionCount());
             var hashOrds = add(inputBlock.getDictionaryVector())
         ) {
-            for (int i = 0; i < inputOrds.getPositionCount(); i++) {
-                int valueCount = inputOrds.getValueCount(i);
-                int firstIndex = inputOrds.getFirstValueIndex(i);
+            for (int p = 0; p < inputOrds.getPositionCount(); p++) {
+                int ord = hashOrds.getInt(inputOrds.getInt(p));
+                builder.appendInt(ord);
+            }
+            return builder.build();
+        }
+    }
+
+    private IntBlock addOrdinalsBlock(OrdinalBytesRefBlock inputBlock) {
+        try (
+            IntBlock inputOrds = new MultivalueDedupeInt(inputBlock.getOrdinalsBlock()).dedupeToBlockAdaptive(blockFactory);
+            IntBlock.Builder builder = blockFactory.newIntBlockBuilder(inputOrds.getPositionCount());
+            IntVector hashOrds = add(inputBlock.getDictionaryVector())
+        ) {
+            for (int p = 0; p < inputOrds.getPositionCount(); p++) {
+                int valueCount = inputOrds.getValueCount(p);
+                int firstIndex = inputOrds.getFirstValueIndex(p);
                 switch (valueCount) {
                     case 0 -> {
                         builder.appendInt(0);
@@ -173,9 +197,11 @@ $if(BytesRef)$
                         builder.appendInt(ord);
                     }
                     default -> {
+                        int start = firstIndex;
+                        int end = firstIndex + valueCount;
                         builder.beginPositionEntry();
-                        for (int v = 0; v < valueCount; v++) {
-                            int ord = hashOrds.getInt(inputOrds.getInt(firstIndex + i));
+                        for (int i = start; i < end; i++) {
+                            int ord = hashOrds.getInt(inputOrds.getInt(i));
                             builder.appendInt(ord);
                         }
                         builder.endPositionEntry();
BlockHashRandomizedTests.java
@@ -21,10 +21,13 @@ import org.elasticsearch.compute.data.BasicBlockTests;
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.BlockFactory;
 import org.elasticsearch.compute.data.BlockTestUtils;
+import org.elasticsearch.compute.data.BytesRefVector;
 import org.elasticsearch.compute.data.ElementType;
 import org.elasticsearch.compute.data.IntBlock;
 import org.elasticsearch.compute.data.MockBlockFactory;
+import org.elasticsearch.compute.data.OrdinalBytesRefBlock;
 import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.data.TestBlockFactory;
 import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupeTests;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
@@ -38,11 +41,13 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableSet;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.stream.Stream;

 import static org.elasticsearch.test.ListMatcher.matchesList;
 import static org.elasticsearch.test.MapMatcher.assertMap;
@@ -58,26 +63,40 @@ import static org.mockito.Mockito.when;
 public class BlockHashRandomizedTests extends ESTestCase {
     @ParametersFactory
     public static List<Object[]> params() {
-        List<Object[]> params = new ArrayList<>();
+        List<List<? extends Type>> allowedTypesChoices = List.of(
+            /*
+             * Run with only `LONG` elements because we have some
+             * optimizations that hit if you only have those.
+             */
+            List.of(new Basic(ElementType.LONG)),
+            /*
+             * Run with only `BYTES_REF` elements because we have some
+             * optimizations that hit if you only have those.
+             */
+            List.of(new Basic(ElementType.BYTES_REF)),
+            /*
+             * Run with only `BYTES_REF` elements in an OrdinalBytesRefBlock
+             * because we have a few optimizations that use it.
+             */
+            List.of(new Ordinals(10)),
+            /*
+             * Run with only `LONG` and `BYTES_REF` elements because
+             * we have some optimizations that hit if you only have
+             * those.
+             */
+            List.of(new Basic(ElementType.LONG), new Basic(ElementType.BYTES_REF)),
+            /*
+             * Any random source.
+             */
+            Stream.concat(Stream.of(new Ordinals(10)), MultivalueDedupeTests.supportedTypes().stream().map(Basic::new)).toList()
+        );

+        List<Object[]> params = new ArrayList<>();
         for (boolean forcePackedHash : new boolean[] { false, true }) {
             for (int groups : new int[] { 1, 2, 3, 4, 5, 10 }) {
                 for (int maxValuesPerPosition : new int[] { 1, 3 }) {
                     for (int dups : new int[] { 0, 2 }) {
-                        for (List<ElementType> allowedTypes : List.of(
-                            /*
-                             * Run with only `LONG` elements because we have some
-                             * optimizations that hit if you only have those.
-                             */
-                            List.of(ElementType.LONG),
-                            /*
-                             * Run with only `LONG` and `BYTES_REF` elements because
-                             * we have some optimizations that hit if you only have
-                             * those.
-                             */
-                            List.of(ElementType.LONG, ElementType.BYTES_REF),
-                            MultivalueDedupeTests.supportedTypes()
-                        )) {
+                        for (List<? extends Type> allowedTypes : allowedTypesChoices) {
                             params.add(new Object[] { forcePackedHash, groups, maxValuesPerPosition, dups, allowedTypes });
                         }
                     }
@@ -87,18 +106,33 @@ public class BlockHashRandomizedTests extends ESTestCase {
         return params;
     }

+    /**
+     * The type of {@link Block} being tested.
+     */
+    interface Type {
+        /**
+         * The type of the {@link ElementType elements} in the {@link Block}.
+         */
+        ElementType elementType();
+
+        /**
+         * Build a random {@link Block}.
+         */
+        BasicBlockTests.RandomBlock randomBlock(int positionCount, int maxValuesPerPosition, int dups);
+    }
+
     private final boolean forcePackedHash;
     private final int groups;
     private final int maxValuesPerPosition;
     private final int dups;
-    private final List<ElementType> allowedTypes;
+    private final List<? extends Type> allowedTypes;

     public BlockHashRandomizedTests(
         @Name("forcePackedHash") boolean forcePackedHash,
         @Name("groups") int groups,
         @Name("maxValuesPerPosition") int maxValuesPerPosition,
         @Name("dups") int dups,
-        @Name("allowedTypes") List<ElementType> allowedTypes
+        @Name("allowedTypes") List<Type> allowedTypes
     ) {
         this.forcePackedHash = forcePackedHash;
         this.groups = groups;
@@ -127,21 +161,22 @@ public class BlockHashRandomizedTests extends ESTestCase {
     }

     private void test(MockBlockFactory blockFactory) {
-        List<ElementType> types = randomList(groups, groups, () -> randomFrom(allowedTypes));
+        List<Type> types = randomList(groups, groups, () -> randomFrom(allowedTypes));
+        List<ElementType> elementTypes = types.stream().map(Type::elementType).toList();
         BasicBlockTests.RandomBlock[] randomBlocks = new BasicBlockTests.RandomBlock[types.size()];
         Block[] blocks = new Block[types.size()];
-        int pageCount = between(1, 10);
+        int pageCount = between(1, groups < 10 ? 10 : 5);
         int positionCount = 100;
         int emitBatchSize = 100;
-        try (BlockHash blockHash = newBlockHash(blockFactory, emitBatchSize, types)) {
+        try (BlockHash blockHash = newBlockHash(blockFactory, emitBatchSize, elementTypes)) {
             /*
              * Only the long/long, long/bytes_ref, and bytes_ref/long implementations don't collect nulls.
              */
             Oracle oracle = new Oracle(
                 forcePackedHash
-                    || false == (types.equals(List.of(ElementType.LONG, ElementType.LONG))
-                        || types.equals(List.of(ElementType.LONG, ElementType.BYTES_REF))
-                        || types.equals(List.of(ElementType.BYTES_REF, ElementType.LONG)))
+                    || false == (elementTypes.equals(List.of(ElementType.LONG, ElementType.LONG))
+                        || elementTypes.equals(List.of(ElementType.LONG, ElementType.BYTES_REF))
+                        || elementTypes.equals(List.of(ElementType.BYTES_REF, ElementType.LONG)))
             );
             /*
             * Expected ordinals for checking lookup. Skipped if we have more than 5 groups because
@@ -151,15 +186,7 @@ public class BlockHashRandomizedTests extends ESTestCase {

             for (int p = 0; p < pageCount; p++) {
                 for (int g = 0; g < blocks.length; g++) {
-                    randomBlocks[g] = BasicBlockTests.randomBlock(
-                        types.get(g),
-                        positionCount,
-                        types.get(g) == ElementType.NULL ? true : randomBoolean(),
-                        1,
-                        maxValuesPerPosition,
-                        0,
-                        dups
-                    );
+                    randomBlocks[g] = types.get(g).randomBlock(positionCount, maxValuesPerPosition, dups);
                     blocks[g] = randomBlocks[g].block();
                 }
                 oracle.add(randomBlocks);
@@ -209,6 +236,7 @@ public class BlockHashRandomizedTests extends ESTestCase {

             if (blockHash instanceof LongLongBlockHash == false
                 && blockHash instanceof BytesRefLongBlockHash == false
+                && blockHash instanceof BytesRef2BlockHash == false
                 && blockHash instanceof BytesRef3BlockHash == false) {
                 assertLookup(blockFactory, expectedOrds, types, blockHash, oracle);
             }
@@ -235,14 +263,14 @@ public class BlockHashRandomizedTests extends ESTestCase {
     private void assertLookup(
         BlockFactory blockFactory,
         Map<List<Object>, Set<Integer>> expectedOrds,
-        List<ElementType> types,
+        List<Type> types,
         BlockHash blockHash,
         Oracle oracle
     ) {
         Block.Builder[] builders = new Block.Builder[types.size()];
         try {
             for (int b = 0; b < builders.length; b++) {
-                builders[b] = types.get(b).newBlockBuilder(LOOKUP_POSITIONS, blockFactory);
+                builders[b] = types.get(b).elementType().newBlockBuilder(LOOKUP_POSITIONS, blockFactory);
             }
             for (int p = 0; p < LOOKUP_POSITIONS; p++) {
                 /*
@@ -408,8 +436,8 @@ public class BlockHashRandomizedTests extends ESTestCase {
         return breakerService;
     }

-    private static List<Object> randomKey(List<ElementType> types) {
-        return types.stream().map(BlockHashRandomizedTests::randomKeyElement).toList();
+    private static List<Object> randomKey(List<Type> types) {
+        return types.stream().map(t -> randomKeyElement(t.elementType())).toList();
     }

     public static Object randomKeyElement(ElementType type) {
@@ -423,4 +451,75 @@ public class BlockHashRandomizedTests extends ESTestCase {
             default -> throw new IllegalArgumentException("unsupported element type [" + type + "]");
         };
     }
+
+    private record Basic(ElementType elementType) implements Type {
+        @Override
+        public BasicBlockTests.RandomBlock randomBlock(int positionCount, int maxValuesPerPosition, int dups) {
+            return BasicBlockTests.randomBlock(
+                elementType,
+                positionCount,
+                elementType == ElementType.NULL | randomBoolean(),
+                1,
+                maxValuesPerPosition,
+                0,
+                dups
+            );
+        }
+    }
+
+    private record Ordinals(int dictionarySize) implements Type {
+        @Override
+        public ElementType elementType() {
+            return ElementType.BYTES_REF;
+        }
+
+        @Override
+        public BasicBlockTests.RandomBlock randomBlock(int positionCount, int maxValuesPerPosition, int dups) {
+            List<Map.Entry<String, Integer>> dictionary = new ArrayList<>();
+            List<List<Object>> values = new ArrayList<>(positionCount);
+            try (
+                IntBlock.Builder ordinals = TestBlockFactory.getNonBreakingInstance()
+                    .newIntBlockBuilder(positionCount * maxValuesPerPosition);
+                BytesRefVector.Builder bytes = TestBlockFactory.getNonBreakingInstance().newBytesRefVectorBuilder(maxValuesPerPosition);
+            ) {
+                for (String value : dictionary(maxValuesPerPosition)) {
+                    bytes.appendBytesRef(new BytesRef(value));
+                    dictionary.add(Map.entry(value, dictionary.size()));
+                }
+                for (int p = 0; p < positionCount; p++) {
+                    int valueCount = between(1, maxValuesPerPosition);
+                    int dupCount = between(0, dups);

+                    List<Integer> ordsAtPosition = new ArrayList<>();
+                    List<Object> valuesAtPosition = new ArrayList<>();
+                    values.add(valuesAtPosition);
+                    if (valueCount != 1 || dupCount != 0) {
+                        ordinals.beginPositionEntry();
+                    }
+                    for (int v = 0; v < valueCount; v++) {
+                        Map.Entry<String, Integer> value = randomFrom(dictionary);
+                        valuesAtPosition.add(new BytesRef(value.getKey()));
+                        ordinals.appendInt(value.getValue());
+                        ordsAtPosition.add(value.getValue());
+                    }
+                    for (int v = 0; v < dupCount; v++) {
+                        ordinals.appendInt(randomFrom(ordsAtPosition));
+                    }
+                    if (valueCount != 1 || dupCount != 0) {
+                        ordinals.endPositionEntry();
+                    }
+                }
+                return new BasicBlockTests.RandomBlock(values, new OrdinalBytesRefBlock(ordinals.build(), bytes.build()));
+            }
+        }
+
+        private Set<String> dictionary(int maxValuesPerPosition) {
+            int count = Math.max(dictionarySize, maxValuesPerPosition);
+            Set<String> values = new HashSet<>();
+            while (values.size() < count) {
+                values.add(randomAlphaOfLength(5));
+            }
+            return values;
+        }
+    }
 }
BlockHashTests.java
@@ -20,12 +20,15 @@ import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction;
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.BooleanBlock;
 import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
 import org.elasticsearch.compute.data.DoubleBlock;
 import org.elasticsearch.compute.data.ElementType;
 import org.elasticsearch.compute.data.IntBlock;
 import org.elasticsearch.compute.data.IntVector;
 import org.elasticsearch.compute.data.LongBlock;
 import org.elasticsearch.compute.data.MockBlockFactory;
+import org.elasticsearch.compute.data.OrdinalBytesRefBlock;
+import org.elasticsearch.compute.data.OrdinalBytesRefVector;
 import org.elasticsearch.compute.data.Page;
 import org.elasticsearch.compute.data.TestBlockFactory;
 import org.elasticsearch.core.Releasable;
@@ -460,6 +463,133 @@ public class BlockHashTests extends ESTestCase {
         }
     }

+    public void testBasicOrdinals() {
+        try (
+            IntVector.Builder ords = blockFactory.newIntVectorFixedBuilder(8);
+            BytesRefVector.Builder bytes = blockFactory.newBytesRefVectorBuilder(8)
+        ) {
+            ords.appendInt(1);
+            ords.appendInt(0);
+            ords.appendInt(3);
+            ords.appendInt(1);
+            ords.appendInt(3);
+            ords.appendInt(0);
+            ords.appendInt(2);
+            ords.appendInt(3);
+            bytes.appendBytesRef(new BytesRef("item-1"));
+            bytes.appendBytesRef(new BytesRef("item-2"));
+            bytes.appendBytesRef(new BytesRef("item-3"));
+            bytes.appendBytesRef(new BytesRef("item-4"));

+            hash(ordsAndKeys -> {
+                if (forcePackedHash) {
+                    assertThat(ordsAndKeys.description, startsWith("PackedValuesBlockHash{groups=[0:BYTES_REF], entries=4, size="));
+                    assertThat(ordsAndKeys.description, endsWith("b}"));
+                    assertOrds(ordsAndKeys.ords, 0, 1, 2, 0, 2, 1, 3, 2);
+                    assertThat(ordsAndKeys.nonEmpty, equalTo(intRange(0, 4)));
+                    assertKeys(ordsAndKeys.keys, "item-2", "item-1", "item-4", "item-3");
+                } else {
+                    assertThat(ordsAndKeys.description, startsWith("BytesRefBlockHash{channel=0, entries=4, size="));
+                    assertThat(ordsAndKeys.description, endsWith("b, seenNull=false}"));
+                    assertOrds(ordsAndKeys.ords, 2, 1, 4, 2, 4, 1, 3, 4);
+                    assertThat(ordsAndKeys.nonEmpty, equalTo(intRange(1, 5)));
+                    assertKeys(ordsAndKeys.keys, "item-1", "item-2", "item-3", "item-4");
+                }
+            }, new OrdinalBytesRefVector(ords.build(), bytes.build()).asBlock());
+        }
+    }

+    public void testOrdinalsWithNulls() {
+        try (
+            IntBlock.Builder ords = blockFactory.newIntBlockBuilder(4);
+            BytesRefVector.Builder bytes = blockFactory.newBytesRefVectorBuilder(2)
+        ) {
+            ords.appendInt(0);
+            ords.appendNull();
+            ords.appendInt(1);
+            ords.appendNull();
+            bytes.appendBytesRef(new BytesRef("cat"));
+            bytes.appendBytesRef(new BytesRef("dog"));

+            hash(ordsAndKeys -> {
+                if (forcePackedHash) {
+                    assertThat(ordsAndKeys.description, startsWith("PackedValuesBlockHash{groups=[0:BYTES_REF], entries=3, size="));
+                    assertThat(ordsAndKeys.description, endsWith("b}"));
+                    assertOrds(ordsAndKeys.ords, 0, 1, 2, 1);
+                    assertKeys(ordsAndKeys.keys, "cat", null, "dog");
+                } else {
+                    assertThat(ordsAndKeys.description, startsWith("BytesRefBlockHash{channel=0, entries=2, size="));
+                    assertThat(ordsAndKeys.description, endsWith("b, seenNull=true}"));
+                    assertOrds(ordsAndKeys.ords, 1, 0, 2, 0);
+                    assertKeys(ordsAndKeys.keys, null, "cat", "dog");
+                }
+                assertThat(ordsAndKeys.nonEmpty, equalTo(intRange(0, 3)));
+            }, new OrdinalBytesRefBlock(ords.build(), bytes.build()));
+        }
+    }

+    public void testOrdinalsWithMultiValuedFields() {
+        try (
+            IntBlock.Builder ords = blockFactory.newIntBlockBuilder(4);
+            BytesRefVector.Builder bytes = blockFactory.newBytesRefVectorBuilder(2)
+        ) {
+            ords.appendInt(0);
+            ords.beginPositionEntry();
+            ords.appendInt(0);
+            ords.appendInt(1);
+            ords.endPositionEntry();
+            ords.beginPositionEntry();
+            ords.appendInt(1);
+            ords.appendInt(2);
+            ords.endPositionEntry();
+            ords.beginPositionEntry();
+            ords.appendInt(2);
+            ords.appendInt(1);
+            ords.endPositionEntry();
+            ords.appendNull();
+            ords.beginPositionEntry();
+            ords.appendInt(2);
+            ords.appendInt(2);
+            ords.appendInt(1);
+            ords.endPositionEntry();

+            bytes.appendBytesRef(new BytesRef("foo"));
+            bytes.appendBytesRef(new BytesRef("bar"));
+            bytes.appendBytesRef(new BytesRef("bort"));

+            hash(ordsAndKeys -> {
+                if (forcePackedHash) {
+                    assertThat(ordsAndKeys.description, startsWith("PackedValuesBlockHash{groups=[0:BYTES_REF], entries=4, size="));
+                    assertThat(ordsAndKeys.description, endsWith("b}"));
+                    assertOrds(
+                        ordsAndKeys.ords,
+                        new int[] { 0 },
+                        new int[] { 0, 1 },
+                        new int[] { 1, 2 },
+                        new int[] { 2, 1 },
+                        new int[] { 3 },
+                        new int[] { 2, 1 }
+                    );
+                    assertKeys(ordsAndKeys.keys, "foo", "bar", "bort", null);
+                } else {
+                    assertThat(ordsAndKeys.description, startsWith("BytesRefBlockHash{channel=0, entries=3, size="));
+                    assertThat(ordsAndKeys.description, endsWith("b, seenNull=true}"));
+                    assertOrds(
+                        ordsAndKeys.ords,
+                        new int[] { 1 },
+                        new int[] { 1, 2 },
+                        new int[] { 2, 3 },
+                        new int[] { 3, 2 },
+                        new int[] { 0 },
+                        new int[] { 3, 2 }
+                    );
+                    assertKeys(ordsAndKeys.keys, null, "foo", "bar", "bort");
+                }
+                assertThat(ordsAndKeys.nonEmpty, equalTo(intRange(0, 4)));
+            }, new OrdinalBytesRefBlock(ords.build(), bytes.build()));
+        }
+    }

     public void testBooleanHashFalseFirst() {
         boolean[] values = new boolean[] { false, true, true, true, true };
         hash(ordsAndKeys -> {
@@ -1315,6 +1445,7 @@ public class BlockHashTests extends ESTestCase {
         });
         if (blockHash instanceof LongLongBlockHash == false
             && blockHash instanceof BytesRefLongBlockHash == false
+            && blockHash instanceof BytesRef2BlockHash == false
             && blockHash instanceof BytesRef3BlockHash == false) {
             Block[] keys = blockHash.getKeys();
             try (ReleasableIterator<IntBlock> lookup = blockHash.lookup(new Page(keys), ByteSizeValue.ofKb(between(1, 100)))) {