From c87fe9402cbebc460b42cd3dd7c268e5e6e659d9 Mon Sep 17 00:00:00 2001 From: Adam Bellemare Date: Thu, 3 Oct 2019 18:59:31 -0400 Subject: [PATCH] KAFKA-3705 Added a foreignKeyJoin implementation for KTable. (#5527) https://issues.apache.org/jira/browse/KAFKA-3705 Allows for a KTable to map its value to a given foreign key and join on another KTable keyed on that foreign key. Applies the joiner, then returns the tuples keyed on the original key. This supports updates from both sides of the join. Reviewers: Guozhang Wang , Matthias J. Sax , John Roesler , Boyang Chen , Christopher Pettitt , Bill Bejeck , Jan Filipiak , pgwhalen, Alexei Daniline --- build.gradle | 6 + checkstyle/suppressions.xml | 12 +- .../org/apache/kafka/common/utils/Bytes.java | 28 +- .../apache/kafka/common/utils/BytesTest.java | 84 +++ .../scala/kafka/tools/StreamsResetter.java | 4 +- .../apache/kafka/streams/kstream/KTable.java | 88 +++ .../streams/kstream/internals/KTableImpl.java | 252 ++++++- .../KTableSourceValueGetterSupplier.java | 6 +- .../internals/foreignkeyjoin/CombinedKey.java | 55 ++ .../foreignkeyjoin/CombinedKeySchema.java | 96 +++ ...eignJoinSubscriptionProcessorSupplier.java | 114 +++ ...JoinSubscriptionSendProcessorSupplier.java | 116 +++ ...scriptionJoinForeignProcessorSupplier.java | 124 ++++ ...criptionResolverJoinProcessorSupplier.java | 107 +++ .../SubscriptionResponseWrapper.java | 62 ++ .../SubscriptionResponseWrapperSerde.java | 124 ++++ ...criptionStoreReceiveProcessorSupplier.java | 112 +++ .../foreignkeyjoin/SubscriptionWrapper.java | 111 +++ .../SubscriptionWrapperSerde.java | 119 +++ .../internals/graph/BaseRepartitionNode.java | 10 +- .../GroupedTableOperationRepartitionNode.java | 3 +- ...bleKTableForeignKeyJoinResolutionNode.java | 81 ++ .../graph/OptimizableRepartitionNode.java | 19 +- .../internals/graph/ProcessorGraphNode.java | 7 + .../graph/StatefulProcessorNode.java | 18 +- .../internals/graph/StreamSinkNode.java | 4 + .../internals/InternalProcessorContext.java | 13 + .../internals/InternalTopologyBuilder.java | 45 +- .../internals/RocksDBPrefixIterator.java | 54 ++ .../integration/ForeignKeyJoinSuite.java | 47 ++ ...reignKeyInnerJoinMultiIntegrationTest.java | 254 +++++++ ...leKTableForeignKeyJoinIntegrationTest.java | 699 ++++++++++++++++++ .../foreignkeyjoin/CombinedKeySchemaTest.java | 73 ++ .../SubscriptionResponseWrapperSerdeTest.java | 91 +++ .../SubscriptionWrapperSerdeTest.java | 86 +++ .../internals/RocksDBKeyValueStoreTest.java | 3 +- .../state/internals/RocksDBStoreTest.java | 2 +- .../scala/FunctionsCompatConversions.scala | 6 + .../kafka/streams/scala/kstream/KTable.scala | 38 +- 39 files changed, 3141 insertions(+), 32 deletions(-) create mode 100644 clients/src/test/java/org/apache/kafka/common/utils/BytesTest.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKey.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKeySchema.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplier.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionSendProcessorSupplier.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionJoinForeignProcessorSupplier.java create mode 100644 
streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResolverJoinProcessorSupplier.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapper.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerde.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionStoreReceiveProcessorSupplier.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapper.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerde.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/KTableKTableForeignKeyJoinResolutionNode.java create mode 100644 streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBPrefixIterator.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/ForeignKeyJoinSuite.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinMultiIntegrationTest.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinIntegrationTest.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKeySchemaTest.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerdeTest.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerdeTest.java diff --git a/build.gradle b/build.gradle index 2b7083f2921..210112bb13d 100644 --- a/build.gradle +++ b/build.gradle @@ -1224,6 +1224,12 @@ project(':streams') { if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() } standardOutput = new File(generatedDocsDir, "streams_config.html").newOutputStream() } + + test { + // The suites are for running sets of tests in IDEs. + // Gradle will run each test class, so we exclude the suites to avoid redundantly running the tests twice. + exclude '**/*Suite.class' + } } project(':streams:streams-scala') { diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml index 6a24ef94c29..7a04f8d5b94 100644 --- a/checkstyle/suppressions.xml +++ b/checkstyle/suppressions.xml @@ -1,4 +1,4 @@ - + + @@ -90,6 +92,9 @@ + + @@ -149,7 +154,7 @@ files="(TopologyBuilder|KafkaStreams|KStreamImpl|KTableImpl|StreamThread|StreamTask).java"/> + files="(KTableImpl|StreamsPartitionAssignor.java)"/> @@ -229,7 +234,8 @@ - + { } /** - * A byte array comparator based on lexicographic ordering. + * Increment the underlying byte array by adding 1. Throws an IndexOutOfBoundsException if incrementing would cause + * the underlying input byte array to overflow. + * + * @param input - The byte array to increment + * @return A new copy of the incremented byte array. 
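+ * <p>
+ * The increment is applied with carry from the least-significant byte, so the result is the smallest
+ * byte array of the same length that sorts after the input in lexicographic order. That property is
+ * what makes the result usable as an exclusive upper bound when range-scanning all keys that share
+ * the input as a prefix.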
+     */
+    public static Bytes increment(Bytes input) throws IndexOutOfBoundsException {
+        byte[] inputArr = input.get();
+        byte[] ret = new byte[inputArr.length];
+        int carry = 1;
+        for (int i = inputArr.length - 1; i >= 0; i--) {
+            if (inputArr[i] == (byte) 0xFF && carry == 1) {
+                ret[i] = (byte) 0x00;
+            } else {
+                ret[i] = (byte) (inputArr[i] + carry);
+                carry = 0;
+            }
+        }
+        if (carry == 0) {
+            return wrap(ret);
+        } else {
+            throw new IndexOutOfBoundsException();
+        }
+    }
+
+    /**
+     * A byte array comparator based on lexicographic ordering.
      */
     public final static ByteArrayComparator BYTES_LEXICO_COMPARATOR = new LexicographicByteArrayComparator();
diff --git a/clients/src/test/java/org/apache/kafka/common/utils/BytesTest.java b/clients/src/test/java/org/apache/kafka/common/utils/BytesTest.java
new file mode 100644
index 00000000000..bf7ec712ddc
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/common/utils/BytesTest.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.common.utils;
+
+import org.junit.Test;
+
+import java.util.Comparator;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertEquals;
+
+public class BytesTest {
+
+    @Test
+    public void testIncrement() {
+        byte[] input = new byte[]{(byte) 0xAB, (byte) 0xCD, (byte) 0xFF};
+        byte[] expected = new byte[]{(byte) 0xAB, (byte) 0xCE, (byte) 0x00};
+        Bytes output = Bytes.increment(Bytes.wrap(input));
+        assertArrayEquals(output.get(), expected);
+    }
+
+    @Test
+    public void testIncrementUpperBoundary() {
+        byte[] input = new byte[]{(byte) 0xFF, (byte) 0xFF, (byte) 0xFF};
+        assertThrows(IndexOutOfBoundsException.class, () -> Bytes.increment(Bytes.wrap(input)));
+    }
+
+    @Test
+    public void testIncrementWithSubmap() {
+        final NavigableMap<Bytes, byte[]> map = new TreeMap<>();
+        Bytes key1 = Bytes.wrap(new byte[]{(byte) 0xAA});
+        byte[] val = new byte[]{(byte) 0x00};
+        map.put(key1, val);
+
+        Bytes key2 = Bytes.wrap(new byte[]{(byte) 0xAA, (byte) 0xAA});
+        map.put(key2, val);
+
+        Bytes key3 = Bytes.wrap(new byte[]{(byte) 0xAA, (byte) 0x00, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF});
+        map.put(key3, val);
+
+        Bytes key4 = Bytes.wrap(new byte[]{(byte) 0xAB, (byte) 0x00});
+        map.put(key4, val);
+
+        Bytes key5 = Bytes.wrap(new byte[]{(byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x01});
+        map.put(key5, val);
+
+        Bytes prefix = key1;
+        Bytes prefixEnd = Bytes.increment(prefix);
+
+        Comparator<? super Bytes> comparator = map.comparator();
+        final int result = comparator == null ? prefix.compareTo(prefixEnd) : comparator.compare(prefix, prefixEnd);
+        NavigableMap<Bytes, byte[]> subMapResults;
+        if (result > 0) {
+            //Prefix increment would cause a wrap-around. Get the submap from toKey to the end of the map
+            subMapResults = map.tailMap(prefix, true);
+        } else {
+            subMapResults = map.subMap(prefix, true, prefixEnd, false);
+        }
+
+        NavigableMap<Bytes, byte[]> subMapExpected = new TreeMap<>();
+        subMapExpected.put(key1, val);
+        subMapExpected.put(key2, val);
+        subMapExpected.put(key3, val);
+
+        assertEquals(subMapExpected.keySet(), subMapResults.keySet());
+    }
+}
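The role of the new helper in this patch: a serialized foreign-key prefix is a lower bound, and Bytes.increment(prefix) is the matching exclusive upper bound, so one range scan retrieves every key that starts with that prefix. Below is a minimal sketch of that pattern over a plain sorted map; PrefixScanSketch and scanPrefix are illustrative names, not part of the patch.

```java
import org.apache.kafka.common.utils.Bytes;

import java.util.NavigableMap;
import java.util.TreeMap;

public class PrefixScanSketch {
    // Returns every entry whose key starts with `prefix`: all such keys fall in
    // [prefix, increment(prefix)) under lexicographic ordering.
    public static NavigableMap<Bytes, byte[]> scanPrefix(final NavigableMap<Bytes, byte[]> store,
                                                         final Bytes prefix) {
        try {
            final Bytes end = Bytes.increment(prefix); // smallest key sorting after every key with this prefix
            return store.subMap(prefix, true, end, false);
        } catch (final IndexOutOfBoundsException e) {
            // prefix was all 0xFF bytes: there is no upper bound, so scan to the end of the keyspace
            return store.tailMap(prefix, true);
        }
    }

    public static void main(final String[] args) {
        final NavigableMap<Bytes, byte[]> store = new TreeMap<>();
        store.put(Bytes.wrap(new byte[]{(byte) 0xAA, 0x01}), new byte[0]);
        store.put(Bytes.wrap(new byte[]{(byte) 0xAB, 0x00}), new byte[0]);
        // Only the {0xAA, 0x01} entry matches the 0xAA prefix, so this prints 1.
        System.out.println(scanPrefix(store, Bytes.wrap(new byte[]{(byte) 0xAA})).size());
    }
}
```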
diff --git a/core/src/main/scala/kafka/tools/StreamsResetter.java b/core/src/main/scala/kafka/tools/StreamsResetter.java
index 597b9d34c23..574e9c66e29 100644
--- a/core/src/main/scala/kafka/tools/StreamsResetter.java
+++ b/core/src/main/scala/kafka/tools/StreamsResetter.java
@@ -675,7 +675,9 @@ public class StreamsResetter {
         // Cf. https://issues.apache.org/jira/browse/KAFKA-7930
         return !isInputTopic(topicName) && !isIntermediateTopic(topicName)
             && topicName.startsWith(options.valueOf(applicationIdOption) + "-")
-            && (topicName.endsWith("-changelog") || topicName.endsWith("-repartition"));
+            && (topicName.endsWith("-changelog") || topicName.endsWith("-repartition")
+                || topicName.endsWith("-subscription-registration-topic")
+                || topicName.endsWith("-subscription-response-topic"));
     }
 
     public static void main(final String[] args) {
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java
index a2b9bafd9a7..21e42df688e 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java
@@ -30,6 +30,8 @@ import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.state.QueryableStoreType;
 import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
 
+import java.util.function.Function;
+
 /**
  * {@code KTable} is an abstraction of a changelog stream from a primary-keyed table.
  * Each record in this changelog stream is an update on the primary-keyed table with the record key as the primary key.
@@ -2117,6 +2119,92 @@
                           final Named named,
                           final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
 
+    /**
+     * Join records of this {@code KTable} with another {@code KTable}'s records using a non-windowed inner join.
+     * Records from this table are joined with records from the other {@code KTable} whose key matches the result
+     * of applying {@code foreignKeyExtractor} to this table's value.
+     *
+     * @param other the other {@code KTable} to be joined with this {@code KTable}, keyed by KO
+     * @param foreignKeyExtractor a {@link Function} that extracts the key (KO) from this table's value (V)
+     * @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
+     * @param named a {@link Named} config used to name the processor in the topology
+     * @param materialized a {@link Materialized} that describes how the {@link StateStore} for the resulting {@code KTable}
+     *                     should be materialized. Cannot be {@code null}
+     * @param <VR> the value type of the result {@code KTable}
+     * @param <KO> the key type of the other {@code KTable}
+     * @param <VO> the value type of the other {@code KTable}
+     * @return a {@code KTable} that contains the result of joining this table with {@code other}
+     */
+    <VR, KO, VO> KTable<K, VR> join(final KTable<KO, VO> other,
+                                    final Function<V, KO> foreignKeyExtractor,
+                                    final ValueJoiner<V, VO, VR> joiner,
+                                    final Named named,
+                                    final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
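For orientation, this is how the new overload is meant to be called. A hypothetical usage sketch: Order, Customer, and EnrichedOrder are illustrative types that do not appear in this patch.

```java
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueStore;

final StreamsBuilder builder = new StreamsBuilder();
final KTable<String, Order> orders = builder.table("orders");          // keyed by orderId
final KTable<String, Customer> customers = builder.table("customers"); // keyed by customerId

final KTable<String, EnrichedOrder> enriched = orders.join(
    customers,
    order -> order.getCustomerId(),                          // foreignKeyExtractor: Order value -> customer key
    (order, customer) -> new EnrichedOrder(order, customer), // joiner
    Materialized.<String, EnrichedOrder, KeyValueStore<Bytes, byte[]>>as("enriched-orders-store")
);
```

Note that the result stays keyed by the original orderId; the re-keying to the foreign key and back is handled internally by the subscription and response topics wired up in KTableImpl below.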
+    /**
+     * Join records of this {@code KTable} with another {@code KTable}'s records using a non-windowed inner join.
+     * Records from this table are joined with records from the other {@code KTable} whose key matches the result
+     * of applying {@code foreignKeyExtractor} to this table's value.
+     *
+     * @param other the other {@code KTable} to be joined with this {@code KTable}, keyed by KO
+     * @param foreignKeyExtractor a {@link Function} that extracts the key (KO) from this table's value (V)
+     * @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
+     * @param materialized a {@link Materialized} that describes how the {@link StateStore} for the resulting {@code KTable}
+     *                     should be materialized. Cannot be {@code null}
+     * @param <VR> the value type of the result {@code KTable}
+     * @param <KO> the key type of the other {@code KTable}
+     * @param <VO> the value type of the other {@code KTable}
+     * @return a {@code KTable} that contains the result of joining this table with {@code other}
+     */
+    <VR, KO, VO> KTable<K, VR> join(final KTable<KO, VO> other,
+                                    final Function<V, KO> foreignKeyExtractor,
+                                    final ValueJoiner<V, VO, VR> joiner,
+                                    final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
+
+    /**
+     * Join records of this {@code KTable} with another {@code KTable}'s records using a non-windowed left join.
+     * Records from this table are joined with records from the other {@code KTable} whose key matches the result
+     * of applying {@code foreignKeyExtractor} to this table's value.
+     *
+     * @param other the other {@code KTable} to be joined with this {@code KTable}, keyed by KO
+     * @param foreignKeyExtractor a {@link Function} that extracts the key (KO) from this table's value (V). If the
+     *                            resulting foreignKey is null, the record will not propagate to the output.
+     * @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
+     * @param named a {@link Named} config used to name the processor in the topology
+     * @param materialized a {@link Materialized} that describes how the {@link StateStore} for the resulting {@code KTable}
+     *                     should be materialized. Cannot be {@code null}
+     * @param <VR> the value type of the result {@code KTable}
+     * @param <KO> the key type of the other {@code KTable}
+     * @param <VO> the value type of the other {@code KTable}
+     * @return a {@code KTable} that contains the result of left-joining this table with {@code other}
+     */
+    <VR, KO, VO> KTable<K, VR> leftJoin(final KTable<KO, VO> other,
+                                        final Function<V, KO> foreignKeyExtractor,
+                                        final ValueJoiner<V, VO, VR> joiner,
+                                        final Named named,
+                                        final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
+
+    /**
+     * Join records of this {@code KTable} with another {@code KTable}'s records using a non-windowed left join.
+     * Records from this table are joined with records from the other {@code KTable} whose key matches the result
+     * of applying {@code foreignKeyExtractor} to this table's value.
+     *
+     * @param other the other {@code KTable} to be joined with this {@code KTable}, keyed by KO
+     * @param foreignKeyExtractor a {@link Function} that extracts the key (KO) from this table's value (V). If the
+     *                            resulting foreignKey is null, the record will not propagate to the output.
+     * @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
+     * @param materialized a {@link Materialized} that describes how the {@link StateStore} for the resulting {@code KTable}
+     *                     should be materialized. Cannot be {@code null}
+     * @param <VR> the value type of the result {@code KTable}
+     * @param <KO> the key type of the other {@code KTable}
+     * @param <VO> the value type of the other {@code KTable}
+     * @return a {@code KTable} that contains the result of left-joining this table with {@code other}
+     */
+    <VR, KO, VO> KTable<K, VR> leftJoin(final KTable<KO, VO> other,
+                                        final Function<V, KO> foreignKeyExtractor,
+                                        final ValueJoiner<V, VO, VR> joiner,
+                                        final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
+
     /**
      * Get the name of the local state store that can be used to query this {@code KTable}.
* diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java index 4bc102a746d..05e04e8c291 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java @@ -17,8 +17,10 @@ package org.apache.kafka.streams.kstream.internals; import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serdes; import org.apache.kafka.common.utils.Bytes; import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.kstream.Consumed; import org.apache.kafka.streams.kstream.Grouped; import org.apache.kafka.streams.kstream.KGroupedTable; import org.apache.kafka.streams.kstream.KStream; @@ -27,15 +29,29 @@ import org.apache.kafka.streams.kstream.KeyValueMapper; import org.apache.kafka.streams.kstream.Materialized; import org.apache.kafka.streams.kstream.Named; import org.apache.kafka.streams.kstream.Predicate; +import org.apache.kafka.streams.kstream.Produced; import org.apache.kafka.streams.kstream.Suppressed; import org.apache.kafka.streams.kstream.ValueJoiner; import org.apache.kafka.streams.kstream.ValueMapper; import org.apache.kafka.streams.kstream.ValueMapperWithKey; import org.apache.kafka.streams.kstream.ValueTransformerWithKeySupplier; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.CombinedKey; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.CombinedKeySchema; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.ForeignJoinSubscriptionProcessorSupplier; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.ForeignJoinSubscriptionSendProcessorSupplier; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionJoinForeignProcessorSupplier; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionResolverJoinProcessorSupplier; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionResponseWrapper; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionResponseWrapperSerde; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionStoreReceiveProcessorSupplier; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapper; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapperSerde; import org.apache.kafka.streams.kstream.internals.graph.KTableKTableJoinNode; import org.apache.kafka.streams.kstream.internals.graph.ProcessorGraphNode; import org.apache.kafka.streams.kstream.internals.graph.ProcessorParameters; import org.apache.kafka.streams.kstream.internals.graph.StatefulProcessorNode; +import org.apache.kafka.streams.kstream.internals.graph.StreamSinkNode; +import org.apache.kafka.streams.kstream.internals.graph.StreamSourceNode; import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode; import org.apache.kafka.streams.kstream.internals.graph.TableProcessorNode; import org.apache.kafka.streams.kstream.internals.suppress.FinalResultsSuppressionBuilder; @@ -43,17 +59,22 @@ import org.apache.kafka.streams.kstream.internals.suppress.KTableSuppressProcess import org.apache.kafka.streams.kstream.internals.suppress.NamedSuppressed; import org.apache.kafka.streams.kstream.internals.suppress.SuppressedInternal; import org.apache.kafka.streams.processor.ProcessorSupplier; +import 
org.apache.kafka.streams.processor.internals.StaticTopicNameExtractor; import org.apache.kafka.streams.state.KeyValueStore; import org.apache.kafka.streams.state.StoreBuilder; +import org.apache.kafka.streams.state.Stores; import org.apache.kafka.streams.state.TimestampedKeyValueStore; +import org.apache.kafka.streams.state.ValueAndTimestamp; import org.apache.kafka.streams.state.internals.InMemoryTimeOrderedKeyValueBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.time.Duration; import java.util.Collections; +import java.util.HashSet; import java.util.Objects; import java.util.Set; +import java.util.function.Function; import static org.apache.kafka.streams.kstream.internals.graph.GraphGraceSearchUtil.findAndVerifyWindowGrace; @@ -89,6 +110,15 @@ public class KTableImpl extends AbstractStream implements KTable< private static final String TRANSFORMVALUES_NAME = "KTABLE-TRANSFORMVALUES-"; + private static final String FK_JOIN_STATE_STORE_NAME = "KTABLE-INTERNAL-SUBSCRIPTION-STATE-STORE-"; + private static final String SUBSCRIPTION_REGISTRATION = "KTABLE-SUBSCRIPTION-REGISTRATION-"; + private static final String SUBSCRIPTION_RESPONSE = "KTABLE-SUBSCRIPTION-RESPONSE-"; + private static final String SUBSCRIPTION_PROCESSOR = "KTABLE-SUBSCRIPTION-PROCESSOR-"; + private static final String SUBSCRIPTION_RESPONSE_RESOLVER_PROCESSOR = "KTABLE-SUBSCRIPTION-RESPONSE-RESOLVER-PROCESSOR-"; + private static final String FK_JOIN_OUTPUT_PROCESSOR = "KTABLE-OUTPUT-PROCESSOR-"; + private static final String TOPIC_SUFFIX = "-topic"; + private static final String SINK_NAME = "KTABLE-SINK-"; + private final ProcessorSupplier processorSupplier; private final String queryableStoreName; @@ -495,7 +525,7 @@ public class KTableImpl extends AbstractStream implements KTable< storeName, this ); - + final ProcessorGraphNode> node = new StatefulProcessorNode<>( name, new ProcessorParameters<>(suppressionSupplier, name), @@ -803,4 +833,224 @@ public class KTableImpl extends AbstractStream implements KTable< return (ProcessorParameters) kObjectProcessorParameters; } + @Override + public KTable join(final KTable other, + final Function foreignKeyExtractor, + final ValueJoiner joiner, + final Named named, + final Materialized> materialized) { + + return doJoinOnForeignKey(other, foreignKeyExtractor, joiner, named, new MaterializedInternal<>(materialized), false); + } + + @Override + public KTable join(final KTable other, + final Function foreignKeyExtractor, + final ValueJoiner joiner, + final Materialized> materialized) { + + return doJoinOnForeignKey(other, foreignKeyExtractor, joiner, NamedInternal.empty(), new MaterializedInternal<>(materialized), false); + } + + @Override + public KTable leftJoin(final KTable other, + final Function foreignKeyExtractor, + final ValueJoiner joiner, + final Named named, + final Materialized> materialized) { + return doJoinOnForeignKey(other, foreignKeyExtractor, joiner, named, new MaterializedInternal<>(materialized), true); + } + + @Override + public KTable leftJoin(final KTable other, + final Function foreignKeyExtractor, + final ValueJoiner joiner, + final Materialized> materialized) { + + return doJoinOnForeignKey(other, foreignKeyExtractor, joiner, NamedInternal.empty(), new MaterializedInternal<>(materialized), true); + } + + + @SuppressWarnings("unchecked") + private KTable doJoinOnForeignKey(final KTable foreignKeyTable, + final Function foreignKeyExtractor, + final ValueJoiner joiner, + final Named joinName, + final MaterializedInternal> 
materializedInternal, + final boolean leftJoin) { + Objects.requireNonNull(foreignKeyTable, "foreignKeyTable can't be null"); + Objects.requireNonNull(foreignKeyExtractor, "foreignKeyExtractor can't be null"); + Objects.requireNonNull(joiner, "joiner can't be null"); + Objects.requireNonNull(joinName, "joinName can't be null"); + Objects.requireNonNull(materializedInternal, "materialized can't be null"); + + //Old values are a useful optimization. The old values from the foreignKeyTable table are compared to the new values, + //such that identical values do not cause a prefixScan. PrefixScan and propagation can be expensive and should + //not be done needlessly. + ((KTableImpl) foreignKeyTable).enableSendingOldValues(); + + //Old values must be sent such that the ForeignJoinSubscriptionSendProcessorSupplier can propagate deletions to the correct node. + //This occurs whenever the extracted foreignKey changes values. + enableSendingOldValues(); + + final Serde foreignKeySerde = ((KTableImpl) foreignKeyTable).keySerde; + final Serde> subscriptionWrapperSerde = new SubscriptionWrapperSerde<>(keySerde); + final SubscriptionResponseWrapperSerde responseWrapperSerde = + new SubscriptionResponseWrapperSerde<>(((KTableImpl) foreignKeyTable).valSerde); + + + final NamedInternal renamed = new NamedInternal(joinName); + final String subscriptionTopicName = renamed.suffixWithOrElseGet("-subscription-registration", builder, SUBSCRIPTION_REGISTRATION) + TOPIC_SUFFIX; + builder.internalTopologyBuilder.addInternalTopic(subscriptionTopicName); + final CombinedKeySchema combinedKeySchema = new CombinedKeySchema<>(subscriptionTopicName, foreignKeySerde, keySerde); + + final ProcessorGraphNode> subscriptionNode = new ProcessorGraphNode<>( + new ProcessorParameters<>( + new ForeignJoinSubscriptionSendProcessorSupplier<>( + foreignKeyExtractor, + foreignKeySerde, + subscriptionTopicName, + valSerde.serializer(), + leftJoin + ), + renamed.suffixWithOrElseGet("-subscription-registration-processor", builder, SUBSCRIPTION_REGISTRATION) + ) + ); + builder.addGraphNode(streamsGraphNode, subscriptionNode); + + + final StreamSinkNode> subscriptionSink = new StreamSinkNode<>( + renamed.suffixWithOrElseGet("-subscription-registration-sink", builder, SINK_NAME), + new StaticTopicNameExtractor<>(subscriptionTopicName), + new ProducedInternal<>(Produced.with(foreignKeySerde, subscriptionWrapperSerde)) + ); + builder.addGraphNode(subscriptionNode, subscriptionSink); + + final StreamSourceNode> subscriptionSource = new StreamSourceNode<>( + renamed.suffixWithOrElseGet("-subscription-registration-source", builder, SOURCE_NAME), + Collections.singleton(subscriptionTopicName), + new ConsumedInternal<>(Consumed.with(foreignKeySerde, subscriptionWrapperSerde)) + ); + builder.addGraphNode(subscriptionSink, subscriptionSource); + + // The subscription source is the source node on the *receiving* end *after* the repartition. + // This topic needs to be copartitioned with the Foreign Key table. 
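+        // Without copartitioning, a subscription for a given foreign key could be processed by a
+        // different task than the one that materializes that foreign key's row, and the lookup in
+        // SubscriptionJoinForeignProcessorSupplier would silently miss.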
+ final Set copartitionedRepartitionSources = + new HashSet<>(((KTableImpl) foreignKeyTable).sourceNodes); + copartitionedRepartitionSources.add(subscriptionSource.nodeName()); + builder.internalTopologyBuilder.copartitionSources(copartitionedRepartitionSources); + + + final StoreBuilder>> subscriptionStore = + Stores.timestampedKeyValueStoreBuilder( + Stores.persistentTimestampedKeyValueStore( + renamed.suffixWithOrElseGet("-subscription-store", builder, FK_JOIN_STATE_STORE_NAME) + ), + new Serdes.BytesSerde(), + subscriptionWrapperSerde + ); + builder.addStateStore(subscriptionStore); + + final StatefulProcessorNode> subscriptionReceiveNode = + new StatefulProcessorNode<>( + new ProcessorParameters<>( + new SubscriptionStoreReceiveProcessorSupplier<>(subscriptionStore, combinedKeySchema), + renamed.suffixWithOrElseGet("-subscription-receive", builder, SUBSCRIPTION_PROCESSOR) + ), + Collections.singleton(subscriptionStore), + Collections.emptySet() + ); + builder.addGraphNode(subscriptionSource, subscriptionReceiveNode); + + final StatefulProcessorNode, Change>>> subscriptionJoinForeignNode = + new StatefulProcessorNode<>( + new ProcessorParameters<>( + new SubscriptionJoinForeignProcessorSupplier<>( + ((KTableImpl) foreignKeyTable).valueGetterSupplier() + ), + renamed.suffixWithOrElseGet("-subscription-join-foreign", builder, SUBSCRIPTION_PROCESSOR) + ), + Collections.emptySet(), + Collections.singleton(((KTableImpl) foreignKeyTable).valueGetterSupplier()) + ); + builder.addGraphNode(subscriptionReceiveNode, subscriptionJoinForeignNode); + + final StatefulProcessorNode> foreignJoinSubscriptionNode = new StatefulProcessorNode<>( + new ProcessorParameters<>( + new ForeignJoinSubscriptionProcessorSupplier<>(subscriptionStore, combinedKeySchema), + renamed.suffixWithOrElseGet("-foreign-join-subscription", builder, SUBSCRIPTION_PROCESSOR) + ), + Collections.singleton(subscriptionStore), + Collections.emptySet() + ); + builder.addGraphNode(((KTableImpl) foreignKeyTable).streamsGraphNode, foreignJoinSubscriptionNode); + + + final String finalRepartitionTopicName = renamed.suffixWithOrElseGet("-subscription-response", builder, SUBSCRIPTION_RESPONSE) + TOPIC_SUFFIX; + builder.internalTopologyBuilder.addInternalTopic(finalRepartitionTopicName); + + final StreamSinkNode> foreignResponseSink = + new StreamSinkNode<>( + renamed.suffixWithOrElseGet("-subscription-response-sink", builder, SINK_NAME), + new StaticTopicNameExtractor<>(finalRepartitionTopicName), + new ProducedInternal<>(Produced.with(keySerde, responseWrapperSerde)) + ); + builder.addGraphNode(subscriptionJoinForeignNode, foreignResponseSink); + builder.addGraphNode(foreignJoinSubscriptionNode, foreignResponseSink); + + final StreamSourceNode> foreignResponseSource = new StreamSourceNode<>( + renamed.suffixWithOrElseGet("-subscription-response-source", builder, SOURCE_NAME), + Collections.singleton(finalRepartitionTopicName), + new ConsumedInternal<>(Consumed.with(keySerde, responseWrapperSerde)) + ); + builder.addGraphNode(foreignResponseSink, foreignResponseSource); + + // the response topic has to be copartitioned with the left (primary) side of the join + final Set resultSourceNodes = new HashSet<>(this.sourceNodes); + resultSourceNodes.add(foreignResponseSource.nodeName()); + builder.internalTopologyBuilder.copartitionSources(resultSourceNodes); + + final KTableValueGetterSupplier primaryKeyValueGetter = valueGetterSupplier(); + final StatefulProcessorNode> resolverNode = new StatefulProcessorNode<>( + new ProcessorParameters<>( + 
new SubscriptionResolverJoinProcessorSupplier<>( + primaryKeyValueGetter, + valueSerde().serializer(), + joiner, + leftJoin + ), + renamed.suffixWithOrElseGet("-subscription-response-resolver", builder, SUBSCRIPTION_RESPONSE_RESOLVER_PROCESSOR) + ), + Collections.emptySet(), + Collections.singleton(primaryKeyValueGetter) + ); + builder.addGraphNode(foreignResponseSource, resolverNode); + + final String resultProcessorName = renamed.suffixWithOrElseGet("-result", builder, FK_JOIN_OUTPUT_PROCESSOR); + final KTableSource resultProcessorSupplier = new KTableSource<>(materializedInternal.storeName(), materializedInternal.queryableStoreName()); + final StoreBuilder> resultStore = + materializedInternal.queryableStoreName() == null + ? null + : new TimestampedKeyValueStoreMaterializer<>(materializedInternal).materialize(); + final TableProcessorNode resultNode = new TableProcessorNode<>( + resultProcessorName, + new ProcessorParameters<>( + resultProcessorSupplier, + resultProcessorName + ), + resultStore + ); + builder.addGraphNode(resolverNode, resultNode); + + return new KTableImpl( + resultProcessorName, + keySerde, + materializedInternal.valueSerde(), + resultSourceNodes, + materializedInternal.storeName(), + resultProcessorSupplier, + resultNode, + builder + ); + } } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSourceValueGetterSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSourceValueGetterSupplier.java index 7083b88cc54..bed221387bd 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSourceValueGetterSupplier.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSourceValueGetterSupplier.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.kafka.streams.kstream.internals; import org.apache.kafka.streams.processor.ProcessorContext; @@ -23,7 +24,7 @@ import org.apache.kafka.streams.state.ValueAndTimestamp; public class KTableSourceValueGetterSupplier implements KTableValueGetterSupplier { private final String storeName; - KTableSourceValueGetterSupplier(final String storeName) { + public KTableSourceValueGetterSupplier(final String storeName) { this.storeName = storeName; } @@ -49,6 +50,7 @@ public class KTableSourceValueGetterSupplier implements KTableValueGetterS } @Override - public void close() {} + public void close() { + } } } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKey.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKey.java new file mode 100644 index 00000000000..0dd60fc026d --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKey.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import java.util.Objects; + +public class CombinedKey { + private final KF foreignKey; + private final KP primaryKey; + + CombinedKey(final KF foreignKey, final KP primaryKey) { + Objects.requireNonNull(foreignKey, "foreignKey can't be null"); + Objects.requireNonNull(primaryKey, "primaryKey can't be null"); + this.foreignKey = foreignKey; + this.primaryKey = primaryKey; + } + + public KF getForeignKey() { + return foreignKey; + } + + public KP getPrimaryKey() { + return primaryKey; + } + + public boolean equals(final KF foreignKey, final KP primaryKey) { + if (this.primaryKey == null) { + return false; + } + return this.foreignKey.equals(foreignKey) && this.primaryKey.equals(primaryKey); + } + + @Override + public String toString() { + return "CombinedKey{" + + "foreignKey=" + foreignKey + + ", primaryKey=" + primaryKey + + '}'; + } +} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKeySchema.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKeySchema.java new file mode 100644 index 00000000000..8abe583b0c0 --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKeySchema.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.serialization.Deserializer; +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serializer; +import org.apache.kafka.common.utils.Bytes; +import org.apache.kafka.streams.processor.ProcessorContext; + +import java.nio.ByteBuffer; + +/** + * Factory for creating CombinedKey serializers / deserializers. 
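+ * The serialized form is {Integer.BYTES foreignKeyLength}{foreignKeySerialized}{primaryKeySerialized},
+ * so all CombinedKeys that share a foreign key also share a byte prefix and can be retrieved together
+ * with a single range scan.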
+ */ +public class CombinedKeySchema { + private final String serdeTopic; + private Serializer primaryKeySerializer; + private Deserializer primaryKeyDeserializer; + private Serializer foreignKeySerializer; + private Deserializer foreignKeyDeserializer; + + public CombinedKeySchema(final String serdeTopic, final Serde foreignKeySerde, final Serde primaryKeySerde) { + this.serdeTopic = serdeTopic; + primaryKeySerializer = primaryKeySerde.serializer(); + primaryKeyDeserializer = primaryKeySerde.deserializer(); + foreignKeyDeserializer = foreignKeySerde.deserializer(); + foreignKeySerializer = foreignKeySerde.serializer(); + } + + @SuppressWarnings("unchecked") + public void init(final ProcessorContext context) { + primaryKeySerializer = primaryKeySerializer == null ? (Serializer) context.keySerde().serializer() : primaryKeySerializer; + primaryKeyDeserializer = primaryKeyDeserializer == null ? (Deserializer) context.keySerde().deserializer() : primaryKeyDeserializer; + foreignKeySerializer = foreignKeySerializer == null ? (Serializer) context.keySerde().serializer() : foreignKeySerializer; + foreignKeyDeserializer = foreignKeyDeserializer == null ? (Deserializer) context.keySerde().deserializer() : foreignKeyDeserializer; + } + + Bytes toBytes(final KO foreignKey, final K primaryKey) { + //The serialization format - note that primaryKeySerialized may be null, such as when a prefixScan + //key is being created. + //{Integer.BYTES foreignKeyLength}{foreignKeySerialized}{Optional-primaryKeySerialized} + final byte[] foreignKeySerializedData = foreignKeySerializer.serialize(serdeTopic, foreignKey); + + //? bytes + final byte[] primaryKeySerializedData = primaryKeySerializer.serialize(serdeTopic, primaryKey); + + final ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES + foreignKeySerializedData.length + primaryKeySerializedData.length); + buf.putInt(foreignKeySerializedData.length); + buf.put(foreignKeySerializedData); + buf.put(primaryKeySerializedData); + return Bytes.wrap(buf.array()); + } + + + public CombinedKey fromBytes(final Bytes data) { + //{Integer.BYTES foreignKeyLength}{foreignKeySerialized}{Optional-primaryKeySerialized} + final byte[] dataArray = data.get(); + final ByteBuffer dataBuffer = ByteBuffer.wrap(dataArray); + final int foreignKeyLength = dataBuffer.getInt(); + final byte[] foreignKeyRaw = new byte[foreignKeyLength]; + dataBuffer.get(foreignKeyRaw, 0, foreignKeyLength); + final KO foreignKey = foreignKeyDeserializer.deserialize(serdeTopic, foreignKeyRaw); + + final byte[] primaryKeyRaw = new byte[dataArray.length - foreignKeyLength - Integer.BYTES]; + dataBuffer.get(primaryKeyRaw, 0, primaryKeyRaw.length); + final K primaryKey = primaryKeyDeserializer.deserialize(serdeTopic, primaryKeyRaw); + return new CombinedKey<>(foreignKey, primaryKey); + } + + Bytes prefixBytes(final KO key) { + //The serialization format. Note that primaryKeySerialized is not required/used in this function. 
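+        //Paired with Bytes.increment(), the prefix returned here bounds the range scan performed in
+        //ForeignJoinSubscriptionProcessorSupplier: all CombinedKeys for this foreign key fall between
+        //the prefix and its increment.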
+ //{Integer.BYTES foreignKeyLength}{foreignKeySerialized}{Optional-primaryKeySerialized} + + final byte[] foreignKeySerializedData = foreignKeySerializer.serialize(serdeTopic, key); + + final ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES + foreignKeySerializedData.length); + buf.putInt(foreignKeySerializedData.length); + buf.put(foreignKeySerializedData); + return Bytes.wrap(buf.array()); + } +} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplier.java new file mode 100644 index 00000000000..614b91f071e --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplier.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.utils.Bytes; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.errors.StreamsException; +import org.apache.kafka.streams.kstream.internals.Change; +import org.apache.kafka.streams.processor.AbstractProcessor; +import org.apache.kafka.streams.processor.Processor; +import org.apache.kafka.streams.processor.ProcessorContext; +import org.apache.kafka.streams.processor.ProcessorSupplier; +import org.apache.kafka.streams.processor.internals.InternalProcessorContext; +import org.apache.kafka.streams.processor.internals.metrics.ThreadMetrics; +import org.apache.kafka.streams.state.KeyValueIterator; +import org.apache.kafka.streams.state.StoreBuilder; +import org.apache.kafka.streams.state.TimestampedKeyValueStore; +import org.apache.kafka.streams.state.ValueAndTimestamp; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.ByteBuffer; + +public class ForeignJoinSubscriptionProcessorSupplier implements ProcessorSupplier> { + private static final Logger LOG = LoggerFactory.getLogger(ForeignJoinSubscriptionProcessorSupplier.class); + private final StoreBuilder>> storeBuilder; + private final CombinedKeySchema keySchema; + + public ForeignJoinSubscriptionProcessorSupplier( + final StoreBuilder>> storeBuilder, + final CombinedKeySchema keySchema) { + + this.storeBuilder = storeBuilder; + this.keySchema = keySchema; + } + + @Override + public Processor> get() { + return new KTableKTableJoinProcessor(); + } + + + private final class KTableKTableJoinProcessor extends AbstractProcessor> { + private Sensor skippedRecordsSensor; + private TimestampedKeyValueStore> store; + + @Override + public void init(final ProcessorContext context) { + 
+                super.init(context);
+                final InternalProcessorContext internalProcessorContext = (InternalProcessorContext) context;
+                skippedRecordsSensor = ThreadMetrics.skipRecordSensor(internalProcessorContext.metrics());
+                store = internalProcessorContext.getStateStore(storeBuilder);
+            }
+
+            /**
+             * @throws StreamsException if key is null
+             */
+            @Override
+            public void process(final KO key, final Change<VO> value) {
+                // if the key is null, we do not need to proceed with the join,
+                // since there is nothing to look up in the subscription store
+                if (key == null) {
+                    LOG.warn(
+                        "Skipping record due to null key. value=[{}] topic=[{}] partition=[{}] offset=[{}]",
+                        value, context().topic(), context().partition(), context().offset()
+                    );
+                    skippedRecordsSensor.record();
+                    return;
+                }
+
+                final Bytes prefixBytes = keySchema.prefixBytes(key);
+
+                //Perform the prefixScan and propagate the results
+                try (final KeyValueIterator<Bytes, ValueAndTimestamp<SubscriptionWrapper<K>>> prefixScanResults =
+                         store.range(prefixBytes, Bytes.increment(prefixBytes))) {
+
+                    while (prefixScanResults.hasNext()) {
+                        final KeyValue<Bytes, ValueAndTimestamp<SubscriptionWrapper<K>>> next = prefixScanResults.next();
+                        // have to check the prefix because the range end is inclusive :(
+                        if (prefixEquals(next.key.get(), prefixBytes.get())) {
+                            final CombinedKey<KO, K> combinedKey = keySchema.fromBytes(next.key);
+                            context().forward(
+                                combinedKey.getPrimaryKey(),
+                                new SubscriptionResponseWrapper<>(next.value.value().getHash(), value.newValue)
+                            );
+                        }
+                    }
+                }
+            }
+
+            private boolean prefixEquals(final byte[] x, final byte[] y) {
+                final int min = Math.min(x.length, y.length);
+                final ByteBuffer xSlice = ByteBuffer.wrap(x, 0, min);
+                final ByteBuffer ySlice = ByteBuffer.wrap(y, 0, min);
+                return xSlice.equals(ySlice);
+            }
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionSendProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionSendProcessorSupplier.java
new file mode 100644
index 00000000000..f122258d930
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionSendProcessorSupplier.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serializer; +import org.apache.kafka.streams.kstream.internals.Change; +import org.apache.kafka.streams.processor.AbstractProcessor; +import org.apache.kafka.streams.processor.Processor; +import org.apache.kafka.streams.processor.ProcessorContext; +import org.apache.kafka.streams.processor.ProcessorSupplier; +import org.apache.kafka.streams.state.internals.Murmur3; + +import java.util.function.Function; +import java.util.Arrays; + +import static org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE; +import static org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapper.Instruction.PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE; +import static org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE; +import static org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapper.Instruction.DELETE_KEY_NO_PROPAGATE; + +public class ForeignJoinSubscriptionSendProcessorSupplier implements ProcessorSupplier> { + + private final Function foreignKeyExtractor; + private final String repartitionTopicName; + private final Serializer valueSerializer; + private final boolean leftJoin; + private Serializer foreignKeySerializer; + + public ForeignJoinSubscriptionSendProcessorSupplier(final Function foreignKeyExtractor, + final Serde foreignKeySerde, + final String repartitionTopicName, + final Serializer valueSerializer, + final boolean leftJoin) { + this.foreignKeyExtractor = foreignKeyExtractor; + this.valueSerializer = valueSerializer; + this.leftJoin = leftJoin; + this.repartitionTopicName = repartitionTopicName; + foreignKeySerializer = foreignKeySerde == null ? null : foreignKeySerde.serializer(); + } + + @Override + public Processor> get() { + return new UnbindChangeProcessor(); + } + + private class UnbindChangeProcessor extends AbstractProcessor> { + + @SuppressWarnings("unchecked") + @Override + public void init(final ProcessorContext context) { + super.init(context); + // get default key serde if it wasn't supplied directly at construction + if (foreignKeySerializer == null) { + foreignKeySerializer = (Serializer) context.keySerde().serializer(); + } + } + + @Override + public void process(final K key, final Change change) { + final long[] currentHash = change.newValue == null ? + null : + Murmur3.hash128(valueSerializer.serialize(repartitionTopicName, change.newValue)); + + if (change.oldValue != null) { + final KO oldForeignKey = foreignKeyExtractor.apply(change.oldValue); + if (change.newValue != null) { + final KO newForeignKey = foreignKeyExtractor.apply(change.newValue); + + final byte[] serialOldForeignKey = foreignKeySerializer.serialize(repartitionTopicName, oldForeignKey); + final byte[] serialNewForeignKey = foreignKeySerializer.serialize(repartitionTopicName, newForeignKey); + if (!Arrays.equals(serialNewForeignKey, serialOldForeignKey)) { + //Different Foreign Key - delete the old key value and propagate the new one. + //Delete it from the oldKey's state store + context().forward(oldForeignKey, new SubscriptionWrapper<>(currentHash, DELETE_KEY_NO_PROPAGATE, key)); + //Add to the newKey's state store. Additionally, propagate null if no FK is found there, + //since we must "unset" any output set by the previous FK-join. 
This is true for both INNER + //and LEFT join. + } + context().forward(newForeignKey, new SubscriptionWrapper<>(currentHash, PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE, key)); + } else { + //A simple propagatable delete. Delete from the state store and propagate the delete onwards. + context().forward(oldForeignKey, new SubscriptionWrapper<>(currentHash, DELETE_KEY_AND_PROPAGATE, key)); + } + } else if (change.newValue != null) { + //change.oldValue is null, which means it was deleted at least once before, or it is brand new. + //In either case, we only need to propagate if the FK_VAL is available, as the null from the delete would + //have been propagated otherwise. + + final SubscriptionWrapper.Instruction instruction; + if (leftJoin) { + //Want to send info even if RHS is null. + instruction = PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE; + } else { + instruction = PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE; + } + context().forward(foreignKeyExtractor.apply(change.newValue), new SubscriptionWrapper<>(currentHash, instruction, key)); + } + } + } +} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionJoinForeignProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionJoinForeignProcessorSupplier.java new file mode 100644 index 00000000000..2544eb1856b --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionJoinForeignProcessorSupplier.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.errors.UnsupportedVersionException; +import org.apache.kafka.streams.kstream.internals.Change; +import org.apache.kafka.streams.kstream.internals.KTableValueGetter; +import org.apache.kafka.streams.kstream.internals.KTableValueGetterSupplier; +import org.apache.kafka.streams.processor.AbstractProcessor; +import org.apache.kafka.streams.processor.Processor; +import org.apache.kafka.streams.processor.ProcessorContext; +import org.apache.kafka.streams.processor.ProcessorSupplier; +import org.apache.kafka.streams.processor.To; +import org.apache.kafka.streams.state.ValueAndTimestamp; + +import java.util.Objects; + +/** + * Receives {@code SubscriptionWrapper} events and processes them according to their Instruction. + * Depending on the results, {@code SubscriptionResponseWrapper}s are created, which will be propagated to + * the {@code SubscriptionResolverJoinProcessorSupplier} instance. 
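+ * When the foreign row is found, the forwarded timestamp is the max of the subscription's timestamp
+ * and the foreign row's timestamp, so the response carries the newer of the two inputs.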
+ * + * @param Type of primary keys + * @param Type of foreign key + * @param Type of foreign value + */ +public class SubscriptionJoinForeignProcessorSupplier + implements ProcessorSupplier, Change>>> { + + private final KTableValueGetterSupplier foreignValueGetterSupplier; + + public SubscriptionJoinForeignProcessorSupplier(final KTableValueGetterSupplier foreignValueGetterSupplier) { + this.foreignValueGetterSupplier = foreignValueGetterSupplier; + } + + @Override + public Processor, Change>>> get() { + + return new AbstractProcessor, Change>>>() { + + private KTableValueGetter foreignValues; + + @Override + public void init(final ProcessorContext context) { + super.init(context); + foreignValues = foreignValueGetterSupplier.get(); + foreignValues.init(context); + } + + @Override + public void process(final CombinedKey combinedKey, final Change>> change) { + Objects.requireNonNull(combinedKey, "This processor should never see a null key."); + Objects.requireNonNull(change, "This processor should never see a null value."); + final ValueAndTimestamp> valueAndTimestamp = change.newValue; + Objects.requireNonNull(valueAndTimestamp, "This processor should never see a null newValue."); + final SubscriptionWrapper value = valueAndTimestamp.value(); + + if (value.getVersion() != SubscriptionWrapper.CURRENT_VERSION) { + //Guard against modifications to SubscriptionWrapper. Need to ensure that there is compatibility + //with previous versions to enable rolling upgrades. Must develop a strategy for upgrading + //from older SubscriptionWrapper versions to newer versions. + throw new UnsupportedVersionException("SubscriptionWrapper is of an incompatible version."); + } + + final ValueAndTimestamp foreignValueAndTime = foreignValues.get(combinedKey.getForeignKey()); + + final long resultTimestamp = + foreignValueAndTime == null ? + valueAndTimestamp.timestamp() : + Math.max(valueAndTimestamp.timestamp(), foreignValueAndTime.timestamp()); + + switch (value.getInstruction()) { + case DELETE_KEY_AND_PROPAGATE: + context().forward( + combinedKey.getPrimaryKey(), + new SubscriptionResponseWrapper(value.getHash(), null), + To.all().withTimestamp(resultTimestamp) + ); + break; + case PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE: + //This one needs to go through regardless of LEFT or INNER join, since the extracted FK was + //changed and there is no match for it. We must propagate the (key, null) to ensure that the + //downstream consumers are alerted to this fact. + final VO valueToSend = foreignValueAndTime == null ? 
null : foreignValueAndTime.value(); + + context().forward( + combinedKey.getPrimaryKey(), + new SubscriptionResponseWrapper<>(value.getHash(), valueToSend), + To.all().withTimestamp(resultTimestamp) + ); + break; + case PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE: + if (foreignValueAndTime != null) { + context().forward( + combinedKey.getPrimaryKey(), + new SubscriptionResponseWrapper<>(value.getHash(), foreignValueAndTime.value()), + To.all().withTimestamp(resultTimestamp) + ); + } + break; + case DELETE_KEY_NO_PROPAGATE: + break; + default: + throw new IllegalStateException("Unhandled instruction: " + value.getInstruction()); + } + } + }; + } +} \ No newline at end of file diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResolverJoinProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResolverJoinProcessorSupplier.java new file mode 100644 index 00000000000..a188f15062c --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResolverJoinProcessorSupplier.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.errors.UnsupportedVersionException; +import org.apache.kafka.common.serialization.Serializer; +import org.apache.kafka.streams.kstream.ValueJoiner; +import org.apache.kafka.streams.kstream.internals.KTableValueGetter; +import org.apache.kafka.streams.kstream.internals.KTableValueGetterSupplier; +import org.apache.kafka.streams.processor.AbstractProcessor; +import org.apache.kafka.streams.processor.Processor; +import org.apache.kafka.streams.processor.ProcessorContext; +import org.apache.kafka.streams.processor.ProcessorSupplier; +import org.apache.kafka.streams.state.ValueAndTimestamp; +import org.apache.kafka.streams.state.internals.Murmur3; + +/** + * Receives {@code SubscriptionResponseWrapper} events and filters out events which do not match the current hash + * of the primary key. This eliminates race-condition results for rapidly-changing foreign-keys for a given primary key. + * Applies the join and emits nulls according to LEFT/INNER rules. 
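+ * Staleness is detected by re-hashing the current primary-table value with Murmur3 and comparing it
+ * to the hash carried in the {@code SubscriptionResponseWrapper}; responses whose hashes do not match
+ * are dropped without emitting anything.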
+ * + * @param Type of primary keys + * @param Type of primary values + * @param Type of foreign values + * @param Type of joined result of primary and foreign values + */ +public class SubscriptionResolverJoinProcessorSupplier implements ProcessorSupplier> { + private final KTableValueGetterSupplier valueGetterSupplier; + private final Serializer valueSerializer; + private final ValueJoiner joiner; + private final boolean leftJoin; + + public SubscriptionResolverJoinProcessorSupplier(final KTableValueGetterSupplier valueGetterSupplier, + final Serializer valueSerializer, + final ValueJoiner joiner, + final boolean leftJoin) { + this.valueGetterSupplier = valueGetterSupplier; + this.valueSerializer = valueSerializer; + this.joiner = joiner; + this.leftJoin = leftJoin; + } + + @Override + public Processor> get() { + return new AbstractProcessor>() { + + private KTableValueGetter valueGetter; + + @Override + public void init(final ProcessorContext context) { + super.init(context); + valueGetter = valueGetterSupplier.get(); + valueGetter.init(context); + } + + @Override + public void process(final K key, final SubscriptionResponseWrapper value) { + if (value.getVersion() != SubscriptionResponseWrapper.CURRENT_VERSION) { + //Guard against modifications to SubscriptionResponseWrapper. Need to ensure that there is + //compatibility with previous versions to enable rolling upgrades. Must develop a strategy for + //upgrading from older SubscriptionWrapper versions to newer versions. + throw new UnsupportedVersionException("SubscriptionResponseWrapper is of an incompatible version."); + } + final ValueAndTimestamp currentValueWithTimestamp = valueGetter.get(key); + + //We are unable to access the actual source topic name for the valueSerializer at runtime, without + //tightly coupling to KTableRepartitionProcessorSupplier. + //While we can use the source topic from where the events came from, we shouldn't serialize against it + //as it causes problems with the confluent schema registry, which requires each topic have only a single + //registered schema. + final String dummySerializationTopic = context().topic() + "-join-resolver"; + final long[] currentHash = currentValueWithTimestamp == null ? + null : + Murmur3.hash128(valueSerializer.serialize(dummySerializationTopic, currentValueWithTimestamp.value())); + + final long[] messageHash = value.getOriginalValueHash(); + + //If this value doesn't match the current value from the original table, it is stale and should be discarded. + if (java.util.Arrays.equals(messageHash, currentHash)) { + final VR result; + + if (value.getForeignValue() == null && (!leftJoin || currentValueWithTimestamp == null)) { + result = null; //Emit tombstone + } else { + result = joiner.apply(currentValueWithTimestamp == null ? null : currentValueWithTimestamp.value(), value.getForeignValue()); + } + context().forward(key, result); + } + } + }; + } +} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapper.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapper.java new file mode 100644 index 00000000000..9c79e468213 --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapper.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
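Once the hash check passes, the resolver's emit decision above reduces to four cases (a summary sketch, not code from the patch):

    // foreignValue == null, INNER join                        -> forward null (tombstone)
    // foreignValue == null, LEFT join, current value == null  -> forward null (tombstone)
    // foreignValue == null, LEFT join, current value present  -> forward joiner.apply(currentValue, null)
    // foreignValue != null                                    -> forward joiner.apply(currentValueOrNull, foreignValue)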
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.errors.UnsupportedVersionException; + +import java.util.Arrays; + +public class SubscriptionResponseWrapper { + final static byte CURRENT_VERSION = 0x00; + private final long[] originalValueHash; + private final FV foreignValue; + private final byte version; + + public SubscriptionResponseWrapper(final long[] originalValueHash, final FV foreignValue) { + this(originalValueHash, foreignValue, CURRENT_VERSION); + } + + public SubscriptionResponseWrapper(final long[] originalValueHash, final FV foreignValue, final byte version) { + if (version != CURRENT_VERSION) { + throw new UnsupportedVersionException("SubscriptionWrapper does not support version " + version); + } + this.originalValueHash = originalValueHash; + this.foreignValue = foreignValue; + this.version = version; + } + + public long[] getOriginalValueHash() { + return originalValueHash; + } + + public FV getForeignValue() { + return foreignValue; + } + + public byte getVersion() { + return version; + } + + @Override + public String toString() { + return "SubscriptionResponseWrapper{" + + "version=" + version + + ", foreignValue=" + foreignValue + + ", originalValueHash=" + Arrays.toString(originalValueHash) + + '}'; + } +} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerde.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerde.java new file mode 100644 index 00000000000..6524b4fc764 --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerde.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
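A minimal round trip through the serde defined below, assuming String foreign values and org.apache.kafka.common.serialization.Serdes for the inner serde (the topic name is arbitrary; it is only passed through):

    final SubscriptionResponseWrapperSerde<String> serde = new SubscriptionResponseWrapperSerde<>(Serdes.String());
    final SubscriptionResponseWrapper<String> wrapper = new SubscriptionResponseWrapper<>(new long[] {1L, 2L}, "foreignValue");
    final byte[] bytes = serde.serializer().serialize("some-topic", wrapper);
    final SubscriptionResponseWrapper<String> decoded = serde.deserializer().deserialize("some-topic", bytes);
    // decoded.getForeignValue() is "foreignValue" and decoded.getOriginalValueHash() is {1L, 2L}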
+ */ +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.errors.UnsupportedVersionException; +import org.apache.kafka.common.serialization.Deserializer; +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serializer; + +import java.nio.ByteBuffer; + +public class SubscriptionResponseWrapperSerde implements Serde> { + private final SubscriptionResponseWrapperSerializer serializer; + private final SubscriptionResponseWrapperDeserializer deserializer; + + public SubscriptionResponseWrapperSerde(final Serde foreignValueSerde) { + serializer = new SubscriptionResponseWrapperSerializer<>(foreignValueSerde.serializer()); + deserializer = new SubscriptionResponseWrapperDeserializer<>(foreignValueSerde.deserializer()); + } + + @Override + public Serializer> serializer() { + return serializer; + } + + @Override + public Deserializer> deserializer() { + return deserializer; + } + + private static final class SubscriptionResponseWrapperSerializer implements Serializer> { + private final Serializer serializer; + + private SubscriptionResponseWrapperSerializer(final Serializer serializer) { + this.serializer = serializer; + } + + @Override + public byte[] serialize(final String topic, final SubscriptionResponseWrapper data) { + //{1-bit-isHashNull}{7-bits-version}{Optional-16-byte-Hash}{n-bytes serialized data} + + //7-bit (0x7F) maximum for data version. + if (Byte.compare((byte) 0x7F, data.getVersion()) < 0) { + throw new UnsupportedVersionException("SubscriptionResponseWrapper version is larger than maximum supported 0x7F"); + } + + final byte[] serializedData = serializer.serialize(topic, data.getForeignValue()); + final int serializedDataLength = serializedData == null ? 0 : serializedData.length; + final long[] originalHash = data.getOriginalValueHash(); + final int hashLength = originalHash == null ? 0 : 2 * Long.BYTES; + + final ByteBuffer buf = ByteBuffer.allocate(1 + hashLength + serializedDataLength); + + if (originalHash != null) { + buf.put(data.getVersion()); + buf.putLong(originalHash[0]); + buf.putLong(originalHash[1]); + } else { + //Don't store hash as it's null. 
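+                //Example: version 0x00 with a null hash sets the flag byte to 0x80 (high bit = isHashNull),
+                //so payload {0x01} encodes as {0x80, 0x01}. With hash {2L, 3L} present, the flag byte stays
+                //0x00 and the encoding is {0x00, <8 bytes of 2L>, <8 bytes of 3L>, 0x01}.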
+ buf.put((byte) (data.getVersion() | (byte) 0x80)); + } + + if (serializedData != null) + buf.put(serializedData); + return buf.array(); + } + + } + + private static final class SubscriptionResponseWrapperDeserializer implements Deserializer> { + private final Deserializer deserializer; + + private SubscriptionResponseWrapperDeserializer(final Deserializer deserializer) { + this.deserializer = deserializer; + } + + @Override + public SubscriptionResponseWrapper deserialize(final String topic, final byte[] data) { + //{1-bit-isHashNull}{7-bits-version}{Optional-16-byte-Hash}{n-bytes serialized data} + + final ByteBuffer buf = ByteBuffer.wrap(data); + final byte versionAndIsHashNull = buf.get(); + final byte version = (byte) (0x7F & versionAndIsHashNull); + final boolean isHashNull = (0x80 & versionAndIsHashNull) == 0x80; + + final long[] hash; + int lengthSum = 1; //The first byte + if (isHashNull) { + hash = null; + } else { + hash = new long[2]; + hash[0] = buf.getLong(); + hash[1] = buf.getLong(); + lengthSum += 2 * Long.BYTES; + } + + final byte[] serializedValue; + if (data.length - lengthSum > 0) { + serializedValue = new byte[data.length - lengthSum]; + buf.get(serializedValue, 0, serializedValue.length); + } else + serializedValue = null; + + final V value = deserializer.deserialize(topic, serializedValue); + return new SubscriptionResponseWrapper<>(hash, value, version); + } + + } + +} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionStoreReceiveProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionStoreReceiveProcessorSupplier.java new file mode 100644 index 00000000000..3d5f5160197 --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionStoreReceiveProcessorSupplier.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.errors.UnsupportedVersionException; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.utils.Bytes; +import org.apache.kafka.streams.kstream.internals.Change; +import org.apache.kafka.streams.processor.AbstractProcessor; +import org.apache.kafka.streams.processor.Processor; +import org.apache.kafka.streams.processor.ProcessorContext; +import org.apache.kafka.streams.processor.ProcessorSupplier; +import org.apache.kafka.streams.processor.To; +import org.apache.kafka.streams.processor.internals.InternalProcessorContext; +import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl; +import org.apache.kafka.streams.processor.internals.metrics.ThreadMetrics; +import org.apache.kafka.streams.state.StoreBuilder; +import org.apache.kafka.streams.state.TimestampedKeyValueStore; +import org.apache.kafka.streams.state.ValueAndTimestamp; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SubscriptionStoreReceiveProcessorSupplier + implements ProcessorSupplier> { + private static final Logger LOG = LoggerFactory.getLogger(SubscriptionStoreReceiveProcessorSupplier.class); + + private final StoreBuilder>> storeBuilder; + private final CombinedKeySchema keySchema; + + public SubscriptionStoreReceiveProcessorSupplier( + final StoreBuilder>> storeBuilder, + final CombinedKeySchema keySchema) { + + this.storeBuilder = storeBuilder; + this.keySchema = keySchema; + } + + @Override + public Processor> get() { + + return new AbstractProcessor>() { + + private TimestampedKeyValueStore> store; + private StreamsMetricsImpl metrics; + private Sensor skippedRecordsSensor; + + @Override + public void init(final ProcessorContext context) { + super.init(context); + final InternalProcessorContext internalProcessorContext = (InternalProcessorContext) context; + + metrics = internalProcessorContext.metrics(); + skippedRecordsSensor = ThreadMetrics.skipRecordSensor(metrics); + store = internalProcessorContext.getStateStore(storeBuilder); + } + + @Override + public void process(final KO key, final SubscriptionWrapper value) { + if (key == null) { + LOG.warn( + "Skipping record due to null foreign key. value=[{}] topic=[{}] partition=[{}] offset=[{}]", + value, context().topic(), context().partition(), context().offset() + ); + skippedRecordsSensor.record(); + return; + } + if (value.getVersion() != SubscriptionWrapper.CURRENT_VERSION) { + //Guard against modifications to SubscriptionWrapper. Need to ensure that there is compatibility + //with previous versions to enable rolling upgrades. Must develop a strategy for upgrading + //from older SubscriptionWrapper versions to newer versions. + throw new UnsupportedVersionException("SubscriptionWrapper is of an incompatible version."); + } + + final Bytes subscriptionKey = keySchema.toBytes(key, value.getPrimaryKey()); + + final ValueAndTimestamp> newValue = ValueAndTimestamp.make(value, context().timestamp()); + final ValueAndTimestamp> oldValue = store.get(subscriptionKey); + + //If the subscriptionWrapper hash indicates a null, must delete from statestore. 
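+                //A null hash encodes removal of this subscription: the primary record was deleted or its
+                //foreign key changed, so future prefix scans over this foreign key must not produce
+                //updates for the primary key anymore.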
+ //This store is used by the prefix scanner in ForeignJoinSubscriptionProcessorSupplier + if (value.getHash() == null) { + store.delete(subscriptionKey); + } else { + store.put(subscriptionKey, newValue); + } + final Change>> change = new Change<>(newValue, oldValue); + // note: key is non-nullable + // note: newValue is non-nullable + context().forward( + new CombinedKey<>(key, value.getPrimaryKey()), + change, + To.all().withTimestamp(newValue.timestamp()) + ); + } + }; + } +} \ No newline at end of file diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapper.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapper.java new file mode 100644 index 00000000000..a757895aecf --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapper.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.errors.UnsupportedVersionException; + +import java.util.Arrays; +import java.util.Objects; + + +public class SubscriptionWrapper { + static final byte CURRENT_VERSION = 0; + + private final long[] hash; + private final Instruction instruction; + private final byte version; + private final K primaryKey; + + public enum Instruction { + //Send nothing. Do not propagate. + DELETE_KEY_NO_PROPAGATE((byte) 0x00), + + //Send (k, null) + DELETE_KEY_AND_PROPAGATE((byte) 0x01), + + //(changing foreign key, but FK+Val may not exist) + //Send (k, fk-val) OR + //Send (k, null) if fk-val does not exist + PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE((byte) 0x02), + + //(first time ever sending key) + //Send (k, fk-val) only if fk-val exists. + PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE((byte) 0x03); + + private final byte value; + Instruction(final byte value) { + this.value = value; + } + + public byte getValue() { + return value; + } + + public static Instruction fromValue(final byte value) { + for (final Instruction i: values()) { + if (i.value == value) { + return i; + } + } + throw new IllegalArgumentException("Unknown instruction byte value = " + value); + } + } + + public SubscriptionWrapper(final long[] hash, final Instruction instruction, final K primaryKey) { + this(hash, instruction, primaryKey, CURRENT_VERSION); + } + + public SubscriptionWrapper(final long[] hash, final Instruction instruction, final K primaryKey, final byte version) { + Objects.requireNonNull(instruction, "instruction cannot be null. Required by downstream processor."); + Objects.requireNonNull(primaryKey, "primaryKey cannot be null. 
Required by downstream processor."); + if (version != CURRENT_VERSION) { + throw new UnsupportedVersionException("SubscriptionWrapper does not support version " + version); + } + + this.instruction = instruction; + this.hash = hash; + this.primaryKey = primaryKey; + this.version = version; + } + + public Instruction getInstruction() { + return instruction; + } + + public long[] getHash() { + return hash; + } + + public K getPrimaryKey() { + return primaryKey; + } + + public byte getVersion() { + return version; + } + + @Override + public String toString() { + return "SubscriptionWrapper{" + + "version=" + version + + ", primaryKey=" + primaryKey + + ", instruction=" + instruction + + ", hash=" + Arrays.toString(hash) + + '}'; + } +} + diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerde.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerde.java new file mode 100644 index 00000000000..ae53ba8b349 --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerde.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.kstream.internals.foreignkeyjoin; + +import org.apache.kafka.common.errors.UnsupportedVersionException; +import org.apache.kafka.common.serialization.Deserializer; +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serializer; + +import java.nio.ByteBuffer; + +public class SubscriptionWrapperSerde implements Serde> { + private final SubscriptionWrapperSerializer serializer; + private final SubscriptionWrapperDeserializer deserializer; + + public SubscriptionWrapperSerde(final Serde primaryKeySerde) { + serializer = new SubscriptionWrapperSerializer<>(primaryKeySerde.serializer()); + deserializer = new SubscriptionWrapperDeserializer<>(primaryKeySerde.deserializer()); + } + + @Override + public Serializer> serializer() { + return serializer; + } + + @Override + public Deserializer> deserializer() { + return deserializer; + } + + private static class SubscriptionWrapperSerializer implements Serializer> { + private final Serializer primaryKeySerializer; + SubscriptionWrapperSerializer(final Serializer primaryKeySerializer) { + this.primaryKeySerializer = primaryKeySerializer; + } + + @Override + public byte[] serialize(final String topic, final SubscriptionWrapper data) { + //{1-bit-isHashNull}{7-bits-version}{1-byte-instruction}{Optional-16-byte-Hash}{PK-serialized} + + //7-bit (0x7F) maximum for data version. 
+ if (Byte.compare((byte) 0x7F, data.getVersion()) < 0) { + throw new UnsupportedVersionException("SubscriptionWrapper version is larger than maximum supported 0x7F"); + } + + final byte[] primaryKeySerializedData = primaryKeySerializer.serialize(topic, data.getPrimaryKey()); + + final ByteBuffer buf; + if (data.getHash() != null) { + buf = ByteBuffer.allocate(2 + 2 * Long.BYTES + primaryKeySerializedData.length); + buf.put(data.getVersion()); + } else { + //Don't store hash as it's null. + buf = ByteBuffer.allocate(2 + primaryKeySerializedData.length); + buf.put((byte) (data.getVersion() | (byte) 0x80)); + } + + buf.put(data.getInstruction().getValue()); + final long[] elem = data.getHash(); + if (data.getHash() != null) { + buf.putLong(elem[0]); + buf.putLong(elem[1]); + } + buf.put(primaryKeySerializedData); + return buf.array(); + } + + } + + private static class SubscriptionWrapperDeserializer implements Deserializer> { + private final Deserializer primaryKeyDeserializer; + SubscriptionWrapperDeserializer(final Deserializer primaryKeyDeserializer) { + this.primaryKeyDeserializer = primaryKeyDeserializer; + } + + @Override + public SubscriptionWrapper deserialize(final String topic, final byte[] data) { + //{7-bits-version}{1-bit-isHashNull}{1-byte-instruction}{Optional-16-byte-Hash}{PK-serialized} + final ByteBuffer buf = ByteBuffer.wrap(data); + final byte versionAndIsHashNull = buf.get(); + final byte version = (byte) (0x7F & versionAndIsHashNull); + final boolean isHashNull = (0x80 & versionAndIsHashNull) == 0x80; + final SubscriptionWrapper.Instruction inst = SubscriptionWrapper.Instruction.fromValue(buf.get()); + + final long[] hash; + int lengthSum = 2; //The first 2 bytes + if (isHashNull) { + hash = null; + } else { + hash = new long[2]; + hash[0] = buf.getLong(); + hash[1] = buf.getLong(); + lengthSum += 2 * Long.BYTES; + } + + final byte[] primaryKeyRaw = new byte[data.length - lengthSum]; //The remaining data is the serialized pk + buf.get(primaryKeyRaw, 0, primaryKeyRaw.length); + final K primaryKey = primaryKeyDeserializer.deserialize(topic, primaryKeyRaw); + + return new SubscriptionWrapper<>(hash, inst, primaryKey, version); + } + + } + +} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/BaseRepartitionNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/BaseRepartitionNode.java index 460f640b5c3..2cc153975b7 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/BaseRepartitionNode.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/BaseRepartitionNode.java @@ -20,6 +20,7 @@ package org.apache.kafka.streams.kstream.internals.graph; import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.common.serialization.Serializer; +import org.apache.kafka.streams.processor.StreamPartitioner; public abstract class BaseRepartitionNode extends StreamsGraphNode { @@ -29,7 +30,7 @@ public abstract class BaseRepartitionNode extends StreamsGraphNode { protected final String sourceName; protected final String repartitionTopic; protected final ProcessorParameters processorParameters; - + protected final StreamPartitioner partitioner; BaseRepartitionNode(final String nodeName, final String sourceName, @@ -37,7 +38,8 @@ public abstract class BaseRepartitionNode extends StreamsGraphNode { final Serde keySerde, final Serde valueSerde, final String sinkName, - final String repartitionTopic) { + 
final String repartitionTopic, + final StreamPartitioner partitioner) { super(nodeName); @@ -47,6 +49,7 @@ public abstract class BaseRepartitionNode extends StreamsGraphNode { this.sourceName = sourceName; this.repartitionTopic = repartitionTopic; this.processorParameters = processorParameters; + this.partitioner = partitioner; } abstract Serializer getValueSerializer(); @@ -61,7 +64,8 @@ public abstract class BaseRepartitionNode extends StreamsGraphNode { ", sinkName='" + sinkName + '\'' + ", sourceName='" + sourceName + '\'' + ", repartitionTopic='" + repartitionTopic + '\'' + - ", processorParameters=" + processorParameters + + ", processorParameters=" + processorParameters + '\'' + + ", partitioner=" + partitioner + "} " + super.toString(); } } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/GroupedTableOperationRepartitionNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/GroupedTableOperationRepartitionNode.java index 4d1b67dbc33..a3f79c50ea9 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/GroupedTableOperationRepartitionNode.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/GroupedTableOperationRepartitionNode.java @@ -43,7 +43,8 @@ public class GroupedTableOperationRepartitionNode extends BaseRepartitionN keySerde, valueSerde, sinkName, - repartitionTopic + repartitionTopic, + null ); } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/KTableKTableForeignKeyJoinResolutionNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/KTableKTableForeignKeyJoinResolutionNode.java new file mode 100644 index 00000000000..672625cfc1c --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/KTableKTableForeignKeyJoinResolutionNode.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams.kstream.internals.graph; + +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.streams.kstream.internals.Change; +import org.apache.kafka.streams.kstream.internals.KTableValueGetterSupplier; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionResponseWrapper; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapper; +import org.apache.kafka.streams.processor.FailOnInvalidTimestamp; +import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder; + +/** + * Too much specific information to generalize so the Foreign Key KTable-KTable join requires a specific node. 
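+ *
+ * The node closes the loop of the foreign-key join: both join processors (one-to-one updates and
+ * prefix-scan updates) are sunk to a final repartition topic keyed by the original primary key, a
+ * new source reads that topic back, and the resolver processor, connected to the original table's
+ * state stores, drops stale results and emits the joined values.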
+ */ +public class KTableKTableForeignKeyJoinResolutionNode extends StreamsGraphNode { + private final ProcessorParameters> joinOneToOneProcessorParameters; + private final ProcessorParameters> joinByPrefixProcessorParameters; + private final ProcessorParameters> resolverProcessorParameters; + private final String finalRepartitionTopicName; + private final String finalRepartitionSinkName; + private final String finalRepartitionSourceName; + private final Serde keySerde; + private final Serde> subResponseSerde; + private final KTableValueGetterSupplier originalValueGetter; + + public KTableKTableForeignKeyJoinResolutionNode(final String nodeName, + final ProcessorParameters> joinOneToOneProcessorParameters, + final ProcessorParameters> joinByPrefixProcessorParameters, + final ProcessorParameters> resolverProcessorParameters, + final String finalRepartitionTopicName, + final String finalRepartitionSinkName, + final String finalRepartitionSourceName, + final Serde keySerde, + final Serde> subResponseSerde, + final KTableValueGetterSupplier originalValueGetter + ) { + super(nodeName); + this.joinOneToOneProcessorParameters = joinOneToOneProcessorParameters; + this.joinByPrefixProcessorParameters = joinByPrefixProcessorParameters; + this.resolverProcessorParameters = resolverProcessorParameters; + this.finalRepartitionTopicName = finalRepartitionTopicName; + this.finalRepartitionSinkName = finalRepartitionSinkName; + this.finalRepartitionSourceName = finalRepartitionSourceName; + this.keySerde = keySerde; + this.subResponseSerde = subResponseSerde; + this.originalValueGetter = originalValueGetter; + } + + @Override + public void writeToTopology(final InternalTopologyBuilder topologyBuilder) { + topologyBuilder.addInternalTopic(finalRepartitionTopicName); + //Repartition back to the original partitioning structure + topologyBuilder.addSink(finalRepartitionSinkName, finalRepartitionTopicName, + keySerde.serializer(), subResponseSerde.serializer(), + null, + joinByPrefixProcessorParameters.processorName(), joinOneToOneProcessorParameters.processorName()); + + topologyBuilder.addSource(null, finalRepartitionSourceName, new FailOnInvalidTimestamp(), + keySerde.deserializer(), subResponseSerde.deserializer(), finalRepartitionTopicName); + + //Connect highwaterProcessor to source, add the state store, and connect the statestore with the processor. 
+ topologyBuilder.addProcessor(resolverProcessorParameters.processorName(), resolverProcessorParameters.processorSupplier(), finalRepartitionSourceName); + topologyBuilder.connectProcessorAndStateStores(resolverProcessorParameters.processorName(), originalValueGetter.storeNames()); + } +} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/OptimizableRepartitionNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/OptimizableRepartitionNode.java index 4797a21399d..e3cf2b80d45 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/OptimizableRepartitionNode.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/OptimizableRepartitionNode.java @@ -22,6 +22,7 @@ import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.common.serialization.Serializer; import org.apache.kafka.streams.processor.FailOnInvalidTimestamp; +import org.apache.kafka.streams.processor.StreamPartitioner; import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder; public class OptimizableRepartitionNode extends BaseRepartitionNode { @@ -32,7 +33,8 @@ public class OptimizableRepartitionNode extends BaseRepartitionNode final Serde keySerde, final Serde valueSerde, final String sinkName, - final String repartitionTopic) { + final String repartitionTopic, + final StreamPartitioner partitioner) { super( nodeName, @@ -41,9 +43,9 @@ public class OptimizableRepartitionNode extends BaseRepartitionNode keySerde, valueSerde, sinkName, - repartitionTopic + repartitionTopic, + partitioner ); - } public Serde keySerde() { @@ -91,7 +93,7 @@ public class OptimizableRepartitionNode extends BaseRepartitionNode repartitionTopic, keySerializer, getValueSerializer(), - null, + partitioner, processorParameters.processorName() ); @@ -120,6 +122,7 @@ public class OptimizableRepartitionNode extends BaseRepartitionNode private String sinkName; private String sourceName; private String repartitionTopic; + private StreamPartitioner partitioner; private OptimizableRepartitionNodeBuilder() { } @@ -160,6 +163,11 @@ public class OptimizableRepartitionNode extends BaseRepartitionNode return this; } + public OptimizableRepartitionNodeBuilder withPartitioner(final StreamPartitioner partitioner) { + this.partitioner = partitioner; + return this; + } + public OptimizableRepartitionNode build() { return new OptimizableRepartitionNode<>( @@ -169,7 +177,8 @@ public class OptimizableRepartitionNode extends BaseRepartitionNode keySerde, valueSerde, sinkName, - repartitionTopic + repartitionTopic, + partitioner ); } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorGraphNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorGraphNode.java index 2cfe3ccb54c..5c75a09816a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorGraphNode.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorGraphNode.java @@ -28,6 +28,13 @@ public class ProcessorGraphNode extends StreamsGraphNode { private final ProcessorParameters processorParameters; + public ProcessorGraphNode(final ProcessorParameters processorParameters) { + + super(processorParameters.processorName()); + + this.processorParameters = processorParameters; + } + public ProcessorGraphNode(final String nodeName, final ProcessorParameters processorParameters) 
{ diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StatefulProcessorNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StatefulProcessorNode.java index 1e910cea151..6ed2917da67 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StatefulProcessorNode.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StatefulProcessorNode.java @@ -18,21 +18,36 @@ package org.apache.kafka.streams.kstream.internals.graph; +import org.apache.kafka.streams.kstream.internals.KTableValueGetterSupplier; import org.apache.kafka.streams.processor.ProcessorSupplier; import org.apache.kafka.streams.processor.StateStore; import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder; import org.apache.kafka.streams.state.StoreBuilder; import java.util.Arrays; +import java.util.Set; +import java.util.stream.Stream; public class StatefulProcessorNode extends ProcessorGraphNode { private final String[] storeNames; private final StoreBuilder storeBuilder; + /** + * Create a node representing a stateful processor, where the named stores have already been registered. + */ + public StatefulProcessorNode(final ProcessorParameters processorParameters, + final Set> preRegisteredStores, + final Set> valueGetterSuppliers) { + super(processorParameters.processorName(), processorParameters); + final Stream registeredStoreNames = preRegisteredStores.stream().map(StoreBuilder::name); + final Stream valueGetterStoreNames = valueGetterSuppliers.stream().flatMap(s -> Arrays.stream(s.storeNames())); + storeNames = Stream.concat(registeredStoreNames, valueGetterStoreNames).toArray(String[]::new); + storeBuilder = null; + } /** - * Create a node representing a stateful processor, where the named store has already been registered. + * Create a node representing a stateful processor, where the named stores have already been registered. 
*/ public StatefulProcessorNode(final String nodeName, final ProcessorParameters processorParameters, @@ -80,5 +95,6 @@ public class StatefulProcessorNode extends ProcessorGraphNode { if (storeBuilder != null) { topologyBuilder.addStateStore(storeBuilder, processorName); } + } } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StreamSinkNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StreamSinkNode.java index dfe7f9e44b0..40ce3576005 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StreamSinkNode.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StreamSinkNode.java @@ -24,6 +24,7 @@ import org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner; import org.apache.kafka.streams.processor.StreamPartitioner; import org.apache.kafka.streams.processor.TopicNameExtractor; import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder; +import org.apache.kafka.streams.processor.internals.StaticTopicNameExtractor; public class StreamSinkNode extends StreamsGraphNode { @@ -60,6 +61,9 @@ public class StreamSinkNode extends StreamsGraphNode { @SuppressWarnings("unchecked") final StreamPartitioner windowedPartitioner = (StreamPartitioner) new WindowedStreamPartitioner((WindowedSerializer) keySerializer); topologyBuilder.addSink(nodeName(), topicNameExtractor, keySerializer, valSerializer, windowedPartitioner, parentNames); + } else if (topicNameExtractor instanceof StaticTopicNameExtractor) { + final String topicName = ((StaticTopicNameExtractor) topicNameExtractor).topicName; + topologyBuilder.addSink(nodeName(), topicName, keySerializer, valSerializer, partitioner, parentNames); } else { topologyBuilder.addSink(nodeName(), topicNameExtractor, keySerializer, valSerializer, partitioner, parentNames); } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalProcessorContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalProcessorContext.java index 2a1d05e2807..23f7ef86ca0 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalProcessorContext.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalProcessorContext.java @@ -18,7 +18,9 @@ package org.apache.kafka.streams.processor.internals; import org.apache.kafka.streams.processor.ProcessorContext; import org.apache.kafka.streams.processor.RecordContext; +import org.apache.kafka.streams.processor.StateStore; import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl; +import org.apache.kafka.streams.state.StoreBuilder; import org.apache.kafka.streams.state.internals.ThreadCache; /** @@ -66,4 +68,15 @@ public interface InternalProcessorContext extends ProcessorContext { * Mark this context as being uninitialized */ void uninitialize(); + + /** + * Get a correctly typed state store, given a handle on the original builder. 
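+ * For example: {@code final TimestampedKeyValueStore<Bytes, ValueAndTimestamp<SubscriptionWrapper<K>>> store = context.getStateStore(storeBuilder);}
+ * spares the caller the unchecked cast that {@code getStateStore(builder.name())} would otherwise require.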
+ * @param builder + * @param + * @return + */ + @SuppressWarnings("unchecked") + default T getStateStore(final StoreBuilder builder) { + return (T) getStateStore(builder.name()); + } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilder.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilder.java index 3a7bd55668d..d34ff00c686 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilder.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilder.java @@ -29,8 +29,8 @@ import org.apache.kafka.streams.processor.TimestampExtractor; import org.apache.kafka.streams.processor.TopicNameExtractor; import org.apache.kafka.streams.state.StoreBuilder; import org.apache.kafka.streams.state.internals.SessionStoreBuilder; -import org.apache.kafka.streams.state.internals.WindowStoreBuilder; import org.apache.kafka.streams.state.internals.TimestampedWindowStoreBuilder; +import org.apache.kafka.streams.state.internals.WindowStoreBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,6 +50,7 @@ import java.util.Objects; import java.util.Set; import java.util.TreeSet; import java.util.regex.Pattern; +import java.util.stream.Collectors; public class InternalTopologyBuilder { @@ -1149,21 +1150,45 @@ public class InternalTopologyBuilder { } public synchronized Collection> copartitionGroups() { - final List> list = new ArrayList<>(copartitionSourceGroups.size()); - for (final Set nodeNames : copartitionSourceGroups) { - final Set copartitionGroup = new HashSet<>(); - for (final String node : nodeNames) { - final List topics = nodeToSourceTopics.get(node); - if (topics != null) { - copartitionGroup.addAll(maybeDecorateInternalSourceTopics(topics)); + // compute transitive closures of copartitionGroups to relieve registering code to know all members + // of a copartitionGroup at the same time + final List> copartitionSourceTopics = + copartitionSourceGroups + .stream() + .map(sourceGroup -> + sourceGroup + .stream() + .flatMap(node -> maybeDecorateInternalSourceTopics(nodeToSourceTopics.get(node)).stream()) + .collect(Collectors.toSet()) + ).collect(Collectors.toList()); + + final Map> topicsToCopartitionGroup = new LinkedHashMap<>(); + for (final Set topics : copartitionSourceTopics) { + if (topics != null) { + Set coparititonGroup = null; + for (final String topic : topics) { + coparititonGroup = topicsToCopartitionGroup.get(topic); + if (coparititonGroup != null) { + break; + } + } + if (coparititonGroup == null) { + coparititonGroup = new HashSet<>(); + } + coparititonGroup.addAll(maybeDecorateInternalSourceTopics(topics)); + for (final String topic : topics) { + topicsToCopartitionGroup.put(topic, coparititonGroup); } } - list.add(Collections.unmodifiableSet(copartitionGroup)); } - return Collections.unmodifiableList(list); + final Set> uniqueCopartitionGroups = new HashSet<>(topicsToCopartitionGroup.values()); + return Collections.unmodifiableList(new ArrayList<>(uniqueCopartitionGroups)); } private List maybeDecorateInternalSourceTopics(final Collection sourceTopics) { + if (sourceTopics == null) { + return Collections.emptyList(); + } final List decoratedTopics = new ArrayList<>(); for (final String topic : sourceTopics) { if (internalTopicNames.contains(topic)) { diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBPrefixIterator.java 
b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBPrefixIterator.java new file mode 100644 index 00000000000..b84175e1ee0 --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBPrefixIterator.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.state.internals; + +import org.apache.kafka.common.utils.Bytes; +import org.apache.kafka.streams.state.KeyValueIterator; +import org.rocksdb.RocksIterator; + +import java.util.Set; + +class RocksDBPrefixIterator extends RocksDbIterator { + private byte[] rawPrefix; + + RocksDBPrefixIterator(final String name, + final RocksIterator newIterator, + final Set> openIterators, + final Bytes prefix) { + super(name, newIterator, openIterators); + rawPrefix = prefix.get(); + newIterator.seek(rawPrefix); + } + + @Override + public synchronized boolean hasNext() { + if (!super.hasNext()) { + return false; + } + + final byte[] rawNextKey = super.peekNextKey().get(); + for (int i = 0; i < rawPrefix.length; i++) { + if (i == rawNextKey.length) { + throw new IllegalStateException("Unexpected RocksDB Key Value. Should have been skipped with seek."); + } + if (rawNextKey[i] != rawPrefix[i]) { + return false; + } + } + return true; + } +} \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ForeignKeyJoinSuite.java b/streams/src/test/java/org/apache/kafka/streams/integration/ForeignKeyJoinSuite.java new file mode 100644 index 00000000000..6245b11af60 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/ForeignKeyJoinSuite.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
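The hasNext() override in RocksDBPrefixIterator above amounts to this standalone prefix check (an illustrative restatement, not code from the patch):

    // True when 'key' starts with 'prefix'. In the iterator above, seek(prefix) guarantees the key
    // is >= prefix, so a key that runs out of bytes while still matching cannot occur; the iterator
    // throws IllegalStateException for that case, while this sketch simply returns false.
    static boolean hasPrefix(final byte[] key, final byte[] prefix) {
        if (key.length < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (key[i] != prefix[i]) {
                return false;
            }
        }
        return true;
    }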
+ */ +package org.apache.kafka.streams.integration; + +import org.apache.kafka.common.utils.BytesTest; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.CombinedKeySchemaTest; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionResponseWrapperSerdeTest; +import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapperSerdeTest; +import org.junit.runner.RunWith; +import org.junit.runners.Suite; + +/** + * This suite runs all the tests related to the KTable-KTable foreign key join feature. + * + * It can be used from an IDE to selectively just run these tests when developing code related to KTable-KTable + * foreign key join. + * + * If desired, it can also be added to a Gradle build task, although this isn't strictly necessary, since all + * these tests are already included in the `:streams:test` task. + */ +@RunWith(Suite.class) +@Suite.SuiteClasses({ + BytesTest.class, + KTableKTableForeignKeyInnerJoinMultiIntegrationTest.class, + KTableKTableForeignKeyJoinIntegrationTest.class, + CombinedKeySchemaTest.class, + SubscriptionWrapperSerdeTest.class, + SubscriptionResponseWrapperSerdeTest.class +}) +public class ForeignKeyJoinSuite { +} + + diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinMultiIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinMultiIntegrationTest.java new file mode 100644 index 00000000000..ad746d8f611 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinMultiIntegrationTest.java @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.integration; + +import kafka.utils.MockTime; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.serialization.FloatSerializer; +import org.apache.kafka.common.serialization.IntegerDeserializer; +import org.apache.kafka.common.serialization.IntegerSerializer; +import org.apache.kafka.common.serialization.LongSerializer; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.common.utils.Bytes; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.StreamsBuilder; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; +import org.apache.kafka.streams.kstream.Consumed; +import org.apache.kafka.streams.kstream.KTable; +import org.apache.kafka.streams.kstream.Materialized; +import org.apache.kafka.streams.kstream.Produced; +import org.apache.kafka.streams.kstream.ValueJoiner; +import org.apache.kafka.streams.state.KeyValueStore; +import org.apache.kafka.test.IntegrationTest; +import org.apache.kafka.test.TestUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Properties; +import java.util.Set; +import java.util.function.Function; + +import static org.junit.Assert.assertEquals; + +@Category({IntegrationTest.class}) +public class KTableKTableForeignKeyInnerJoinMultiIntegrationTest { + private final static int NUM_BROKERS = 1; + + @ClassRule + public final static EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS); + private final static MockTime MOCK_TIME = CLUSTER.time; + private final static String TABLE_1 = "table1"; + private final static String TABLE_2 = "table2"; + private final static String TABLE_3 = "table3"; + private final static String OUTPUT = "output-"; + private static Properties streamsConfig; + private KafkaStreams streams; + private KafkaStreams streamsTwo; + private KafkaStreams streamsThree; + private final static Properties CONSUMER_CONFIG = new Properties(); + + private final static Properties PRODUCER_CONFIG_1 = new Properties(); + private final static Properties PRODUCER_CONFIG_2 = new Properties(); + private final static Properties PRODUCER_CONFIG_3 = new Properties(); + + @BeforeClass + public static void beforeTest() throws Exception { + //Use multiple partitions to ensure distribution of keys. 
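+        //The topics below deliberately get different partition counts (3, 5, 7, and 11): unlike the
+        //existing same-key KTable joins, the foreign-key join repartitions internally, so the input
+        //tables do not need to be copartitioned.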
+ + CLUSTER.createTopic(TABLE_1, 3, 1); + CLUSTER.createTopic(TABLE_2, 5, 1); + CLUSTER.createTopic(TABLE_3, 7, 1); + CLUSTER.createTopic(OUTPUT, 11, 1); + + PRODUCER_CONFIG_1.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + PRODUCER_CONFIG_1.put(ProducerConfig.ACKS_CONFIG, "all"); + PRODUCER_CONFIG_1.put(ProducerConfig.RETRIES_CONFIG, 0); + PRODUCER_CONFIG_1.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class); + PRODUCER_CONFIG_1.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, FloatSerializer.class); + + PRODUCER_CONFIG_2.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + PRODUCER_CONFIG_2.put(ProducerConfig.ACKS_CONFIG, "all"); + PRODUCER_CONFIG_2.put(ProducerConfig.RETRIES_CONFIG, 0); + PRODUCER_CONFIG_2.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + PRODUCER_CONFIG_2.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class); + + PRODUCER_CONFIG_3.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + PRODUCER_CONFIG_3.put(ProducerConfig.ACKS_CONFIG, "all"); + PRODUCER_CONFIG_3.put(ProducerConfig.RETRIES_CONFIG, 0); + PRODUCER_CONFIG_3.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class); + PRODUCER_CONFIG_3.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + + streamsConfig = new Properties(); + streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + streamsConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + streamsConfig.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); + streamsConfig.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); + streamsConfig.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100); + + final List> table1 = Arrays.asList( + new KeyValue<>(1, 1.33f), + new KeyValue<>(2, 2.22f), + new KeyValue<>(3, -1.22f), //Won't be joined in yet. + new KeyValue<>(4, -2.22f) //Won't be joined in at all. + ); + + //Partitions pre-computed using the default Murmur2 hash, just to ensure that all 3 partitions will be exercised. + final List> table2 = Arrays.asList( + new KeyValue<>("0", 0L), //partition 2 + new KeyValue<>("1", 10L), //partition 0 + new KeyValue<>("2", 20L), //partition 2 + new KeyValue<>("3", 30L), //partition 2 + new KeyValue<>("4", 40L), //partition 1 + new KeyValue<>("5", 50L), //partition 0 + new KeyValue<>("6", 60L), //partition 1 + new KeyValue<>("7", 70L), //partition 0 + new KeyValue<>("8", 80L), //partition 0 + new KeyValue<>("9", 90L) //partition 2 + ); + + //Partitions pre-computed using the default Murmur2 hash, just to ensure that all 3 partitions will be exercised. 
+ final List> table3 = Arrays.asList( + new KeyValue<>(10, "waffle") + ); + + IntegrationTestUtils.produceKeyValuesSynchronously(TABLE_1, table1, PRODUCER_CONFIG_1, MOCK_TIME); + IntegrationTestUtils.produceKeyValuesSynchronously(TABLE_2, table2, PRODUCER_CONFIG_2, MOCK_TIME); + IntegrationTestUtils.produceKeyValuesSynchronously(TABLE_3, table3, PRODUCER_CONFIG_3, MOCK_TIME); + + CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, "ktable-ktable-consumer"); + CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class); + CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + } + + @Before + public void before() throws IOException { + IntegrationTestUtils.purgeLocalStreamsState(streamsConfig); + } + + @After + public void after() throws IOException { + if (streams != null) { + streams.close(); + streams = null; + } + if (streamsTwo != null) { + streamsTwo.close(); + streamsTwo = null; + } + if (streamsThree != null) { + streamsThree.close(); + streamsThree = null; + } + IntegrationTestUtils.purgeLocalStreamsState(streamsConfig); + } + + private enum JoinType { + INNER + } + + @Test + public void shouldInnerJoinMultiPartitionQueryable() throws Exception { + final Set> expectedOne = new HashSet<>(); + expectedOne.add(new KeyValue<>(1, "value1=1.33,value2=10,value3=waffle")); + + verifyKTableKTableJoin(JoinType.INNER, expectedOne, true); + } + + private void verifyKTableKTableJoin(final JoinType joinType, + final Set> expectedResult, + final boolean verifyQueryableState) throws Exception { + final String queryableName = verifyQueryableState ? joinType + "-store1" : null; + final String queryableNameTwo = verifyQueryableState ? 
joinType + "-store2" : null; + streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, joinType + queryableName); + + streams = prepareTopology(queryableName, queryableNameTwo); + streamsTwo = prepareTopology(queryableName, queryableNameTwo); + streamsThree = prepareTopology(queryableName, queryableNameTwo); + streams.start(); + streamsTwo.start(); + streamsThree.start(); + + final Set> result = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived( + CONSUMER_CONFIG, + OUTPUT, + expectedResult.size())); + + assertEquals(expectedResult, result); + } + + private KafkaStreams prepareTopology(final String queryableName, final String queryableNameTwo) { + final StreamsBuilder builder = new StreamsBuilder(); + + final KTable table1 = builder.table(TABLE_1, Consumed.with(Serdes.Integer(), Serdes.Float())); + final KTable table2 = builder.table(TABLE_2, Consumed.with(Serdes.String(), Serdes.Long())); + final KTable table3 = builder.table(TABLE_3, Consumed.with(Serdes.Integer(), Serdes.String())); + + final Materialized> materialized; + if (queryableName != null) { + materialized = Materialized.>as(queryableName) + .withKeySerde(Serdes.Integer()) + .withValueSerde(Serdes.String()) + .withCachingDisabled(); + } else { + throw new RuntimeException("Current implementation of joinOnForeignKey requires a materialized store"); + } + + final Materialized> materializedTwo; + if (queryableNameTwo != null) { + materializedTwo = Materialized.>as(queryableNameTwo) + .withKeySerde(Serdes.Integer()) + .withValueSerde(Serdes.String()) + .withCachingDisabled(); + } else { + throw new RuntimeException("Current implementation of joinOnForeignKey requires a materialized store"); + } + + final Function tableOneKeyExtractor = value -> Integer.toString((int) value.floatValue()); + final Function joinedTableKeyExtractor = value -> { + //Hardwired to return the desired foreign key as a test shortcut + if (value.contains("value2=10")) + return 10; + else + return 0; + }; + + final ValueJoiner joiner = (value1, value2) -> "value1=" + value1 + ",value2=" + value2; + final ValueJoiner joinerTwo = (value1, value2) -> value1 + ",value3=" + value2; + + table1.join(table2, tableOneKeyExtractor, joiner, materialized) + .join(table3, joinedTableKeyExtractor, joinerTwo, materializedTwo) + .toStream() + .to(OUTPUT, Produced.with(Serdes.Integer(), Serdes.String())); + + return new KafkaStreams(builder.build(streamsConfig), streamsConfig); + } +} diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinIntegrationTest.java new file mode 100644 index 00000000000..ae0f3c200fa --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinIntegrationTest.java @@ -0,0 +1,699 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.integration; + +import kafka.utils.MockTime; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.serialization.FloatSerializer; +import org.apache.kafka.common.serialization.IntegerDeserializer; +import org.apache.kafka.common.serialization.IntegerSerializer; +import org.apache.kafka.common.serialization.LongSerializer; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.common.utils.Bytes; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.StreamsBuilder; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.Topology; +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; +import org.apache.kafka.streams.kstream.Consumed; +import org.apache.kafka.streams.kstream.KTable; +import org.apache.kafka.streams.kstream.Materialized; +import org.apache.kafka.streams.kstream.Named; +import org.apache.kafka.streams.kstream.Produced; +import org.apache.kafka.streams.kstream.ValueJoiner; +import org.apache.kafka.streams.state.KeyValueIterator; +import org.apache.kafka.streams.state.KeyValueStore; +import org.apache.kafka.streams.state.QueryableStoreTypes; +import org.apache.kafka.streams.state.ReadOnlyKeyValueStore; +import org.apache.kafka.test.IntegrationTest; +import org.apache.kafka.test.TestUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.function.Function; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertArrayEquals; + + +@Category({IntegrationTest.class}) +public class KTableKTableForeignKeyJoinIntegrationTest { + private final static int NUM_BROKERS = 1; + + @ClassRule + public final static EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS); + private final static MockTime MOCK_TIME = CLUSTER.time; + private final static String LEFT_TABLE = "left_table"; + private final static String RIGHT_TABLE = "right_table"; + private final static String OUTPUT = "output-topic"; + private static Properties streamsConfig; + private KafkaStreams streams; + private KafkaStreams streamsTwo; + private KafkaStreams streamsThree; + private static final Properties CONSUMER_CONFIG = new Properties(); + private static final Properties LEFT_PROD_CONF = new Properties(); + private static final Properties RIGHT_PROD_CONF = new Properties(); 
+
+    @BeforeClass
+    public static void beforeTest() {
+        //Use multiple partitions to ensure distribution of keys.
+        LEFT_PROD_CONF.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        LEFT_PROD_CONF.put(ProducerConfig.ACKS_CONFIG, "all");
+        LEFT_PROD_CONF.put(ProducerConfig.RETRIES_CONFIG, 0);
+        LEFT_PROD_CONF.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
+        LEFT_PROD_CONF.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, FloatSerializer.class);
+
+        RIGHT_PROD_CONF.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        RIGHT_PROD_CONF.put(ProducerConfig.ACKS_CONFIG, "all");
+        RIGHT_PROD_CONF.put(ProducerConfig.RETRIES_CONFIG, 0);
+        RIGHT_PROD_CONF.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
+        RIGHT_PROD_CONF.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
+
+        streamsConfig = new Properties();
+        streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        streamsConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        streamsConfig.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath());
+        streamsConfig.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
+        streamsConfig.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100);
+        streamsConfig.put(StreamsConfig.TOPOLOGY_OPTIMIZATION, StreamsConfig.OPTIMIZE);
+
+        CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, "ktable-ktable-consumer");
+        CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class);
+        CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+    }
+
+    @Before
+    public void before() throws IOException, InterruptedException {
+        CLUSTER.deleteTopicsAndWait(LEFT_TABLE);
+        CLUSTER.deleteTopicsAndWait(RIGHT_TABLE);
+        CLUSTER.deleteTopicsAndWait(OUTPUT);
+
+        CLUSTER.createTopic(LEFT_TABLE, 3, 1);
+        CLUSTER.createTopic(RIGHT_TABLE, 3, 1);
+        CLUSTER.createTopic(OUTPUT, 3, 1);
+
+        IntegrationTestUtils.purgeLocalStreamsState(streamsConfig);
+    }
+
+    @After
+    public void after() throws IOException {
+        if (streams != null) {
+            streams.close();
+            streams = null;
+        }
+        if (streamsTwo != null) {
+            streamsTwo.close();
+            streamsTwo = null;
+        }
+        if (streamsThree != null) {
+            streamsThree.close();
+            streamsThree = null;
+        }
+        IntegrationTestUtils.purgeLocalStreamsState(streamsConfig);
+    }
+
+    @Test
+    public void doInnerJoinFromLeftThenDeleteLeftEntity() throws Exception {
+        final List<KeyValue<String, Long>> rightTableEvents = Arrays.asList(new KeyValue<>("1", 10L), new KeyValue<>("2", 20L)); //partition 0
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableEvents, RIGHT_PROD_CONF, MOCK_TIME);
+
+        final String currentMethodName = new Object() { }
+            .getClass()
+            .getEnclosingMethod()
+            .getName();
+        createAndStartStreamsApplication(currentMethodName, false);
+
+        final List<KeyValue<Integer, Float>> leftTableEvents = Arrays.asList(new KeyValue<>(1, 1.33f), new KeyValue<>(2, 2.77f));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+
+        final Set<KeyValue<Integer, String>> expected = new HashSet<>();
+        expected.add(new KeyValue<>(1, "value1=1.33,value2=10"));
+        expected.add(new KeyValue<>(2, "value1=2.77,value2=20"));
+
+        final Set<KeyValue<Integer, String>> result = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size()));
+        assertEquals(expected, result);
+
+        //Now delete one LHS entity such that one delete is propagated down to the output.
+        final Set<KeyValue<Integer, String>> expectedDeleted = new HashSet<>();
+        expectedDeleted.add(new KeyValue<>(1, null));
+
+        final List<KeyValue<Integer, Float>> leftTableDeleteEvents = Arrays.asList(new KeyValue<>(1, null));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableDeleteEvents, LEFT_PROD_CONF, MOCK_TIME);
+        final Set<KeyValue<Integer, String>> resultDeleted = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expectedDeleted.size()));
+        assertEquals(expectedDeleted, resultDeleted);
+
+        //Ensure the state stores have the correct values within:
+        final Set<KeyValue<Integer, String>> expMatResults = new HashSet<>();
+        expMatResults.add(new KeyValue<>(2, "value1=2.77,value2=20"));
+        validateQueryableStoresContainExpectedKeyValues(expMatResults, currentMethodName);
+    }
+
+    @Test
+    public void doLeftJoinFromLeftThenDeleteLeftEntity() throws Exception {
+        final List<KeyValue<String, Long>> rightTableEvents = Arrays.asList(new KeyValue<>("1", 10L), new KeyValue<>("2", 20L)); //partition 0
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableEvents, RIGHT_PROD_CONF, MOCK_TIME);
+
+        final String currentMethodName = new Object() { }
+            .getClass()
+            .getEnclosingMethod()
+            .getName();
+        createAndStartStreamsApplication(currentMethodName, true);
+
+        final List<KeyValue<Integer, Float>> leftTableEvents = Arrays.asList(new KeyValue<>(1, 1.33f), new KeyValue<>(2, 2.77f));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+
+        final Set<KeyValue<Integer, String>> expected = new HashSet<>();
+        expected.add(new KeyValue<>(1, "value1=1.33,value2=10"));
+        expected.add(new KeyValue<>(2, "value1=2.77,value2=20"));
+
+        final Set<KeyValue<Integer, String>> result = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size()));
+        assertEquals(expected, result);
+
+        //Now delete one LHS entity such that one delete is propagated down to the output.
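+        //A null value for an existing key is a tombstone: even in a LEFT join, deleting the LHS row itself
+        //retracts the join result entirely, so the output for key 1 is (1, null) rather than a joined value.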
+        final Set<KeyValue<Integer, String>> expectedDeleted = new HashSet<>();
+        expectedDeleted.add(new KeyValue<>(1, null));
+
+        final List<KeyValue<Integer, Float>> leftTableDeleteEvents = Arrays.asList(new KeyValue<>(1, null));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableDeleteEvents, LEFT_PROD_CONF, MOCK_TIME);
+        final Set<KeyValue<Integer, String>> resultDeleted = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expectedDeleted.size()));
+        assertEquals(expectedDeleted, resultDeleted);
+
+        //Ensure the state stores have the correct values within:
+        final Set<KeyValue<Integer, String>> expMatResults = new HashSet<>();
+        expMatResults.add(new KeyValue<>(2, "value1=2.77,value2=20"));
+        validateQueryableStoresContainExpectedKeyValues(expMatResults, currentMethodName);
+    }
+
+    @Test
+    public void doInnerJoinFromRightThenDeleteRightEntity() throws Exception {
+        final List<KeyValue<Integer, Float>> leftTableEvents = Arrays.asList(
+            new KeyValue<>(1, 1.33f),
+            new KeyValue<>(2, 1.77f),
+            new KeyValue<>(3, 3.77f));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+        final String currentMethodName = new Object() { }
+            .getClass()
+            .getEnclosingMethod()
+            .getName();
+        createAndStartStreamsApplication(currentMethodName, false);
+
+        final List<KeyValue<String, Long>> rightTableEvents = Arrays.asList(
+            new KeyValue<>("1", 10L),  //partition 0
+            new KeyValue<>("2", 20L),  //partition 2
+            new KeyValue<>("3", 30L)); //partition 2
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableEvents, RIGHT_PROD_CONF, MOCK_TIME);
+
+        //Ensure that the joined values exist in the output
+        final Set<KeyValue<Integer, String>> expected = new HashSet<>();
+        expected.add(new KeyValue<>(1, "value1=1.33,value2=10")); //Will be deleted.
+        expected.add(new KeyValue<>(2, "value1=1.77,value2=10")); //Will be deleted.
+        expected.add(new KeyValue<>(3, "value1=3.77,value2=30")); //Will not be deleted.
+
+        final Set<KeyValue<Integer, String>> result = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size()));
+        assertEquals(expected, result);
+
+        //Now delete the RHS entity such that all matching keys have deletes propagated.
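+        //LHS keys 1 (FK "1" from 1.33) and 2 (FK "1" from 1.77) both subscribed to RHS key "1", so one RHS
+        //tombstone fans out to two output tombstones; key 3 (FK "3") is unaffected.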
+        final Set<KeyValue<Integer, String>> expectedDeleted = new HashSet<>();
+        expectedDeleted.add(new KeyValue<>(1, null));
+        expectedDeleted.add(new KeyValue<>(2, null));
+
+        final List<KeyValue<String, Long>> rightTableDeleteEvents = Arrays.asList(new KeyValue<>("1", null));
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableDeleteEvents, RIGHT_PROD_CONF, MOCK_TIME);
+        final Set<KeyValue<Integer, String>> resultDeleted = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expectedDeleted.size()));
+        assertEquals(expectedDeleted, resultDeleted);
+
+        //Ensure the state stores have the correct values within:
+        final Set<KeyValue<Integer, String>> expMatResults = new HashSet<>();
+        expMatResults.add(new KeyValue<>(3, "value1=3.77,value2=30"));
+        validateQueryableStoresContainExpectedKeyValues(expMatResults, currentMethodName);
+    }
+
+    @Test
+    public void doLeftJoinFromRightThenDeleteRightEntity() throws Exception {
+        final List<KeyValue<Integer, Float>> leftTableEvents = Arrays.asList(
+            new KeyValue<>(1, 1.33f),
+            new KeyValue<>(2, 1.77f),
+            new KeyValue<>(3, 3.77f));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+        final String currentMethodName = new Object() { }
+            .getClass()
+            .getEnclosingMethod()
+            .getName();
+        createAndStartStreamsApplication(currentMethodName, true);
+
+        final List<KeyValue<String, Long>> rightTableEvents = Arrays.asList(
+            new KeyValue<>("1", 10L),  //partition 0
+            new KeyValue<>("2", 20L),  //partition 2
+            new KeyValue<>("3", 30L)); //partition 2
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableEvents, RIGHT_PROD_CONF, MOCK_TIME);
+
+        //Ensure that the joined values exist in the output
+        final Set<KeyValue<Integer, String>> expected = new HashSet<>();
+        expected.add(new KeyValue<>(1, "value1=1.33,value2=10")); //Will be deleted.
+        expected.add(new KeyValue<>(2, "value1=1.77,value2=10")); //Will be deleted.
+        expected.add(new KeyValue<>(3, "value1=3.77,value2=30")); //Will not be deleted.
+
+        final Set<KeyValue<Integer, String>> result = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size()));
+        assertEquals(expected, result);
+
+        //Now delete the RHS entity such that all matching keys have deletes propagated.
+        //This will exercise the joiner with the RHS value == null.
+        final Set<KeyValue<Integer, String>> expectedDeleted = new HashSet<>();
+        expectedDeleted.add(new KeyValue<>(1, "value1=1.33,value2=null"));
+        expectedDeleted.add(new KeyValue<>(2, "value1=1.77,value2=null"));
+
+        final List<KeyValue<String, Long>> rightTableDeleteEvents = Arrays.asList(new KeyValue<>("1", null));
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableDeleteEvents, RIGHT_PROD_CONF, MOCK_TIME);
+        final Set<KeyValue<Integer, String>> resultDeleted = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expectedDeleted.size()));
+        assertEquals(expectedDeleted, resultDeleted);
+
+        //Ensure the state stores have the correct values within:
+        final Set<KeyValue<Integer, String>> expMatResults = new HashSet<>();
+        expMatResults.add(new KeyValue<>(1, "value1=1.33,value2=null"));
+        expMatResults.add(new KeyValue<>(2, "value1=1.77,value2=null"));
+        expMatResults.add(new KeyValue<>(3, "value1=3.77,value2=30"));
+        validateQueryableStoresContainExpectedKeyValues(expMatResults, currentMethodName);
+    }
+
+    @Test
+    public void doInnerJoinProduceNullsWhenValueHasNonMatchingForeignKey() throws Exception {
+        //There is no matching extracted foreign key of "8" anywhere, so this should not produce any output for an
+        //INNER join: the state is only transitioning from oldValue=null -> newValue=8.33, and a no-match on a
+        //previously absent key has nothing to retract.
+        List<KeyValue<Integer, Float>> leftTableEvents = Arrays.asList(new KeyValue<>(1, 8.33f));
+        final List<KeyValue<String, Long>> rightTableEvents = Arrays.asList(new KeyValue<>("1", 10L)); //partition 0
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableEvents, RIGHT_PROD_CONF, MOCK_TIME);
+
+        final String currentMethodName = new Object() { }
+            .getClass()
+            .getEnclosingMethod()
+            .getName();
+        createAndStartStreamsApplication(currentMethodName, false);
+
+        //There is also no matching extracted foreign key for "18" anywhere. This WILL produce a null output for the
+        //INNER join, since we cannot remember (maintain state) that FK=8 also produced a null result.
+        leftTableEvents = Arrays.asList(new KeyValue<>(1, 18.00f));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+
+        final List<KeyValue<Integer, String>> expected = new LinkedList<>();
+        expected.add(new KeyValue<>(1, null));
+
+        final List<KeyValue<Integer, String>> result = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size());
+        assertEquals(expected, result);
+
+        //Another change to an FK that has no match on the RHS will result in another null.
+        leftTableEvents = Arrays.asList(new KeyValue<>(1, 100.00f));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+        //Consume the next event - note that we are using the same consumerGroupId, so this will consume a new event.
+        final List<KeyValue<Integer, String>> result2 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size());
+        assertEquals(expected, result2);
+
+        //Now set the LHS event FK to match the rightTableEvents key-value.
+        leftTableEvents = Arrays.asList(new KeyValue<>(1, 1.11f));
+
+        final List<KeyValue<Integer, String>> expected3 = new LinkedList<>();
+        expected3.add(new KeyValue<>(1, "value1=1.11,value2=10"));
+
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+        final List<KeyValue<Integer, String>> result3 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected3.size());
+        assertEquals(expected3, result3);
+
+        //Ensure the state stores have the correct values within:
+        final Set<KeyValue<Integer, String>> expMatResults = new HashSet<>();
+        expMatResults.add(new KeyValue<>(1, "value1=1.11,value2=10"));
+        validateQueryableStoresContainExpectedKeyValues(expMatResults, currentMethodName);
+    }
+
+    @Test
+    public void doLeftJoinProduceJoinedResultsWhenValueHasNonMatchingForeignKey() throws Exception {
+        //There is no matching extracted foreign key of "8" anywhere.
+        //However, the joiner still runs, since this is a LEFT join.
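+        //On a non-match the LEFT join invokes the joiner with a null RHS value, so the expected output below is
+        //"value1=8.33,value2=null" rather than the suppressed/null result of the INNER join variant above.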
+        List<KeyValue<Integer, Float>> leftTableEvents = Arrays.asList(new KeyValue<>(1, 8.33f));
+        final List<KeyValue<String, Long>> rightTableEvents = Arrays.asList(new KeyValue<>("1", 10L)); //partition 0
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableEvents, RIGHT_PROD_CONF, MOCK_TIME);
+
+        final String currentMethodName = new Object() { }
+            .getClass()
+            .getEnclosingMethod()
+            .getName();
+        createAndStartStreamsApplication(currentMethodName, true);
+
+        final List<KeyValue<Integer, String>> expected = new LinkedList<>();
+        expected.add(new KeyValue<>(1, "value1=8.33,value2=null"));
+        final List<KeyValue<Integer, String>> result = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size());
+        assertEquals(expected, result);
+
+        //There is also no matching extracted foreign key for "18" anywhere.
+        //However, the joiner still runs, since this is a LEFT join.
+        leftTableEvents = Arrays.asList(new KeyValue<>(1, 18.0f));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+
+        final List<KeyValue<Integer, String>> expected2 = new LinkedList<>();
+        expected2.add(new KeyValue<>(1, "value1=18.0,value2=null"));
+        final List<KeyValue<Integer, String>> result2 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected2.size());
+        assertEquals(expected2, result2);
+
+        //Now set the LHS event FK to match the RHS key.
+        leftTableEvents = Arrays.asList(new KeyValue<>(1, 1.11f));
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+
+        final List<KeyValue<Integer, String>> expected3 = new LinkedList<>();
+        expected3.add(new KeyValue<>(1, "value1=1.11,value2=10"));
+        final List<KeyValue<Integer, String>> result3 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected3.size());
+        assertEquals(expected3, result3);
+
+        //Ensure the state stores have the correct values within:
+        final Set<KeyValue<Integer, String>> expMatResults = new HashSet<>();
+        expMatResults.add(new KeyValue<>(1, "value1=1.11,value2=10"));
+        validateQueryableStoresContainExpectedKeyValues(expMatResults, currentMethodName);
+    }
+
+    @Test
+    public void doInnerJoinFilterOutRapidlyChangingForeignKeyValues() throws Exception {
+        final List<KeyValue<Integer, Float>> leftTableEvents = Arrays.asList(
+            new KeyValue<>(1, 1.33f),
+            new KeyValue<>(2, 2.22f),
+            new KeyValue<>(3, -1.22f), //Won't be joined in
+            new KeyValue<>(4, -2.22f), //Won't be joined in
+            new KeyValue<>(5, 2.22f)
+        );
+
+        //Partitions pre-computed using the default Murmur2 hash, just to ensure that all 3 partitions will be exercised.
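+        //(For the default partitioner this is Utils.toPositive(Utils.murmur2(serializedKey)) % numPartitions,
+        //evaluated offline here for numPartitions = 3.)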
+        final List<KeyValue<String, Long>> rightTableEvents = Arrays.asList(
+            new KeyValue<>("0", 0L),   //partition 2
+            new KeyValue<>("1", 10L),  //partition 0
+            new KeyValue<>("2", 20L),  //partition 2
+            new KeyValue<>("3", 30L),  //partition 2
+            new KeyValue<>("4", 40L),  //partition 1
+            new KeyValue<>("5", 50L),  //partition 0
+            new KeyValue<>("6", 60L),  //partition 1
+            new KeyValue<>("7", 70L),  //partition 0
+            new KeyValue<>("8", 80L),  //partition 0
+            new KeyValue<>("9", 90L)   //partition 2
+        );
+
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableEvents, RIGHT_PROD_CONF, MOCK_TIME);
+
+        final Set<KeyValue<Integer, String>> expected = new HashSet<>();
+        expected.add(new KeyValue<>(1, "value1=1.33,value2=10"));
+        expected.add(new KeyValue<>(2, "value1=2.22,value2=20"));
+        expected.add(new KeyValue<>(5, "value1=2.22,value2=20"));
+
+        final String currentMethodName = new Object() { }
+            .getClass()
+            .getEnclosingMethod()
+            .getName();
+        createAndStartStreamsApplication(currentMethodName, false);
+
+        final Set<KeyValue<Integer, String>> result = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size()));
+
+        assertEquals(expected, result);
+
+        //Rapidly change the foreign key, to validate that the hashing prevents incorrect results from being output,
+        //and that eventually the correct value is output.
+        final List<KeyValue<Integer, Float>> table1ForeignKeyChange = Arrays.asList(
+            new KeyValue<>(3, 2.22f), //Partition 2
+            new KeyValue<>(3, 3.33f), //Partition 2
+            new KeyValue<>(3, 4.44f), //Partition 1
+            new KeyValue<>(3, 5.55f), //Partition 0
+            new KeyValue<>(3, 9.99f), //Partition 2
+            new KeyValue<>(3, 8.88f), //Partition 0
+            new KeyValue<>(3, 0.23f), //Partition 2
+            new KeyValue<>(3, 7.77f), //Partition 0
+            new KeyValue<>(3, 6.66f), //Partition 1
+            new KeyValue<>(3, 1.11f)  //Partition 0 - This will be the final result.
+        );
+
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, table1ForeignKeyChange, LEFT_PROD_CONF, MOCK_TIME);
+        final List<KeyValue<Integer, String>> resultTwo = IntegrationTestUtils.readKeyValues(OUTPUT, CONSUMER_CONFIG, 15 * 1000L, Integer.MAX_VALUE);
+
+        final List<KeyValue<Integer, String>> expectedTwo = new LinkedList<>();
+        expectedTwo.add(new KeyValue<>(3, "value1=1.11,value2=10"));
+        assertArrayEquals(expectedTwo.toArray(), resultTwo.toArray());
+
+        //Ensure the state stores have the correct values within:
+        final Set<KeyValue<Integer, String>> expMatResults = new HashSet<>();
+        expMatResults.addAll(expected);
+        expMatResults.addAll(expectedTwo);
+        validateQueryableStoresContainExpectedKeyValues(expMatResults, currentMethodName);
+    }
+
+    @Test
+    public void doLeftJoinFilterOutRapidlyChangingForeignKeyValues() throws Exception {
+        final List<KeyValue<Integer, Float>> leftTableEvents = Arrays.asList(
+            new KeyValue<>(1, 1.33f),
+            new KeyValue<>(2, 2.22f)
+        );
+
+        //Partitions pre-computed using the default Murmur2 hash, just to ensure that all 3 partitions will be exercised.
+        final List<KeyValue<String, Long>> rightTableEvents = Arrays.asList(
+            new KeyValue<>("0", 0L),   //partition 2
+            new KeyValue<>("1", 10L),  //partition 0
+            new KeyValue<>("2", 20L),  //partition 2
+            new KeyValue<>("3", 30L),  //partition 2
+            new KeyValue<>("4", 40L),  //partition 1
+            new KeyValue<>("5", 50L),  //partition 0
+            new KeyValue<>("6", 60L),  //partition 1
+            new KeyValue<>("7", 70L),  //partition 0
+            new KeyValue<>("8", 80L),  //partition 0
+            new KeyValue<>("9", 90L)   //partition 2
+        );
+
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, leftTableEvents, LEFT_PROD_CONF, MOCK_TIME);
+        IntegrationTestUtils.produceKeyValuesSynchronously(RIGHT_TABLE, rightTableEvents, RIGHT_PROD_CONF, MOCK_TIME);
+
+        final Set<KeyValue<Integer, String>> expected = new HashSet<>();
+        expected.add(new KeyValue<>(1, "value1=1.33,value2=10"));
+        expected.add(new KeyValue<>(2, "value1=2.22,value2=20"));
+
+        final String currentMethodName = new Object() { }
+            .getClass()
+            .getEnclosingMethod()
+            .getName();
+        createAndStartStreamsApplication(currentMethodName, true);
+
+        final Set<KeyValue<Integer, String>> result = new HashSet<>(IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
+            CONSUMER_CONFIG,
+            OUTPUT,
+            expected.size()));
+
+        assertEquals(expected, result);
+
+        //Rapidly change the foreign key, to validate that the hashing prevents incorrect results from being output,
+        //and that eventually the correct value is output.
+        final List<KeyValue<Integer, Float>> table1ForeignKeyChange = Arrays.asList(
+            new KeyValue<>(3, 2.22f), //Partition 2
+            new KeyValue<>(3, 3.33f), //Partition 2
+            new KeyValue<>(3, 4.44f), //Partition 1
+            new KeyValue<>(3, 5.55f), //Partition 0
+            new KeyValue<>(3, 9.99f), //Partition 2
+            new KeyValue<>(3, 8.88f), //Partition 0
+            new KeyValue<>(3, 0.23f), //Partition 2
+            new KeyValue<>(3, 7.77f), //Partition 0
+            new KeyValue<>(3, 6.66f), //Partition 1
+            new KeyValue<>(3, 1.11f)  //Partition 0 - This will be the final result.
+        );
+
+        IntegrationTestUtils.produceKeyValuesSynchronously(LEFT_TABLE, table1ForeignKeyChange, LEFT_PROD_CONF, MOCK_TIME);
+        final List<KeyValue<Integer, String>> resultTwo = IntegrationTestUtils.readKeyValues(OUTPUT, CONSUMER_CONFIG, 15 * 1000L, Integer.MAX_VALUE);
+
+        final List<KeyValue<Integer, String>> expectedTwo = new LinkedList<>();
+        expectedTwo.add(new KeyValue<>(3, "value1=1.11,value2=10"));
+
+        assertArrayEquals(expectedTwo.toArray(), resultTwo.toArray());
+
+        //Ensure the state stores have the correct values within:
+        final Set<KeyValue<Integer, String>> expMatResults = new HashSet<>();
+        expMatResults.addAll(expected);
+        expMatResults.addAll(expectedTwo);
+        validateQueryableStoresContainExpectedKeyValues(expMatResults, currentMethodName);
+    }
+
+    private void createAndStartStreamsApplication(final String queryableStoreName, final boolean leftJoin) {
+        streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, "ktable-ktable-joinOnForeignKey-" + queryableStoreName);
+        streams = prepareTopology(queryableStoreName, leftJoin);
+        streamsTwo = prepareTopology(queryableStoreName, leftJoin);
+        streamsThree = prepareTopology(queryableStoreName, leftJoin);
+        streams.start();
+        streamsTwo.start();
+        streamsThree.start();
+    }
+
+    //These are hardwired into the test logic for readability's sake.
+    //Do not change unless you want to change all the test results as well.
+    private ValueJoiner<Float, Long, String> joiner = (value1, value2) -> "value1=" + value1 + ",value2=" + value2;
+    //Do not change. See above comment.
+    private Function<Float, String> tableOneKeyExtractor = value -> Integer.toString((int) value.floatValue());
+
+    private void validateQueryableStoresContainExpectedKeyValues(final Set<KeyValue<Integer, String>> expectedResult,
+                                                                 final String queryableStoreName) {
+        final ReadOnlyKeyValueStore<Integer, String> myJoinStoreOne = streams.store(queryableStoreName,
+            QueryableStoreTypes.keyValueStore());
+
+        final ReadOnlyKeyValueStore<Integer, String> myJoinStoreTwo = streamsTwo.store(queryableStoreName,
+            QueryableStoreTypes.keyValueStore());
+
+        final ReadOnlyKeyValueStore<Integer, String> myJoinStoreThree = streamsThree.store(queryableStoreName,
+            QueryableStoreTypes.keyValueStore());
+
+        // store only keeps the last set of values, not the entire stream of value changes
+        final Map<Integer, String> expectedInStore = new HashMap<>();
+        for (final KeyValue<Integer, String> expected : expectedResult) {
+            expectedInStore.put(expected.key, expected.value);
+        }
+
+        // depending on partition assignment, the values will be in one of the three stream clients.
+        for (final Map.Entry<Integer, String> expected : expectedInStore.entrySet()) {
+            final String one = myJoinStoreOne.get(expected.getKey());
+            final String two = myJoinStoreTwo.get(expected.getKey());
+            final String three = myJoinStoreThree.get(expected.getKey());
+
+            String result;
+            if (one != null)
+                result = one;
+            else if (two != null)
+                result = two;
+            else if (three != null)
+                result = three;
+            else
+                throw new RuntimeException("Cannot find key " + expected.getKey() + " in any of the state stores");
+            assertEquals(expected.getValue(), result);
+        }
+
+        //Merge all the iterators together to ensure that their sum equals the total set of expected elements.
+        final KeyValueIterator<Integer, String> allOne = myJoinStoreOne.all();
+        final KeyValueIterator<Integer, String> allTwo = myJoinStoreTwo.all();
+        final KeyValueIterator<Integer, String> allThree = myJoinStoreThree.all();
+
+        final List<KeyValue<Integer, String>> all = new LinkedList<>();
+
+        while (allOne.hasNext()) {
+            all.add(allOne.next());
+        }
+        while (allTwo.hasNext()) {
+            all.add(allTwo.next());
+        }
+        while (allThree.hasNext()) {
+            all.add(allThree.next());
+        }
+        allOne.close();
+        allTwo.close();
+        allThree.close();
+
+        for (final KeyValue<Integer, String> elem : all) {
+            assertTrue(expectedResult.contains(elem));
+        }
+    }
+
+    private KafkaStreams prepareTopology(final String queryableStoreName, final boolean leftJoin) {
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final KTable<Integer, Float> left = builder.table(LEFT_TABLE, Consumed.with(Serdes.Integer(), Serdes.Float()));
+        final KTable<String, Long> right = builder.table(RIGHT_TABLE, Consumed.with(Serdes.String(), Serdes.Long()));
+
+        final Materialized<Integer, String, KeyValueStore<Bytes, byte[]>> materialized;
+        if (queryableStoreName != null) {
+            materialized = Materialized.<Integer, String, KeyValueStore<Bytes, byte[]>>as(queryableStoreName)
+                .withKeySerde(Serdes.Integer())
+                .withValueSerde(Serdes.String())
+                .withCachingDisabled();
+        } else {
+            throw new RuntimeException("Current implementation of join on foreign key requires a materialized store");
+        }
+
+        if (leftJoin)
+            left.leftJoin(right, tableOneKeyExtractor, joiner, Named.as("customName"), materialized)
+                .toStream()
+                .to(OUTPUT, Produced.with(Serdes.Integer(), Serdes.String()));
+        else
+            left.join(right, tableOneKeyExtractor, joiner, materialized)
+                .toStream()
+                .to(OUTPUT, Produced.with(Serdes.Integer(), Serdes.String()));
+
+        final Topology topology = builder.build(streamsConfig);
+
+        return new KafkaStreams(topology, streamsConfig);
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKeySchemaTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKeySchemaTest.java
new file mode 100644
index 00000000000..47348038521
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/CombinedKeySchemaTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals.foreignkeyjoin;
+
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.utils.Bytes;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+
+import static org.junit.Assert.assertEquals;
+
+public class CombinedKeySchemaTest {
+
+    @Test
+    public void nonNullPrimaryKeySerdeTest() {
+        final CombinedKeySchema<String, Integer> cks = new CombinedKeySchema<>("someTopic", Serdes.String(), Serdes.Integer());
+        final Integer primary = -999;
+        final Bytes result = cks.toBytes("foreignKey", primary);
+
+        final CombinedKey<String, Integer> deserializedKey = cks.fromBytes(result);
+        assertEquals("foreignKey", deserializedKey.getForeignKey());
+        assertEquals(primary, deserializedKey.getPrimaryKey());
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void nullPrimaryKeySerdeTest() {
+        final CombinedKeySchema<String, Integer> cks = new CombinedKeySchema<>("someTopic", Serdes.String(), Serdes.Integer());
+        cks.toBytes("foreignKey", null);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void nullForeignKeySerdeTest() {
+        final CombinedKeySchema<String, Integer> cks = new CombinedKeySchema<>("someTopic", Serdes.String(), Serdes.Integer());
+        cks.toBytes(null, 10);
+    }
+
+    @Test
+    public void prefixKeySerdeTest() {
+        final CombinedKeySchema<String, Integer> cks = new CombinedKeySchema<>("someTopic", Serdes.String(), Serdes.Integer());
+        final String foreignKey = "someForeignKey";
+        final byte[] foreignKeySerializedData = Serdes.String().serializer().serialize("someTopic", foreignKey);
+        final Bytes prefix = cks.prefixBytes(foreignKey);
+
+        final ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES + foreignKeySerializedData.length);
+        buf.putInt(foreignKeySerializedData.length);
+        buf.put(foreignKeySerializedData);
+        final Bytes expectedPrefixBytes = Bytes.wrap(buf.array());
+
+        assertEquals(expectedPrefixBytes, prefix);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void nullPrefixKeySerdeTest() {
+        final CombinedKeySchema<String, Integer> cks = new CombinedKeySchema<>("someTopic", Serdes.String(), Serdes.Integer());
+        final String foreignKey = null;
+        cks.prefixBytes(foreignKey);
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerdeTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerdeTest.java
new file mode 100644
index 00000000000..fde9bddfac3
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerdeTest.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals.foreignkeyjoin;
+
+import org.apache.kafka.common.errors.UnsupportedVersionException;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.streams.state.internals.Murmur3;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertNull;
+
+public class SubscriptionResponseWrapperSerdeTest {
+
+    @Test
+    @SuppressWarnings("unchecked")
+    public void shouldSerdeWithNonNullsTest() {
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0x01, (byte) 0x9A, (byte) 0xFF, (byte) 0x00});
+        final String foreignValue = "foreignValue";
+        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue);
+        final SubscriptionResponseWrapperSerde srwSerde = new SubscriptionResponseWrapperSerde(Serdes.String());
+        final byte[] serResponse = srwSerde.serializer().serialize(null, srw);
+        final SubscriptionResponseWrapper<String> result = (SubscriptionResponseWrapper<String>) srwSerde.deserializer().deserialize(null, serResponse);
+
+        assertArrayEquals(hashedValue, result.getOriginalValueHash());
+        assertEquals(foreignValue, result.getForeignValue());
+    }
+
+    @Test
+    @SuppressWarnings("unchecked")
+    public void shouldSerdeWithNullForeignValueTest() {
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0x01, (byte) 0x9A, (byte) 0xFF, (byte) 0x00});
+        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, null);
+        final SubscriptionResponseWrapperSerde srwSerde = new SubscriptionResponseWrapperSerde(Serdes.String());
+        final byte[] serResponse = srwSerde.serializer().serialize(null, srw);
+        final SubscriptionResponseWrapper<String> result = (SubscriptionResponseWrapper<String>) srwSerde.deserializer().deserialize(null, serResponse);
+
+        assertArrayEquals(hashedValue, result.getOriginalValueHash());
+        assertNull(result.getForeignValue());
+    }
+
+    @Test
+    @SuppressWarnings("unchecked")
+    public void shouldSerdeWithNullHashTest() {
+        final long[] hashedValue = null;
+        final String foreignValue = "foreignValue";
+        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue);
+        final SubscriptionResponseWrapperSerde srwSerde = new SubscriptionResponseWrapperSerde(Serdes.String());
+        final byte[] serResponse = srwSerde.serializer().serialize(null, srw);
+        final SubscriptionResponseWrapper<String> result = (SubscriptionResponseWrapper<String>) srwSerde.deserializer().deserialize(null, serResponse);
+
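+        //A null hash should survive the round trip as null, i.e. the serde encodes "no hash present" explicitly.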
+        assertArrayEquals(hashedValue, result.getOriginalValueHash());
+        assertEquals(foreignValue, result.getForeignValue());
+    }
+
+    @Test
+    @SuppressWarnings("unchecked")
+    public void shouldSerdeWithNullsTest() {
+        final long[] hashedValue = null;
+        final String foreignValue = null;
+        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue);
+        final SubscriptionResponseWrapperSerde srwSerde = new SubscriptionResponseWrapperSerde(Serdes.String());
+        final byte[] serResponse = srwSerde.serializer().serialize(null, srw);
+        final SubscriptionResponseWrapper<String> result = (SubscriptionResponseWrapper<String>) srwSerde.deserializer().deserialize(null, serResponse);
+
+        assertArrayEquals(hashedValue, result.getOriginalValueHash());
+        assertEquals(foreignValue, result.getForeignValue());
+    }
+
+    @Test(expected = UnsupportedVersionException.class)
+    @SuppressWarnings("unchecked")
+    public void shouldThrowExceptionWithBadVersionTest() {
+        final long[] hashedValue = null;
+        new SubscriptionResponseWrapper<>(hashedValue, "foreignValue", (byte) 0xFF);
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerdeTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerdeTest.java
new file mode 100644
index 00000000000..d948d1f2a35
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerdeTest.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals.foreignkeyjoin;
+
+import org.apache.kafka.common.errors.UnsupportedVersionException;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.streams.state.internals.Murmur3;
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+@SuppressWarnings({"unchecked", "rawtypes"})
+public class SubscriptionWrapperSerdeTest {
+
+    @Test
+    public void shouldSerdeTest() {
+        final String originalKey = "originalKey";
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(Serdes.String());
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final SubscriptionWrapper<String> wrapper = new SubscriptionWrapper<>(hashedValue, SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE, originalKey);
+        final byte[] serialized = swSerde.serializer().serialize(null, wrapper);
+        final SubscriptionWrapper<String> deserialized = (SubscriptionWrapper<String>) swSerde.deserializer().deserialize(null, serialized);
+
+        assertEquals(SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE, deserialized.getInstruction());
+        assertArrayEquals(hashedValue, deserialized.getHash());
+        assertEquals(originalKey, deserialized.getPrimaryKey());
+    }
+
+    @Test
+    public void shouldSerdeNullHashTest() {
+        final String originalKey = "originalKey";
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(Serdes.String());
+        final long[] hashedValue = null;
+        final SubscriptionWrapper<String> wrapper = new SubscriptionWrapper<>(hashedValue, SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, originalKey);
+        final byte[] serialized = swSerde.serializer().serialize(null, wrapper);
+        final SubscriptionWrapper<String> deserialized = (SubscriptionWrapper<String>) swSerde.deserializer().deserialize(null, serialized);
+
+        assertEquals(SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, deserialized.getInstruction());
+        assertArrayEquals(hashedValue, deserialized.getHash());
+        assertEquals(originalKey, deserialized.getPrimaryKey());
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void shouldThrowExceptionOnNullKeyTest() {
+        final String originalKey = null;
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(Serdes.String());
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final SubscriptionWrapper<String> wrapper = new SubscriptionWrapper<>(hashedValue, SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, originalKey);
+        swSerde.serializer().serialize(null, wrapper);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void shouldThrowExceptionOnNullInstructionTest() {
+        final String originalKey = "originalKey";
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(Serdes.String());
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final SubscriptionWrapper<String> wrapper = new SubscriptionWrapper<>(hashedValue, null, originalKey);
+        swSerde.serializer().serialize(null, wrapper);
+    }
+
+    @Test(expected = UnsupportedVersionException.class)
+    public void shouldThrowExceptionOnUnsupportedVersionTest() {
+        final String originalKey = "originalKey";
+        final long[] hashedValue = null;
+        new
SubscriptionWrapper<>(hashedValue, SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, originalKey, (byte) 0x80); + } +} diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreTest.java index bdf0379ddb3..8db040ea4b7 100644 --- a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreTest.java @@ -87,7 +87,7 @@ public class RocksDBKeyValueStoreTest extends AbstractKeyValueStoreTest { } @Test - public void shouldCloseOpenIteratorsWhenStoreClosedAndThrowInvalidStateStoreOnHasNextAndNext() { + public void shouldCloseOpenRangeIteratorsWhenStoreClosedAndThrowInvalidStateStoreOnHasNextAndNext() { context.setTime(1L); store.put(1, "hi"); store.put(2, "goodbye"); @@ -127,5 +127,4 @@ public class RocksDBKeyValueStoreTest extends AbstractKeyValueStoreTest { // ok } } - } diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBStoreTest.java index 4c1932e2853..0a28a773e9a 100644 --- a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBStoreTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBStoreTest.java @@ -557,7 +557,7 @@ public class RocksDBStoreTest { rocksDBStore.init(context, rocksDBStore); try { rocksDBStore.range(null, new Bytes(stringSerializer.serialize(null, "2"))); - fail("Should have thrown NullPointerException on deleting null key"); + fail("Should have thrown NullPointerException on null range key"); } catch (final NullPointerException e) { // this is good } diff --git a/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala b/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala index 754f1cc1aad..7cd3ac83283 100644 --- a/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala +++ b/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala @@ -61,6 +61,12 @@ private[scala] object FunctionsCompatConversions { } } + implicit class FunctionFromFunction[V, VR](val f: V => VR) extends AnyVal { + def asJavaFunction: java.util.function.Function[V, VR] = new java.util.function.Function[V, VR] { + override def apply(value: V): VR = f(value) + } + } + implicit class ValueMapperFromFunction[V, VR](val f: V => VR) extends AnyVal { def asValueMapper: ValueMapper[V, VR] = new ValueMapper[V, VR] { override def apply(value: V): VR = f(value) diff --git a/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KTable.scala b/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KTable.scala index 56549ad3875..288550bc47d 100644 --- a/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KTable.scala +++ b/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KTable.scala @@ -21,7 +21,7 @@ package org.apache.kafka.streams.scala package kstream import org.apache.kafka.common.utils.Bytes -import org.apache.kafka.streams.kstream.{Suppressed, ValueTransformerWithKeySupplier, KTable => KTableJ} +import org.apache.kafka.streams.kstream.{Suppressed, ValueJoiner, ValueTransformerWithKeySupplier, KTable => 
KTableJ}
 import org.apache.kafka.streams.scala.FunctionsCompatConversions._
 import org.apache.kafka.streams.scala.ImplicitConversions._
 import org.apache.kafka.streams.state.KeyValueStore
@@ -318,6 +318,42 @@ class KTable[K, V](val inner: KTableJ[K, V]) {
   ): KTable[K, VR] =
     inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized)
 
+  /**
+   * Join records of this [[KTable]] with another [[KTable]]'s records using a non-windowed inner join. Records from
+   * this table are joined with the record of the other KTable whose key equals the foreign key extracted from this
+   * table's value by keyExtractor.
+   *
+   * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor
+   * @param keyExtractor a function that extracts the foreign key from this table's value
+   * @param joiner a function that computes the join result for a pair of matching records
+   * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]]
+   *                     should be materialized.
+   * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+   *         one for each matched record-pair with the same key
+   */
+  def join[VR, KO, VO](other: KTable[KO, VO],
+                       keyExtractor: Function[V, KO],
+                       joiner: ValueJoiner[V, VO, VR],
+                       materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTable[K, VR] =
+    inner.join(other.inner, keyExtractor.asJavaFunction, joiner, materialized)
+
+  /**
+   * Join records of this [[KTable]] with another [[KTable]]'s records using a non-windowed left join. Records from
+   * this table are joined with the record of the other KTable whose key equals the foreign key extracted from this
+   * table's value by keyExtractor; if no matching record exists, the joiner is invoked with a null right-hand value.
+   *
+   * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor
+   * @param keyExtractor a function that extracts the foreign key from this table's value
+   * @param joiner a function that computes the join result for a pair of matching records
+   * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]]
+   *                     should be materialized.
+   * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner,
+   *         one for each record of this table
+   */
+  def leftJoin[VR, KO, VO](other: KTable[KO, VO],
+                           keyExtractor: Function[V, KO],
+                           joiner: ValueJoiner[V, VO, VR],
+                           materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTable[K, VR] =
+    inner.leftJoin(other.inner, keyExtractor.asJavaFunction, joiner, materialized)
+
   /**
    * Get the name of the local state store used that can be used to query this [[KTable]].
   *
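
For reviewers who want to try the new API, the following is a minimal, self-contained sketch of the Java DSL call exercised by the integration tests above. The topic names, value formats, and the OrderCustomerFkJoinSketch class itself are illustrative assumptions and not part of this patch; only the join(other, keyExtractor, joiner, materialized) signature is taken from the new KTable overloads.

import java.util.function.Function;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.ValueJoiner;
import org.apache.kafka.streams.state.KeyValueStore;

public class OrderCustomerFkJoinSketch {

    public StreamsBuilder buildTopology() {
        final StreamsBuilder builder = new StreamsBuilder();

        //Orders keyed by orderId; the value (here simply a customerId string) carries the foreign key.
        final KTable<Integer, String> orders =
            builder.table("orders", Consumed.with(Serdes.Integer(), Serdes.String()));
        //Customers keyed by customerId.
        final KTable<String, String> customers =
            builder.table("customers", Consumed.with(Serdes.String(), Serdes.String()));

        //Extract the foreign key (the customerId) from this table's value.
        final Function<String, String> foreignKeyExtractor = orderValue -> orderValue;
        final ValueJoiner<String, String, String> joiner =
            (order, customer) -> "order=" + order + ",customer=" + customer;

        //The current implementation requires a materialized store for the join result.
        final Materialized<Integer, String, KeyValueStore<Bytes, byte[]>> materialized =
            Materialized.<Integer, String, KeyValueStore<Bytes, byte[]>>as("orders-with-customers")
                .withKeySerde(Serdes.Integer())
                .withValueSerde(Serdes.String());

        //The result stays keyed on the original orderId; updates to either table re-trigger the join.
        orders.join(customers, foreignKeyExtractor, joiner, materialized)
            .toStream()
            .to("output-topic", Produced.with(Serdes.Integer(), Serdes.String()));

        return builder;
    }
}

As in the tests, an update on the left side that changes the extracted foreign key automatically moves the subscription from the old foreign key to the new one, and the value-hash carried in the subscription round trip is what lets stale responses be discarded when the foreign key changes rapidly.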