diff --git a/bin/kafka-run-class.sh b/bin/kafka-run-class.sh
index fcf442b8cfa..2551338e094 100755
--- a/bin/kafka-run-class.sh
+++ b/bin/kafka-run-class.sh
@@ -57,7 +57,7 @@ do
CLASSPATH=$CLASSPATH:$file
done
-for file in $base_dir/stream/build/libs/kafka-streams*.jar;
+for file in $base_dir/streams/build/libs/kafka-streams*.jar;
do
CLASSPATH=$CLASSPATH:$file
done
diff --git a/build.gradle b/build.gradle
index e24279ea6ba..224f550403c 100644
--- a/build.gradle
+++ b/build.gradle
@@ -561,6 +561,8 @@ project(':streams') {
compile project(':clients')
compile "$slf4jlog4j"
compile 'org.rocksdb:rocksdbjni:3.10.1'
+ compile 'com.101tec:zkclient:0.7' // this dependency should be removed after KIP-4
+ compile "com.fasterxml.jackson.core:jackson-databind:$jackson_version" // this dependency should be removed after KIP-4
testCompile "$junit"
testCompile project(path: ':clients', configuration: 'archives')
diff --git a/checkstyle/import-control.xml b/checkstyle/import-control.xml
index e221dce633b..a65a2dc3ad0 100644
--- a/checkstyle/import-control.xml
+++ b/checkstyle/import-control.xml
@@ -129,6 +129,14 @@
+
+
+
+
+
+
+
+
diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaStreaming.java b/streams/src/main/java/org/apache/kafka/streams/KafkaStreaming.java
index fc1fdae2f1b..0d99739959a 100644
--- a/streams/src/main/java/org/apache/kafka/streams/KafkaStreaming.java
+++ b/streams/src/main/java/org/apache/kafka/streams/KafkaStreaming.java
@@ -73,9 +73,7 @@ public class KafkaStreaming {
private static final Logger log = LoggerFactory.getLogger(KafkaStreaming.class);
private static final AtomicInteger STREAMING_CLIENT_ID_SEQUENCE = new AtomicInteger(1);
- private static final String JMX_PREFIX = "kafka.streaming";
-
- private final Time time;
+ private static final String JMX_PREFIX = "kafka.streams";
// container states
private static final int CREATED = 0;
@@ -85,29 +83,39 @@ public class KafkaStreaming {
private final StreamThread[] threads;
- private String clientId;
- private final UUID uuid;
- private final Metrics metrics;
+ // processId is expected to be unique across JVMs and to be used
+ // in the userData of the subscription request to allow the assignor
+ // to be aware of the co-location of the stream threads' consumers. It is for
+ // internal usage only and should not be exposed to users at all.
+ private final UUID processId;
public KafkaStreaming(TopologyBuilder builder, StreamingConfig config) throws Exception {
// create the metrics
- this.time = new SystemTime();
- this.uuid = UUID.randomUUID();
+ Time time = new SystemTime();
+
+ this.processId = UUID.randomUUID();
+
+ String jobId = config.getString(StreamingConfig.JOB_ID_CONFIG);
+ if (jobId.length() <= 0)
+ jobId = "kafka-streams";
+
+ String clientId = config.getString(StreamingConfig.CLIENT_ID_CONFIG);
+ if (clientId.length() <= 0)
+ clientId = jobId + "-" + STREAMING_CLIENT_ID_SEQUENCE.getAndIncrement();
+
+ List<MetricsReporter> reporters = config.getConfiguredInstances(StreamingConfig.METRIC_REPORTER_CLASSES_CONFIG,
+ MetricsReporter.class);
+ reporters.add(new JmxReporter(JMX_PREFIX));
MetricConfig metricConfig = new MetricConfig().samples(config.getInt(StreamingConfig.METRICS_NUM_SAMPLES_CONFIG))
.timeWindow(config.getLong(StreamingConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG),
TimeUnit.MILLISECONDS);
- clientId = config.getString(StreamingConfig.CLIENT_ID_CONFIG);
- if (clientId.length() <= 0)
- clientId = "streaming-" + STREAMING_CLIENT_ID_SEQUENCE.getAndIncrement();
- List<MetricsReporter> reporters = config.getConfiguredInstances(StreamingConfig.METRIC_REPORTER_CLASSES_CONFIG,
- MetricsReporter.class);
- reporters.add(new JmxReporter(JMX_PREFIX));
- this.metrics = new Metrics(metricConfig, reporters, time);
+
+ Metrics metrics = new Metrics(metricConfig, reporters, time);
this.threads = new StreamThread[config.getInt(StreamingConfig.NUM_STREAM_THREADS_CONFIG)];
for (int i = 0; i < this.threads.length; i++) {
- this.threads[i] = new StreamThread(builder, config, this.clientId, this.uuid, this.metrics, this.time);
+ this.threads[i] = new StreamThread(builder, config, jobId, clientId, processId, metrics, time);
}
}
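
For orientation, here is a minimal sketch (not part of the patch) of the id-defaulting rules the constructor now applies; the property names come from StreamingConfig, the sample values are made up:

    // job.id falls back to "kafka-streams"; client.id falls back to "<jobId>-<sequence>".
    String jobId = config.getString(StreamingConfig.JOB_ID_CONFIG);          // "" when unset
    if (jobId.length() <= 0)
        jobId = "kafka-streams";

    String clientId = config.getString(StreamingConfig.CLIENT_ID_CONFIG);    // "" when unset
    if (clientId.length() <= 0)
        clientId = jobId + "-" + STREAMING_CLIENT_ID_SEQUENCE.getAndIncrement();
    // e.g. job.id=example-processor and no client.id => clientId "example-processor-1"
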
diff --git a/streams/src/main/java/org/apache/kafka/streams/StreamingConfig.java b/streams/src/main/java/org/apache/kafka/streams/StreamingConfig.java
index 437afd8b81e..e89d03086da 100644
--- a/streams/src/main/java/org/apache/kafka/streams/StreamingConfig.java
+++ b/streams/src/main/java/org/apache/kafka/streams/StreamingConfig.java
@@ -42,6 +42,10 @@ public class StreamingConfig extends AbstractConfig {
public static final String STATE_DIR_CONFIG = "state.dir";
private static final String STATE_DIR_DOC = "Directory location for state store.";
+ /** zookeeper.connect */
+ public static final String ZOOKEEPER_CONNECT_CONFIG = "zookeeper.connect";
+ private static final String ZOOKEEPER_CONNECT_DOC = "Zookeeper connect string for Kafka topics management.";
+
/** commit.interval.ms */
public static final String COMMIT_INTERVAL_MS_CONFIG = "commit.interval.ms";
private static final String COMMIT_INTERVAL_MS_DOC = "The frequency with which to save the position of the processor.";
@@ -83,8 +87,9 @@ public class StreamingConfig extends AbstractConfig {
public static final String PARTITION_GROUPER_CLASS_CONFIG = "partition.grouper";
private static final String PARTITION_GROUPER_CLASS_DOC = "Partition grouper class that implements the PartitionGrouper interface.";
- /** client.id */
- public static final String CLIENT_ID_CONFIG = CommonClientConfigs.CLIENT_ID_CONFIG;
+ /** job.id */
+ public static final String JOB_ID_CONFIG = "job.id";
+ public static final String JOB_ID_DOC = "An id string to identify the stream job. It is used as 1) the default client-id prefix, 2) the group-id for membership management, and 3) the changelog topic prefix.";
/** key.serializer */
public static final String KEY_SERIALIZER_CLASS_CONFIG = ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG;
@@ -107,19 +112,30 @@ public class StreamingConfig extends AbstractConfig {
/** metric.reporters */
public static final String METRIC_REPORTER_CLASSES_CONFIG = CommonClientConfigs.METRIC_REPORTER_CLASSES_CONFIG;
- /**
- * bootstrap.servers
- */
+ /** bootstrap.servers */
public static final String BOOTSTRAP_SERVERS_CONFIG = CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG;
+ /** client.id */
+ public static final String CLIENT_ID_CONFIG = CommonClientConfigs.CLIENT_ID_CONFIG;
+
private static final String SYSTEM_TEMP_DIRECTORY = System.getProperty("java.io.tmpdir");
static {
- CONFIG = new ConfigDef().define(CLIENT_ID_CONFIG,
+ CONFIG = new ConfigDef().define(JOB_ID_CONFIG,
+ Type.STRING,
+ "",
+ Importance.MEDIUM,
+ StreamingConfig.JOB_ID_DOC)
+ .define(CLIENT_ID_CONFIG,
Type.STRING,
"",
Importance.MEDIUM,
CommonClientConfigs.CLIENT_ID_DOC)
+ .define(ZOOKEEPER_CONNECT_CONFIG,
+ Type.STRING,
+ "",
+ Importance.HIGH,
+ StreamingConfig.ZOOKEEPER_CONNECT_DOC)
.define(STATE_DIR_CONFIG,
Type.STRING,
SYSTEM_TEMP_DIRECTORY,
@@ -221,20 +237,27 @@ public class StreamingConfig extends AbstractConfig {
super(CONFIG, props);
}
- public Map<String, Object> getConsumerConfigs(StreamThread streamThread) {
+ public Map<String, Object> getConsumerConfigs(StreamThread streamThread, String groupId, String clientId) {
Map<String, Object> props = getBaseConsumerConfigs();
+
+ props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+ props.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId + "-consumer");
props.put(StreamingConfig.NUM_STANDBY_REPLICAS_CONFIG, getInt(StreamingConfig.NUM_STANDBY_REPLICAS_CONFIG));
- props.put(StreamingConfig.InternalConfig.STREAM_THREAD_INSTANCE, streamThread);
props.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, KafkaStreamingPartitionAssignor.class.getName());
+
+ props.put(StreamingConfig.InternalConfig.STREAM_THREAD_INSTANCE, streamThread);
+
return props;
}
- public Map<String, Object> getRestoreConsumerConfigs() {
+ public Map<String, Object> getRestoreConsumerConfigs(String clientId) {
Map<String, Object> props = getBaseConsumerConfigs();
- // no group id for a restore consumer
+ // no need to set group id for a restore consumer
props.remove(ConsumerConfig.GROUP_ID_CONFIG);
+ props.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId + "-restore-consumer");
+
return props;
}
@@ -248,11 +271,12 @@ public class StreamingConfig extends AbstractConfig {
props.remove(StreamingConfig.KEY_SERIALIZER_CLASS_CONFIG);
props.remove(StreamingConfig.VALUE_SERIALIZER_CLASS_CONFIG);
props.remove(StreamingConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG);
+ props.remove(StreamingConfig.NUM_STANDBY_REPLICAS_CONFIG);
return props;
}
- public Map<String, Object> getProducerConfigs() {
+ public Map<String, Object> getProducerConfigs(String clientId) {
Map<String, Object> props = this.originals();
// set producer default property values
@@ -263,6 +287,8 @@ public class StreamingConfig extends AbstractConfig {
props.remove(StreamingConfig.VALUE_DESERIALIZER_CLASS_CONFIG);
props.remove(StreamingConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG);
+ props.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId + "-producer");
+
return props;
}
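
To see the reworked StreamingConfig surface end to end, here is a hedged usage sketch (broker and ZooKeeper addresses are placeholders; the derived ids follow the suffixing added above):

    Properties props = new Properties();
    props.put(StreamingConfig.JOB_ID_CONFIG, "my-job");                      // also used as the consumer group id
    props.put(StreamingConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // placeholder
    props.put(StreamingConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");   // placeholder, needed until KIP-4
    StreamingConfig config = new StreamingConfig(props);

    // Given clientId "my-job-1", the per-client ids handed to the underlying clients become:
    //   getConsumerConfigs(thread, "my-job", "my-job-1")  -> client.id "my-job-1-consumer", group.id "my-job"
    //   getRestoreConsumerConfigs("my-job-1")             -> client.id "my-job-1-restore-consumer", no group.id
    //   getProducerConfigs("my-job-1")                    -> client.id "my-job-1-producer"
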
diff --git a/streams/src/main/java/org/apache/kafka/streams/examples/KStreamJob.java b/streams/src/main/java/org/apache/kafka/streams/examples/KStreamJob.java
index 87368c1c465..819bd687b18 100644
--- a/streams/src/main/java/org/apache/kafka/streams/examples/KStreamJob.java
+++ b/streams/src/main/java/org/apache/kafka/streams/examples/KStreamJob.java
@@ -34,7 +34,7 @@ public class KStreamJob {
public static void main(String[] args) throws Exception {
Properties props = new Properties();
- props.put(StreamingConfig.CLIENT_ID_CONFIG, "Example-KStream-Job");
+ props.put(StreamingConfig.JOB_ID_CONFIG, "example-kstream");
props.put(StreamingConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamingConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
props.put(StreamingConfig.VALUE_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
diff --git a/streams/src/main/java/org/apache/kafka/streams/examples/ProcessorJob.java b/streams/src/main/java/org/apache/kafka/streams/examples/ProcessorJob.java
index 882c7ed0fbd..2d0b79fa075 100644
--- a/streams/src/main/java/org/apache/kafka/streams/examples/ProcessorJob.java
+++ b/streams/src/main/java/org/apache/kafka/streams/examples/ProcessorJob.java
@@ -49,7 +49,7 @@ public class ProcessorJob {
public void init(ProcessorContext context) {
this.context = context;
this.context.schedule(1000);
- this.kvStore = (KeyValueStore) context.getStateStore("local-state");
+ this.kvStore = (KeyValueStore) context.getStateStore("LOCAL-STATE");
}
@Override
@@ -90,8 +90,9 @@ public class ProcessorJob {
public static void main(String[] args) throws Exception {
Properties props = new Properties();
- props.put(StreamingConfig.CLIENT_ID_CONFIG, "Example-Processor-Job");
+ props.put(StreamingConfig.JOB_ID_CONFIG, "example-processor");
props.put(StreamingConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+ props.put(StreamingConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
props.put(StreamingConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
props.put(StreamingConfig.VALUE_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
props.put(StreamingConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
@@ -104,8 +105,7 @@ public class ProcessorJob {
builder.addSource("SOURCE", new StringDeserializer(), new StringDeserializer(), "topic-source");
builder.addProcessor("PROCESS", new MyProcessorSupplier(), "SOURCE");
- builder.addStateStore(Stores.create("local-state").withStringKeys().withIntegerValues().inMemory().build());
- builder.connectProcessorAndStateStores("local-state", "PROCESS");
+ builder.addStateStore(Stores.create("LOCAL-STATE").withStringKeys().withIntegerValues().inMemory().build(), "PROCESS");
builder.addSink("SINK", "topic-sink", new StringSerializer(), new IntegerSerializer(), "PROCESS");
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/SlidingWindowSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/SlidingWindowSupplier.java
index 0cf969f744f..80e548f94c7 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/SlidingWindowSupplier.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/SlidingWindowSupplier.java
@@ -85,7 +85,7 @@ public class SlidingWindowSupplier implements WindowSupplier {
this.context = context;
this.partition = context.id().partition;
SlidingWindowRegistryCallback restoreFunc = new SlidingWindowRegistryCallback();
- context.register(this, restoreFunc);
+ context.register(this, true, restoreFunc);
for (ValueList valueList : map.values()) {
valueList.clearDirtyValues();
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java
index 5b2b0313502..47c9b09ee6c 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java
@@ -177,7 +177,8 @@ public class KTableImpl implements KTable {
if (!source.isMaterialized()) {
StateStoreSupplier storeSupplier =
new KTableStoreSupplier(topic, keySerializer, keyDeserializer, valSerializer, valDeserializer, null);
- topology.addStateStore(storeSupplier, name);
+ // mark this store as non-internal since it is read directly from a user topic
+ topology.addStateStore(storeSupplier, false, name);
source.materialize();
}
}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/DefaultPartitionGrouper.java b/streams/src/main/java/org/apache/kafka/streams/processor/DefaultPartitionGrouper.java
index 7d2188a2f2c..923a2173a99 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/DefaultPartitionGrouper.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/DefaultPartitionGrouper.java
@@ -29,9 +29,9 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-public class DefaultPartitionGrouper extends PartitionGrouper {
+public class DefaultPartitionGrouper implements PartitionGrouper {
- public Map<TaskId, Set<TopicPartition>> partitionGroups(Cluster metadata) {
+ public Map<TaskId, Set<TopicPartition>> partitionGroups(Map<Integer, Set<String>> topicGroups, Cluster metadata) {
Map<TaskId, Set<TopicPartition>> groups = new HashMap<>();
for (Map.Entry<Integer, Set<String>> entry : topicGroups.entrySet()) {
@@ -71,3 +71,6 @@ public class DefaultPartitionGrouper extends PartitionGrouper {
}
}
+
+
+
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/PartitionGrouper.java b/streams/src/main/java/org/apache/kafka/streams/processor/PartitionGrouper.java
index 187c4ce11f4..a40a1c42aa4 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/PartitionGrouper.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/PartitionGrouper.java
@@ -19,39 +19,18 @@ package org.apache.kafka.streams.processor;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.TopicPartition;
-import org.apache.kafka.streams.processor.internals.KafkaStreamingPartitionAssignor;
import java.util.Map;
import java.util.Set;
-public abstract class PartitionGrouper {
-
- protected Map<Integer, Set<String>> topicGroups;
-
- private KafkaStreamingPartitionAssignor partitionAssignor = null;
+public interface PartitionGrouper {
/**
* Returns a map of task ids to groups of partitions.
*
- * @param metadata
+ * @param topicGroups The subscribed topic groups
+ * @param metadata Metadata of the consuming cluster
* @return a map of task ids to groups of partitions
*/
- public abstract Map<TaskId, Set<TopicPartition>> partitionGroups(Cluster metadata);
-
- public void topicGroups(Map<Integer, Set<String>> topicGroups) {
- this.topicGroups = topicGroups;
- }
-
- public void partitionAssignor(KafkaStreamingPartitionAssignor partitionAssignor) {
- this.partitionAssignor = partitionAssignor;
- }
-
- public Set<TaskId> taskIds(TopicPartition partition) {
- return partitionAssignor.taskIds(partition);
- }
-
- public Map<TaskId, Set<TopicPartition>> standbyTasks() {
- return partitionAssignor.standbyTasks();
- }
-
-}
+ Map<TaskId, Set<TopicPartition>> partitionGroups(Map<Integer, Set<String>> topicGroups, Cluster metadata);
+}
\ No newline at end of file
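
Since PartitionGrouper is now a plain interface that receives the topic groups as an argument, a custom grouper is a small stateless class. A hedged sketch under the reconstructed signature (the generic types are inferred from DefaultPartitionGrouper; the class name is made up):

    import org.apache.kafka.common.Cluster;
    import org.apache.kafka.common.PartitionInfo;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.streams.processor.PartitionGrouper;
    import org.apache.kafka.streams.processor.TaskId;

    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    public class OneTaskPerPartitionGrouper implements PartitionGrouper {
        @Override
        public Map<TaskId, Set<TopicPartition>> partitionGroups(Map<Integer, Set<String>> topicGroups, Cluster metadata) {
            Map<TaskId, Set<TopicPartition>> groups = new HashMap<>();
            for (Map.Entry<Integer, Set<String>> entry : topicGroups.entrySet()) {
                for (String topic : entry.getValue()) {
                    List<PartitionInfo> infos = metadata.partitionsForTopic(topic);
                    if (infos == null)
                        continue;
                    for (PartitionInfo info : infos) {
                        // one task per (topic group, partition) pair
                        TaskId taskId = new TaskId(entry.getKey(), info.partition());
                        Set<TopicPartition> partitions = groups.get(taskId);
                        if (partitions == null) {
                            partitions = new HashSet<>();
                            groups.put(taskId, partitions);
                        }
                        partitions.add(new TopicPartition(topic, info.partition()));
                    }
                }
            }
            return groups;
        }
    }
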
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/ProcessorContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/ProcessorContext.java
index 88ac64e29b0..fa19ed798a4 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/ProcessorContext.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/ProcessorContext.java
@@ -79,7 +79,7 @@ public interface ProcessorContext {
*
* @param store the storage engine
*/
- void register(StateStore store, StateRestoreCallback stateRestoreCallback);
+ void register(StateStore store, boolean loggingEnabled, StateRestoreCallback stateRestoreCallback);
StateStore getStateStore(String name);
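
The new boolean on register() controls whether writes to the store are captured in a changelog topic for later restoration. A hedged fragment of a custom store's init method under the new signature (the ByteBuffer-keyed backing map is an assumption of the example):

    // Fragment of a hypothetical StateStore implementation.
    private final Map<ByteBuffer, byte[]> entries = new HashMap<>();

    public void init(ProcessorContext context) {
        // true = enable change-logging so the store can be rebuilt from its changelog topic
        context.register(this, true, new StateRestoreCallback() {
            @Override
            public void restore(byte[] key, byte[] value) {
                entries.put(ByteBuffer.wrap(key), value);   // replay a changelog record into the store
            }
        });
    }
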
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java b/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java
index 021a47ffd20..3cfb22bd936 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java
@@ -45,14 +45,17 @@ import java.util.Set;
* its child nodes. A {@link Processor processor} is a node in the graph that receives input messages from upstream nodes,
* processes that message, and optionally forwards new messages to one or all of its children. Finally, a {@link SinkNode sink}
* is a node in the graph that receives messages from upstream nodes and writes them to a Kafka topic. This builder allows you
- * to construct an acyclic graph of these nodes, and the builder is then passed into a new {@link KafkaStreaming} instance
- * that will then {@link KafkaStreaming#start() begin consuming, processing, and producing messages}.
+ * to construct an acyclic graph of these nodes, and the builder is then passed into a new {@link org.apache.kafka.streams.KafkaStreaming}
+ * instance that will then {@link org.apache.kafka.streams.KafkaStreaming#start() begin consuming, processing, and producing messages}.
*/
public class TopologyBuilder {
// node factories in a topological order
private final LinkedHashMap<String, NodeFactory> nodeFactories = new LinkedHashMap<>();
+ // state factories
+ private final Map<String, StateStoreFactory> stateFactories = new HashMap<>();
+
private final Set<String> sourceTopicNames = new HashSet<>();
private final QuickUnion<String> nodeGrouper = new QuickUnion<>();
@@ -60,8 +63,18 @@ public class TopologyBuilder {
private final HashMap<String, String[]> nodeToTopics = new HashMap<>();
private Map<Integer, Set<String>> nodeGroups = null;
- private Map<String, StateStoreSupplier> stateStores = new HashMap<>();
- private Map<String, Set<String>> stateStoreUsers = new HashMap<>();
+ private static class StateStoreFactory {
+ public final Set<String> users;
+
+ public final boolean isInternal;
+ public final StateStoreSupplier supplier;
+
+ StateStoreFactory(boolean isInternal, StateStoreSupplier supplier) {
+ this.isInternal = isInternal;
+ this.supplier = supplier;
+ this.users = new HashSet<>();
+ }
+ }
private static abstract class NodeFactory {
public final String name;
@@ -88,6 +101,7 @@ public class TopologyBuilder {
stateStoreNames.add(stateStoreName);
}
+ @SuppressWarnings("unchecked")
@Override
public ProcessorNode build() {
return new ProcessorNode(name, supplier.get(), stateStoreNames);
@@ -106,6 +120,7 @@ public class TopologyBuilder {
this.valDeserializer = valDeserializer;
}
+ @SuppressWarnings("unchecked")
@Override
public ProcessorNode build() {
return new SourceNode(name, keyDeserializer, valDeserializer);
@@ -125,12 +140,40 @@ public class TopologyBuilder {
this.keySerializer = keySerializer;
this.valSerializer = valSerializer;
}
+
+ @SuppressWarnings("unchecked")
@Override
public ProcessorNode build() {
return new SinkNode(name, topic, keySerializer, valSerializer);
}
}
+ public static class TopicsInfo {
+ public Set<String> sourceTopics;
+ public Set<String> stateNames;
+
+ public TopicsInfo(Set<String> sourceTopics, Set<String> stateNames) {
+ this.sourceTopics = sourceTopics;
+ this.stateNames = stateNames;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof TopicsInfo) {
+ TopicsInfo other = (TopicsInfo) o;
+ return other.sourceTopics.equals(this.sourceTopics) && other.stateNames.equals(this.stateNames);
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ long n = ((long) sourceTopics.hashCode() << 32) | (long) stateNames.hashCode();
+ return (int) (n % 0xFFFFFFFFL);
+ }
+ }
+
/**
* Create a new builder.
*/
@@ -138,9 +181,9 @@ public class TopologyBuilder {
/**
* Add a new source that consumes the named topics and forwards the messages to child processor and/or sink nodes.
- * The source will use the {@link StreamingConfig#KEY_DESERIALIZER_CLASS_CONFIG default key deserializer} and
- * {@link StreamingConfig#VALUE_DESERIALIZER_CLASS_CONFIG default value deserializer} specified in the
- * {@link StreamingConfig streaming configuration}.
+ * The source will use the {@link org.apache.kafka.streams.StreamingConfig#KEY_DESERIALIZER_CLASS_CONFIG default key deserializer} and
+ * {@link org.apache.kafka.streams.StreamingConfig#VALUE_DESERIALIZER_CLASS_CONFIG default value deserializer} specified in the
+ * {@link org.apache.kafka.streams.StreamingConfig streaming configuration}.
*
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
@@ -158,11 +201,11 @@ public class TopologyBuilder {
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param keyDeserializer the {@link Deserializer key deserializer} used when consuming messages; may be null if the source
- * should use the {@link StreamingConfig#KEY_DESERIALIZER_CLASS_CONFIG default key deserializer} specified in the
- * {@link StreamingConfig streaming configuration}
+ * should use the {@link org.apache.kafka.streams.StreamingConfig#KEY_DESERIALIZER_CLASS_CONFIG default key deserializer} specified in the
+ * {@link org.apache.kafka.streams.StreamingConfig streaming configuration}
* @param valDeserializer the {@link Deserializer value deserializer} used when consuming messages; may be null if the source
- * should use the {@link StreamingConfig#VALUE_DESERIALIZER_CLASS_CONFIG default value deserializer} specified in the
- * {@link StreamingConfig streaming configuration}
+ * should use the {@link org.apache.kafka.streams.StreamingConfig#VALUE_DESERIALIZER_CLASS_CONFIG default value deserializer} specified in the
+ * {@link org.apache.kafka.streams.StreamingConfig streaming configuration}
* @param topics the name of one or more Kafka topics that this source is to consume
* @return this builder instance so methods can be chained together; never null
*/
@@ -186,9 +229,9 @@ public class TopologyBuilder {
/**
* Add a new sink that forwards messages from upstream parent processor and/or source nodes to the named Kafka topic.
- * The sink will use the {@link StreamingConfig#KEY_SERIALIZER_CLASS_CONFIG default key serializer} and
- * {@link StreamingConfig#VALUE_SERIALIZER_CLASS_CONFIG default value serializer} specified in the
- * {@link StreamingConfig streaming configuration}.
+ * The sink will use the {@link org.apache.kafka.streams.StreamingConfig#KEY_SERIALIZER_CLASS_CONFIG default key serializer} and
+ * {@link org.apache.kafka.streams.StreamingConfig#VALUE_SERIALIZER_CLASS_CONFIG default value serializer} specified in the
+ * {@link org.apache.kafka.streams.StreamingConfig streaming configuration}.
*
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its messages
@@ -205,11 +248,11 @@ public class TopologyBuilder {
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its messages
* @param keySerializer the {@link Serializer key serializer} used when consuming messages; may be null if the sink
- * should use the {@link StreamingConfig#KEY_SERIALIZER_CLASS_CONFIG default key serializer} specified in the
- * {@link StreamingConfig streaming configuration}
+ * should use the {@link org.apache.kafka.streams.StreamingConfig#KEY_SERIALIZER_CLASS_CONFIG default key serializer} specified in the
+ * {@link org.apache.kafka.streams.StreamingConfig streaming configuration}
* @param valSerializer the {@link Serializer value serializer} used when consuming messages; may be null if the sink
- * should use the {@link StreamingConfig#VALUE_SERIALIZER_CLASS_CONFIG default value serializer} specified in the
- * {@link StreamingConfig streaming configuration}
+ * should use the {@link org.apache.kafka.streams.StreamingConfig#VALUE_SERIALIZER_CLASS_CONFIG default value serializer} specified in the
+ * {@link org.apache.kafka.streams.StreamingConfig streaming configuration}
* @param parentNames the name of one or more source or processor nodes whose output message this sink should consume
* and write to its topic
* @return this builder instance so methods can be chained together; never null
@@ -271,12 +314,12 @@ public class TopologyBuilder {
* @param supplier the supplier used to obtain this state store {@link StateStore} instance
* @return this builder instance so methods can be chained together; never null
*/
- public final TopologyBuilder addStateStore(StateStoreSupplier supplier, String... processorNames) {
- if (stateStores.containsKey(supplier.name())) {
+ public final TopologyBuilder addStateStore(StateStoreSupplier supplier, boolean isInternal, String... processorNames) {
+ if (stateFactories.containsKey(supplier.name())) {
throw new TopologyException("StateStore " + supplier.name() + " is already added.");
}
- stateStores.put(supplier.name(), supplier);
- stateStoreUsers.put(supplier.name(), new HashSet<String>());
+
+ stateFactories.put(supplier.name(), new StateStoreFactory(isInternal, supplier));
if (processorNames != null) {
for (String processorName : processorNames) {
@@ -287,6 +330,16 @@ public class TopologyBuilder {
return this;
}
+ /**
+ * Adds a state store
+ *
+ * @param supplier the supplier used to obtain this state store {@link StateStore} instance
+ * @return this builder instance so methods can be chained together; never null
+ */
+ public final TopologyBuilder addStateStore(StateStoreSupplier supplier, String... processorNames) {
+ return this.addStateStore(supplier, true, processorNames);
+ }
+
/**
* Connects the processor and the state stores
*
@@ -305,22 +358,22 @@ public class TopologyBuilder {
}
private void connectProcessorAndStateStore(String processorName, String stateStoreName) {
- if (!stateStores.containsKey(stateStoreName))
+ if (!stateFactories.containsKey(stateStoreName))
throw new TopologyException("StateStore " + stateStoreName + " is not added yet.");
if (!nodeFactories.containsKey(processorName))
throw new TopologyException("Processor " + processorName + " is not added yet.");
- Set<String> users = stateStoreUsers.get(stateStoreName);
- Iterator<String> iter = users.iterator();
+ StateStoreFactory stateStoreFactory = stateFactories.get(stateStoreName);
+ Iterator<String> iter = stateStoreFactory.users.iterator();
if (iter.hasNext()) {
String user = iter.next();
nodeGrouper.unite(user, processorName);
}
- users.add(processorName);
+ stateStoreFactory.users.add(processorName);
- NodeFactory factory = nodeFactories.get(processorName);
- if (factory instanceof ProcessorNodeFactory) {
- ((ProcessorNodeFactory) factory).addStateStore(stateStoreName);
+ NodeFactory nodeFactory = nodeFactories.get(processorName);
+ if (nodeFactory instanceof ProcessorNodeFactory) {
+ ((ProcessorNodeFactory) nodeFactory).addStateStore(stateStoreName);
} else {
throw new TopologyException("cannot connect a state store " + stateStoreName + " to a source node or a sink node.");
}
@@ -332,20 +385,32 @@ public class TopologyBuilder {
*
* @return groups of topic names
*/
- public Map<Integer, Set<String>> topicGroups() {
- Map<Integer, Set<String>> topicGroups = new HashMap<>();
+ public Map<Integer, TopicsInfo> topicGroups() {
+ Map<Integer, TopicsInfo> topicGroups = new HashMap<>();
if (nodeGroups == null)
nodeGroups = makeNodeGroups();
for (Map.Entry<Integer, Set<String>> entry : nodeGroups.entrySet()) {
- Set<String> topicGroup = new HashSet<>();
+ Set<String> sourceTopics = new HashSet<>();
+ Set<String> stateNames = new HashSet<>();
for (String node : entry.getValue()) {
+ // if the node is a source node, add to the source topics
String[] topics = nodeToTopics.get(node);
if (topics != null)
- topicGroup.addAll(Arrays.asList(topics));
+ sourceTopics.addAll(Arrays.asList(topics));
+
+ // if the node is connected to a state store, add its name to the state names
+ for (StateStoreFactory stateFactory : stateFactories.values()) {
+
+ if (stateFactory.isInternal && stateFactory.users.contains(node)) {
+ stateNames.add(stateFactory.supplier.name());
+ }
+ }
}
- topicGroups.put(entry.getKey(), Collections.unmodifiableSet(topicGroup));
+ topicGroups.put(entry.getKey(), new TopicsInfo(
+ Collections.unmodifiableSet(sourceTopics),
+ Collections.unmodifiableSet(stateNames)));
}
return Collections.unmodifiableMap(topicGroups);
@@ -431,9 +496,9 @@ public class TopologyBuilder {
/**
* Build the topology for the specified topic group. This is called automatically when passing this builder into the
- * {@link KafkaStreaming#KafkaStreaming(TopologyBuilder, StreamingConfig)} constructor.
+ * {@link org.apache.kafka.streams.KafkaStreaming#KafkaStreaming(TopologyBuilder, org.apache.kafka.streams.StreamingConfig)} constructor.
*
- * @see KafkaStreaming#KafkaStreaming(TopologyBuilder, StreamingConfig)
+ * @see org.apache.kafka.streams.KafkaStreaming#KafkaStreaming(TopologyBuilder, org.apache.kafka.streams.StreamingConfig)
*/
public ProcessorTopology build(Integer topicGroupId) {
Set<String> nodeGroup;
@@ -467,7 +532,7 @@ public class TopologyBuilder {
}
for (String stateStoreName : ((ProcessorNodeFactory) factory).stateStoreNames) {
if (!stateStoreMap.containsKey(stateStoreName)) {
- stateStoreMap.put(stateStoreName, stateStores.get(stateStoreName));
+ stateStoreMap.put(stateStoreName, stateFactories.get(stateStoreName).supplier);
}
}
} else if (factory instanceof SourceNodeFactory) {
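
To make the new isInternal flag concrete, a hedged sketch of wiring internal versus user-managed stores and what topicGroups() then reports (the processor supplier and the second store supplier are placeholders):

    TopologyBuilder builder = new TopologyBuilder();
    builder.addSource("SOURCE", "topic-source");
    builder.addProcessor("PROCESS", new MyProcessorSupplier(), "SOURCE");            // hypothetical supplier

    // internal store (default overload): its name is reported in TopicsInfo.stateNames,
    // so a changelog topic can be managed for it
    builder.addStateStore(Stores.create("COUNTS").withStringKeys().withIntegerValues().inMemory().build(), "PROCESS");

    // non-internal store, e.g. one backed directly by a user topic:
    // excluded from stateNames, so no changelog topic is created for it
    builder.addStateStore(userTopicBackedSupplier, false, "PROCESS");                // hypothetical supplier

    Map<Integer, TopologyBuilder.TopicsInfo> groups = builder.topicGroups();
    // each TopicsInfo pairs the group's source topics with the internal state store names only
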
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java
index e1b4d62fdc1..b3255bbb2a5 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java
@@ -43,6 +43,7 @@ public abstract class AbstractTask {
protected ProcessorContext processorContext;
protected AbstractTask(TaskId id,
+ String jobId,
Collection<TopicPartition> partitions,
ProcessorTopology topology,
Consumer<byte[], byte[]> consumer,
@@ -58,7 +59,7 @@ public abstract class AbstractTask {
try {
File stateFile = new File(config.getString(StreamingConfig.STATE_DIR_CONFIG), id.toString());
// if partitions is null, this is a standby task
- this.stateMgr = new ProcessorStateManager(id.partition, stateFile, restoreConsumer, isStandby);
+ this.stateMgr = new ProcessorStateManager(jobId, id.partition, stateFile, restoreConsumer, isStandby);
} catch (IOException e) {
throw new KafkaException("Error while creating the state manager", e);
}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/KafkaStreamingPartitionAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/KafkaStreamingPartitionAssignor.java
index 54d5567fc1e..29c67f284bc 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/KafkaStreamingPartitionAssignor.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/KafkaStreamingPartitionAssignor.java
@@ -21,9 +21,11 @@ import org.apache.kafka.clients.consumer.internals.PartitionAssignor;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.Configurable;
import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.streams.StreamingConfig;
import org.apache.kafka.streams.processor.TaskId;
+import org.apache.kafka.streams.processor.TopologyBuilder;
import org.apache.kafka.streams.processor.internals.assignment.AssignmentInfo;
import org.apache.kafka.streams.processor.internals.assignment.ClientState;
import org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo;
@@ -32,7 +34,21 @@ import org.apache.kafka.streams.processor.internals.assignment.TaskAssignor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.zookeeper.ZooDefs;
+
+import org.I0Itec.zkclient.exception.ZkNodeExistsException;
+import org.I0Itec.zkclient.exception.ZkNoNodeException;
+import org.I0Itec.zkclient.serialize.ZkSerializer;
+import org.I0Itec.zkclient.ZkClient;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -46,10 +62,146 @@ public class KafkaStreamingPartitionAssignor implements PartitionAssignor, Confi
private static final Logger log = LoggerFactory.getLogger(KafkaStreamingPartitionAssignor.class);
private StreamThread streamThread;
+
private int numStandbyReplicas;
+ private Map<Integer, TopologyBuilder.TopicsInfo> topicGroups;
private Map<TopicPartition, Set<TaskId>> partitionToTaskIds;
+ private Map<String, Set<TaskId>> stateNameToTaskIds;
private Map<TaskId, Set<TopicPartition>> standbyTasks;
+
+ // TODO: the following ZK dependency should be removed after KIP-4
+ private static final String ZK_TOPIC_PATH = "/brokers/topics";
+ private static final String ZK_BROKER_PATH = "/brokers/ids";
+ private static final String ZK_DELETE_TOPIC_PATH = "/admin/delete_topics";
+
+ private ZkClient zkClient;
+
+ private class ZKStringSerializer implements ZkSerializer {
+
+ @Override
+ public byte[] serialize(Object data) {
+ try {
+ return ((String) data).getBytes("UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError(e);
+ }
+ }
+
+ @Override
+ public Object deserialize(byte[] bytes) {
+ try {
+ if (bytes == null)
+ return null;
+ else
+ return new String(bytes, "UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new AssertionError(e);
+ }
+ }
+ }
+
+ private List<Integer> getBrokers() {
+ List<Integer> brokers = new ArrayList<>();
+ for (String broker: zkClient.getChildren(ZK_BROKER_PATH)) {
+ brokers.add(Integer.parseInt(broker));
+ }
+ Collections.sort(brokers);
+
+ log.debug("Read brokers {} from ZK in partition assignor.", brokers);
+
+ return brokers;
+ }
+
+ @SuppressWarnings("unchecked")
+ private Map> getTopicMetadata(String topic) {
+ String data = zkClient.readData(ZK_TOPIC_PATH + "/" + topic, true);
+
+ if (data == null) return null;
+
+ try {
+ ObjectMapper mapper = new ObjectMapper();
+
+ Map dataMap = mapper.readValue(data, new TypeReference