mirror of https://github.com/apache/kafka.git
KAFKA-8981 Add rate limiting to NetworkDegradeSpec (#7446)
* Add rate limiting to tc * Feedback from PR * Add a sanity test for tc * Add iperf to vagrant scripts * Dynamically determine the network interface * Add some temp code for testing on AWS * Temp: use hostname instead of external IP * Temp: more AWS debugging * More AWS WIP * More AWS temp * Lower latency some * AWS wip * Trying this again now that ping should work * Add cluster decorator to tests * Fix broken import * Fix device name * Fix decorator arg * Remove errant import * Increase timeouts * Fix tbf command, relax assertion on latency test * Fix log line * Final bit of cleanup * Newline * Revert Trogdor retry count * PR feedback * More PR feedback * Feedback from PR * Remove unused argument
This commit is contained in:
parent
32bf0774e9
commit
d04699486d
|
@ -32,7 +32,7 @@ ARG ducker_creator=default
|
||||||
LABEL ducker.creator=$ducker_creator
|
LABEL ducker.creator=$ducker_creator
|
||||||
|
|
||||||
# Update Linux and install necessary utilities.
|
# Update Linux and install necessary utilities.
|
||||||
RUN apt update && apt install -y sudo netcat iptables rsync unzip wget curl jq coreutils openssh-server net-tools vim python-pip python-dev libffi-dev libssl-dev cmake pkg-config libfuse-dev && apt-get -y clean
|
RUN apt update && apt install -y sudo netcat iptables rsync unzip wget curl jq coreutils openssh-server net-tools vim python-pip python-dev libffi-dev libssl-dev cmake pkg-config libfuse-dev iperf traceroute && apt-get -y clean
|
||||||
RUN python -m pip install -U pip==9.0.3;
|
RUN python -m pip install -U pip==9.0.3;
|
||||||
RUN pip install --upgrade cffi virtualenv pyasn1 boto3 pycrypto pywinrm ipaddress enum34 && pip install --upgrade ducktape==0.7.6
|
RUN pip install --upgrade cffi virtualenv pyasn1 boto3 pycrypto pywinrm ipaddress enum34 && pip install --upgrade ducktape==0.7.6
|
||||||
|
|
||||||
|
|
|
@ -23,15 +23,26 @@ class DegradedNetworkFaultSpec(TaskSpec):
|
||||||
Degrades the network so that traffic on a subset of nodes has higher latency
|
Degrades the network so that traffic on a subset of nodes has higher latency
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, start_ms, duration_ms, node_specs):
|
def __init__(self, start_ms, duration_ms):
|
||||||
"""
|
"""
|
||||||
Create a new NetworkDegradeFaultSpec.
|
Create a new NetworkDegradeFaultSpec.
|
||||||
|
|
||||||
:param start_ms: The start time, as described in task_spec.py
|
:param start_ms: The start time, as described in task_spec.py
|
||||||
:param duration_ms: The duration in milliseconds.
|
:param duration_ms: The duration in milliseconds.
|
||||||
:param node_latencies: A dict of node name to desired latency
|
|
||||||
:param network_device: The name of the network device
|
|
||||||
"""
|
"""
|
||||||
super(DegradedNetworkFaultSpec, self).__init__(start_ms, duration_ms)
|
super(DegradedNetworkFaultSpec, self).__init__(start_ms, duration_ms)
|
||||||
self.message["class"] = "org.apache.kafka.trogdor.fault.DegradedNetworkFaultSpec"
|
self.message["class"] = "org.apache.kafka.trogdor.fault.DegradedNetworkFaultSpec"
|
||||||
self.message["nodeSpecs"] = node_specs
|
self.message["nodeSpecs"] = {}
|
||||||
|
|
||||||
|
def add_node_spec(self, node, networkDevice, latencyMs=0, rateLimitKbit=0):
|
||||||
|
"""
|
||||||
|
Add a node spec to this fault spec
|
||||||
|
:param node: The node name which is to be degraded
|
||||||
|
:param networkDevice: The network device name (e.g., eth0) to apply the degradation to
|
||||||
|
:param latencyMs: Optional. How much latency to add to each packet
|
||||||
|
:param rateLimitKbit: Optional. Maximum throughput in kilobits per second to allow
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
self.message["nodeSpecs"][node] = {
|
||||||
|
"rateLimitKbit": rateLimitKbit, "latencyMs": latencyMs, "networkDevice": networkDevice
|
||||||
|
}
|
||||||
|
|
|
@ -300,6 +300,16 @@ class TrogdorTask(object):
|
||||||
self.id = id
|
self.id = id
|
||||||
self.trogdor = trogdor
|
self.trogdor = trogdor
|
||||||
|
|
||||||
|
def task_state_or_error(self):
|
||||||
|
task_state = self.trogdor.tasks()["tasks"][self.id]
|
||||||
|
if task_state is None:
|
||||||
|
raise RuntimeError("Coordinator did not know about %s." % self.id)
|
||||||
|
error = task_state.get("error")
|
||||||
|
if error is None or error == "":
|
||||||
|
return task_state["state"], None
|
||||||
|
else:
|
||||||
|
return None, error
|
||||||
|
|
||||||
def done(self):
|
def done(self):
|
||||||
"""
|
"""
|
||||||
Check if this task is done.
|
Check if this task is done.
|
||||||
|
@ -308,14 +318,26 @@ class TrogdorTask(object):
|
||||||
:returns: True if the task is in DONE_STATE;
|
:returns: True if the task is in DONE_STATE;
|
||||||
False if it is in a different state.
|
False if it is in a different state.
|
||||||
"""
|
"""
|
||||||
task_state = self.trogdor.tasks()["tasks"][self.id]
|
(task_state, error) = self.task_state_or_error()
|
||||||
if task_state is None:
|
if task_state is not None:
|
||||||
raise RuntimeError("Coordinator did not know about %s." % self.id)
|
return task_state == TrogdorTask.DONE_STATE
|
||||||
error = task_state.get("error")
|
else:
|
||||||
if error is None or error == "":
|
|
||||||
return task_state["state"] == TrogdorTask.DONE_STATE
|
|
||||||
raise RuntimeError("Failed to gracefully stop %s: got task error: %s" % (self.id, error))
|
raise RuntimeError("Failed to gracefully stop %s: got task error: %s" % (self.id, error))
|
||||||
|
|
||||||
|
def running(self):
|
||||||
|
"""
|
||||||
|
Check if this task is running.
|
||||||
|
|
||||||
|
:raises RuntimeError: If the task encountered an error.
|
||||||
|
:returns: True if the task is in RUNNING_STATE;
|
||||||
|
False if it is in a different state.
|
||||||
|
"""
|
||||||
|
(task_state, error) = self.task_state_or_error()
|
||||||
|
if task_state is not None:
|
||||||
|
return task_state == TrogdorTask.RUNNING_STATE
|
||||||
|
else:
|
||||||
|
raise RuntimeError("Failed to start %s: got task error: %s" % (self.id, error))
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
"""
|
"""
|
||||||
Stop this task.
|
Stop this task.
|
||||||
|
|
|
@ -0,0 +1,138 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from ducktape.mark import parametrize
|
||||||
|
from ducktape.mark.resource import cluster
|
||||||
|
from ducktape.tests.test import Test
|
||||||
|
from ducktape.utils.util import wait_until
|
||||||
|
|
||||||
|
from kafkatest.services.trogdor.degraded_network_fault_spec import DegradedNetworkFaultSpec
|
||||||
|
from kafkatest.services.trogdor.trogdor import TrogdorService
|
||||||
|
from kafkatest.services.zookeeper import ZookeeperService
|
||||||
|
|
||||||
|
|
||||||
|
class NetworkDegradeTest(Test):
|
||||||
|
"""
|
||||||
|
These tests ensure that the network degrade Trogdor specs (which use "tc") are working as expected in whatever
|
||||||
|
environment the system tests may be running in. The linux tools "ping" and "iperf" are used for validation
|
||||||
|
and need to be available along with "tc" in the test environment.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, test_context):
|
||||||
|
super(NetworkDegradeTest, self).__init__(test_context)
|
||||||
|
self.zk = ZookeeperService(test_context, num_nodes=3)
|
||||||
|
self.trogdor = TrogdorService(context=self.test_context, client_services=[self.zk])
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.zk.start()
|
||||||
|
self.trogdor.start()
|
||||||
|
|
||||||
|
def teardown(self):
|
||||||
|
self.trogdor.stop()
|
||||||
|
self.zk.stop()
|
||||||
|
|
||||||
|
@cluster(num_nodes=5)
|
||||||
|
@parametrize(task_name="latency-100", device_name="eth0", latency_ms=50, rate_limit_kbit=0)
|
||||||
|
@parametrize(task_name="latency-100-rate-1000", device_name="eth0", latency_ms=50, rate_limit_kbit=1000)
|
||||||
|
def test_latency(self, task_name, device_name, latency_ms, rate_limit_kbit):
|
||||||
|
spec = DegradedNetworkFaultSpec(0, 10000)
|
||||||
|
for node in self.zk.nodes:
|
||||||
|
spec.add_node_spec(node.name, device_name, latency_ms, rate_limit_kbit)
|
||||||
|
|
||||||
|
latency = self.trogdor.create_task(task_name, spec)
|
||||||
|
|
||||||
|
zk0 = self.zk.nodes[0]
|
||||||
|
zk1 = self.zk.nodes[1]
|
||||||
|
|
||||||
|
# Capture the ping times from the ping stdout
|
||||||
|
# 64 bytes from ducker01 (172.24.0.2): icmp_seq=1 ttl=64 time=0.325 ms
|
||||||
|
r = re.compile(r".*time=(?P<time>[\d.]+)\sms.*")
|
||||||
|
|
||||||
|
times = []
|
||||||
|
for line in zk0.account.ssh_capture("ping -i 1 -c 20 %s" % zk1.account.hostname):
|
||||||
|
self.logger.debug("Ping output: %s" % line)
|
||||||
|
m = r.match(line)
|
||||||
|
if m is not None and m.group("time"):
|
||||||
|
times.append(float(m.group("time")))
|
||||||
|
self.logger.info("Parsed ping time of %d" % float(m.group("time")))
|
||||||
|
self.logger.debug("Captured ping times: %s" % times)
|
||||||
|
|
||||||
|
# We expect to see some low ping times (before and after the task runs) as well as high ping times
|
||||||
|
# (during the task). For the high time, it's twice the configured latency since both links apply the
|
||||||
|
# rule, 80% for a little variance buffer
|
||||||
|
high_time_ms = 0.8 * 2 * latency_ms
|
||||||
|
low_time_ms = 10
|
||||||
|
slow_times = [t for t in times if t > high_time_ms]
|
||||||
|
fast_times = [t for t in times if t < low_time_ms]
|
||||||
|
|
||||||
|
latency.stop()
|
||||||
|
latency.wait_for_done()
|
||||||
|
|
||||||
|
# We captured 20 ping times. Assert that at least 5 were "fast" and 5 were "slow"
|
||||||
|
assert len(slow_times) > 5, "Expected to see more slow ping times (lower than %d)" % low_time_ms
|
||||||
|
assert len(fast_times) > 5, "Expected to see more fast ping times (higher than %d)" % high_time_ms
|
||||||
|
|
||||||
|
@cluster(num_nodes=5)
|
||||||
|
@parametrize(task_name="rate-1000", device_name="eth0", latency_ms=0, rate_limit_kbit=1000000)
|
||||||
|
@parametrize(task_name="rate-1000-latency-50", device_name="eth0", latency_ms=50, rate_limit_kbit=1000000)
|
||||||
|
def test_rate(self, task_name, device_name, latency_ms, rate_limit_kbit):
|
||||||
|
zk0 = self.zk.nodes[0]
|
||||||
|
zk1 = self.zk.nodes[1]
|
||||||
|
|
||||||
|
spec = DegradedNetworkFaultSpec(0, 60000)
|
||||||
|
spec.add_node_spec(zk0.name, device_name, latency_ms, rate_limit_kbit)
|
||||||
|
|
||||||
|
# start the task and wait
|
||||||
|
rate_limit = self.trogdor.create_task(task_name, spec)
|
||||||
|
wait_until(lambda: rate_limit.running(),
|
||||||
|
timeout_sec=10,
|
||||||
|
err_msg="%s failed to start within 10 seconds." % rate_limit)
|
||||||
|
|
||||||
|
# Run iperf server on zk1, iperf client on zk0
|
||||||
|
iperf_server = zk1.account.ssh_capture("iperf -s")
|
||||||
|
|
||||||
|
# Capture the measured kbps between the two nodes.
|
||||||
|
# [ 3] 0.0- 1.0 sec 2952576 KBytes 24187503 Kbits/sec
|
||||||
|
r = re.compile(r"^.*\s(?P<rate>[\d.]+)\sKbits/sec$")
|
||||||
|
|
||||||
|
measured_rates = []
|
||||||
|
for line in zk0.account.ssh_capture("iperf -i 1 -t 20 -f k -c %s" % zk1.account.hostname):
|
||||||
|
self.logger.info("iperf output %s" % line)
|
||||||
|
m = r.match(line)
|
||||||
|
if m is not None:
|
||||||
|
measured_rate = float(m.group("rate"))
|
||||||
|
measured_rates.append(measured_rate)
|
||||||
|
self.logger.info("Parsed rate of %d kbit/s from iperf" % measured_rate)
|
||||||
|
|
||||||
|
# kill iperf server and consume the stdout to ensure clean exit
|
||||||
|
zk1.account.kill_process("iperf")
|
||||||
|
for _ in iperf_server:
|
||||||
|
continue
|
||||||
|
|
||||||
|
rate_limit.stop()
|
||||||
|
rate_limit.wait_for_done()
|
||||||
|
|
||||||
|
self.logger.info("Measured rates: %s" % measured_rates)
|
||||||
|
|
||||||
|
# We expect to see measured rates within an order of magnitude of our target rate
|
||||||
|
low_kbps = rate_limit_kbit / 10
|
||||||
|
high_kbps = rate_limit_kbit * 10
|
||||||
|
acceptable_rates = [r for r in measured_rates if low_kbps < r < high_kbps]
|
||||||
|
|
||||||
|
msg = "Expected most of the measured rates to be within an order of magnitude of target %d." % rate_limit_kbit
|
||||||
|
msg += " This means `tc` did not limit the bandwidth as expected."
|
||||||
|
assert len(acceptable_rates) > 5, msg
|
|
@ -97,10 +97,9 @@ class RoundTripFaultTest(Test):
|
||||||
def test_produce_consume_with_latency(self):
|
def test_produce_consume_with_latency(self):
|
||||||
workload1 = self.trogdor.create_task("workload1", self.round_trip_spec)
|
workload1 = self.trogdor.create_task("workload1", self.round_trip_spec)
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
node_specs = {}
|
spec = DegradedNetworkFaultSpec(0, 60000)
|
||||||
for node in self.kafka.nodes + self.zk.nodes:
|
for node in self.kafka.nodes + self.zk.nodes:
|
||||||
node_specs[node.name] = {"latencyMs": 500, "networkDevice": "eth0"}
|
spec.add_node_spec(node.name, "eth0", latencyMs=100, rateLimitKbit=3000)
|
||||||
spec = DegradedNetworkFaultSpec(0, 60000, node_specs)
|
|
||||||
slow1 = self.trogdor.create_task("slow1", spec)
|
slow1 = self.trogdor.create_task("slow1", spec)
|
||||||
workload1.wait_for_done(timeout_sec=600)
|
workload1.wait_for_done(timeout_sec=600)
|
||||||
slow1.stop()
|
slow1.stop()
|
||||||
|
|
|
@ -31,12 +31,15 @@ public class DegradedNetworkFaultSpec extends TaskSpec {
|
||||||
public static class NodeDegradeSpec {
|
public static class NodeDegradeSpec {
|
||||||
private final String networkDevice;
|
private final String networkDevice;
|
||||||
private final int latencyMs;
|
private final int latencyMs;
|
||||||
|
private final int rateLimitKbit;
|
||||||
|
|
||||||
public NodeDegradeSpec(
|
public NodeDegradeSpec(
|
||||||
@JsonProperty("networkDevice") String networkDevice,
|
@JsonProperty("networkDevice") String networkDevice,
|
||||||
@JsonProperty("latencyMs") int latencyMs) {
|
@JsonProperty("latencyMs") Integer latencyMs,
|
||||||
|
@JsonProperty("rateLimitKbit") Integer rateLimitKbit) {
|
||||||
this.networkDevice = networkDevice == null ? "" : networkDevice;
|
this.networkDevice = networkDevice == null ? "" : networkDevice;
|
||||||
this.latencyMs = latencyMs;
|
this.latencyMs = latencyMs == null ? 0 : latencyMs;
|
||||||
|
this.rateLimitKbit = rateLimitKbit == null ? 0 : rateLimitKbit;
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty("networkDevice")
|
@JsonProperty("networkDevice")
|
||||||
|
@ -48,6 +51,20 @@ public class DegradedNetworkFaultSpec extends TaskSpec {
|
||||||
public int latencyMs() {
|
public int latencyMs() {
|
||||||
return latencyMs;
|
return latencyMs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty("rateLimitKbit")
|
||||||
|
public int rateLimitKbit() {
|
||||||
|
return rateLimitKbit;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "NodeDegradeSpec{" +
|
||||||
|
"networkDevice='" + networkDevice + '\'' +
|
||||||
|
", latencyMs=" + latencyMs +
|
||||||
|
", rateLimitKbit=" + rateLimitKbit +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Map<String, NodeDegradeSpec> nodeSpecs;
|
private final Map<String, NodeDegradeSpec> nodeSpecs;
|
||||||
|
|
|
@ -28,13 +28,17 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.NetworkInterface;
|
import java.net.NetworkInterface;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Uses the linux utility <pre>tc</pre> (traffic controller) to simulate latency on a specified network device
|
* Uses the linux utility <pre>tc</pre> (traffic controller) to degrade performance on a specified network device
|
||||||
*/
|
*/
|
||||||
public class DegradedNetworkFaultWorker implements TaskWorker {
|
public class DegradedNetworkFaultWorker implements TaskWorker {
|
||||||
|
|
||||||
|
@ -58,10 +62,10 @@ public class DegradedNetworkFaultWorker implements TaskWorker {
|
||||||
DegradedNetworkFaultSpec.NodeDegradeSpec nodeSpec = nodeSpecs.get(curNode.name());
|
DegradedNetworkFaultSpec.NodeDegradeSpec nodeSpec = nodeSpecs.get(curNode.name());
|
||||||
if (nodeSpec != null) {
|
if (nodeSpec != null) {
|
||||||
for (String device : devicesForSpec(nodeSpec)) {
|
for (String device : devicesForSpec(nodeSpec)) {
|
||||||
if (nodeSpec.latencyMs() < 0) {
|
if (nodeSpec.latencyMs() < 0 || nodeSpec.rateLimitKbit() < 0) {
|
||||||
throw new RuntimeException("Expected a positive value for latencyMs, but got " + nodeSpec.latencyMs());
|
throw new RuntimeException("Expected non-negative values for latencyMs and rateLimitKbit, but got " + nodeSpec);
|
||||||
} else {
|
} else {
|
||||||
enableTrafficControl(platform, device, nodeSpec.latencyMs());
|
enableTrafficControl(platform, device, nodeSpec.latencyMs(), nodeSpec.rateLimitKbit());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -96,14 +100,71 @@ public class DegradedNetworkFaultWorker implements TaskWorker {
|
||||||
return devices;
|
return devices;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void enableTrafficControl(Platform platform, String networkDevice, int delayMs) throws IOException {
|
/**
|
||||||
|
* Constructs the appropriate "tc" commands to apply latency and rate limiting, if they are non zero.
|
||||||
|
*/
|
||||||
|
private void enableTrafficControl(Platform platform, String networkDevice, int delayMs, int rateLimitKbps) throws IOException {
|
||||||
|
if (delayMs > 0) {
|
||||||
int deviationMs = Math.max(1, (int) Math.sqrt(delayMs));
|
int deviationMs = Math.max(1, (int) Math.sqrt(delayMs));
|
||||||
platform.runCommand(new String[] {
|
List<String> delay = new ArrayList<>();
|
||||||
"sudo", "tc", "qdisc", "add", "dev", networkDevice, "root", "netem",
|
rootHandler(networkDevice, delay::add);
|
||||||
"delay", String.format("%dms", delayMs), String.format("%dms", deviationMs), "distribution", "normal"
|
netemDelay(delayMs, deviationMs, delay::add);
|
||||||
});
|
platform.runCommand(delay.toArray(new String[]{}));
|
||||||
|
|
||||||
|
if (rateLimitKbps > 0) {
|
||||||
|
List<String> rate = new ArrayList<>();
|
||||||
|
childHandler(networkDevice, rate::add);
|
||||||
|
tbfRate(rateLimitKbps, rate::add);
|
||||||
|
platform.runCommand(rate.toArray(new String[]{}));
|
||||||
|
}
|
||||||
|
} else if (rateLimitKbps > 0) {
|
||||||
|
List<String> rate = new ArrayList<>();
|
||||||
|
rootHandler(networkDevice, rate::add);
|
||||||
|
tbfRate(rateLimitKbps, rate::add);
|
||||||
|
platform.runCommand(rate.toArray(new String[]{}));
|
||||||
|
} else {
|
||||||
|
log.warn("Not applying any rate limiting or latency");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct the first part of a "tc" command to define a qdisc root handler for the given network interface
|
||||||
|
*/
|
||||||
|
private void rootHandler(String networkDevice, Consumer<String> consumer) {
|
||||||
|
Stream.of("sudo", "tc", "qdisc", "add", "dev", networkDevice, "root", "handle", "1:0").forEach(consumer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct the first part of a "tc" command to define a qdisc child handler for the given interface. This can
|
||||||
|
* only be used if a root handler has been appropriately defined first (as in {@link #rootHandler}).
|
||||||
|
*/
|
||||||
|
private void childHandler(String networkDevice, Consumer<String> consumer) {
|
||||||
|
Stream.of("sudo", "tc", "qdisc", "add", "dev", networkDevice, "parent", "1:1", "handle", "10:").forEach(consumer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct the second part of a "tc" command that defines a netem (Network Emulator) filter that will apply some
|
||||||
|
* amount of latency with a small amount of deviation. The distribution of the latency deviation follows a so-called
|
||||||
|
* Pareto-normal distribution. This is the formal name for the 80/20 rule, which might better represent real-world
|
||||||
|
* patterns.
|
||||||
|
*/
|
||||||
|
private void netemDelay(int delayMs, int deviationMs, Consumer<String> consumer) {
|
||||||
|
Stream.of("netem", "delay", String.format("%dms", delayMs), String.format("%dms", deviationMs),
|
||||||
|
"distribution", "paretonormal").forEach(consumer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct the second part of a "tc" command that defines a tbf (token buffer filter) that will rate limit the
|
||||||
|
* packets going through a qdisc.
|
||||||
|
*/
|
||||||
|
private void tbfRate(int rateLimitKbit, Consumer<String> consumer) {
|
||||||
|
Stream.of("tbf", "rate", String.format("%dkbit", rateLimitKbit), "burst", "1mbit", "latency", "500ms").forEach(consumer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete any previously defined qdisc for the given network interface.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
private void disableTrafficControl(Platform platform, String networkDevice) throws IOException {
|
private void disableTrafficControl(Platform platform, String networkDevice) throws IOException {
|
||||||
platform.runCommand(new String[] {
|
platform.runCommand(new String[] {
|
||||||
"sudo", "tc", "qdisc", "del", "dev", networkDevice, "root"
|
"sudo", "tc", "qdisc", "del", "dev", networkDevice, "root"
|
||||||
|
|
|
@ -25,7 +25,9 @@ sudo apt-get install -y \
|
||||||
ruby-dev \
|
ruby-dev \
|
||||||
zlib1g-dev \
|
zlib1g-dev \
|
||||||
realpath \
|
realpath \
|
||||||
python-setuptools
|
python-setuptools \
|
||||||
|
iperf \
|
||||||
|
traceroute
|
||||||
|
|
||||||
base_dir=`dirname $0`/../..
|
base_dir=`dirname $0`/../..
|
||||||
|
|
||||||
|
|
|
@ -107,6 +107,9 @@ popd
|
||||||
popd
|
popd
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
# Install iperf
|
||||||
|
apt-get install -y iperf traceroute
|
||||||
|
|
||||||
# Test multiple Kafka versions
|
# Test multiple Kafka versions
|
||||||
# We want to use the latest Scala version per Kafka version
|
# We want to use the latest Scala version per Kafka version
|
||||||
# Previously we could not pull in Scala 2.12 builds, because Scala 2.12 requires Java 8 and we were running the system
|
# Previously we could not pull in Scala 2.12 builds, because Scala 2.12 requires Java 8 and we were running the system
|
||||||
|
|
Loading…
Reference in New Issue