mirror of https://github.com/apache/kafka.git
MINOR: Improve Trogdor external command worker docs (#6438)
Reviewers: Colin McCabe <cmccabe@apache.org>, Xi Yang <xi@confluent.io>
This commit is contained in:
parent
59d3a56740
commit
58aa04f91e
23
TROGDOR.md
23
TROGDOR.md
|
@ -16,7 +16,7 @@ Running Kafka:
|
|||
|
||||
> ./bin/kafka-server-start.sh ./config/server.properties &> /tmp/kafka.log &
|
||||
|
||||
Then, we want to run a Trogdor Agent, plus a Trogdor broker.
|
||||
Then, we want to run a Trogdor Agent, plus a Trogdor Coordinator.
|
||||
|
||||
To run the Trogdor Agent:
|
||||
|
||||
|
@ -125,6 +125,27 @@ ProcessStopFault stops a process by sending it a SIGSTOP signal. When the fault
|
|||
### NetworkPartitionFault
|
||||
NetworkPartitionFault sets up an artificial network partition between one or more sets of nodes. Currently, this is implemented using iptables. The iptables rules are set up on the outbound traffic from the affected nodes. Therefore, the affected nodes should still be reachable from outside the cluster.
|
||||
|
||||
External Processes
|
||||
========================================
|
||||
Trogdor supports running arbitrary commands in external processes. This is a generic way to run any configurable command in the Trogdor framework - be it a Python program, bash script, docker image, etc.
|
||||
|
||||
### ExternalCommandWorker
|
||||
ExternalCommandWorker starts an external command defined by the ExternalCommandSpec. It essentially allows you to run any command on any Trogdor agent node.
|
||||
The worker communicates with the external process via its stdin, stdout and stderr in a JSON protocol. It uses stdout for any actionable communication and only logs what it sees in stderr.
|
||||
On startup, the worker will first send a message describing the workload to the external process in this format:
|
||||
```
|
||||
{"id":<task ID string>, "workload":<configured workload JSON object>}
|
||||
```
|
||||
and will then listen for messages from the external process, again in a JSON format.
|
||||
Each message from the external process is a JSON object that can contain the following fields:
|
||||
- status: If the object contains this field, the status of the worker will be set to the given value.
|
||||
- error: If the object contains this field, the error of the worker will be set to the given value. Once an error occurs, the external process will be terminated.
|
||||
- log: If the object contains this field, a log message will be issued with this text.
|
||||
An example:
|
||||
```json
|
||||
{"log": "Finished successfully.", "status": {"p99ProduceLatency": "100ms", "messagesSent": 10000}}
|
||||
```
|
||||
|
||||
Exec Mode
|
||||
========================================
|
||||
Sometimes, you just want to run a test quickly on a single node. In this case, you can use "exec mode." This mode allows you to run a single Trogdor Agent without a Coordinator.
|
||||
|
|
|
@ -20,7 +20,7 @@ import time
|
|||
|
||||
#
|
||||
# This is an example of an external script which can be run through Trogdor's
|
||||
# ExternalCommandWorker.
|
||||
# ExternalCommandWorker. It sleeps for the amount of time given by the delayMs field of the workload in the ExternalCommandSpec.
|
||||
#
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -28,11 +28,14 @@ if __name__ == '__main__':
|
|||
line = sys.stdin.readline()
|
||||
start_message = json.loads(line)
|
||||
workload = start_message["workload"]
|
||||
print("Starting external_trogdor_command_example with task id %s, workload %s" \
|
||||
% (start_message["id"], workload))
|
||||
print("Starting external_trogdor_command_example with task id %s, workload %s"
|
||||
% (start_message["id"], workload))
|
||||
sys.stdout.flush()
|
||||
print(json.dumps({"status": "running"}))
|
||||
|
||||
# pretend to start some workload
|
||||
print(json.dumps({"status": "running"}))
|
||||
sys.stdout.flush()
|
||||
time.sleep(0.001 * workload["delayMs"])
|
||||
print(json.dumps({"status": "exiting after %s delayMs" % workload["delayMs"]}))
|
||||
|
||||
print(json.dumps({"status": "exiting after %s delayMs" % workload["delayMs"]}))
|
||||
sys.stdout.flush()
|
||||
|
|
|
@ -389,7 +389,7 @@ public class ExternalCommandWorker implements TaskWorker {
|
|||
spec.shutdownGracePeriodMs().get() : DEFAULT_SHUTDOWN_GRACE_PERIOD_MS;
|
||||
if (!executor.awaitTermination(shutdownGracePeriodMs, TimeUnit.MILLISECONDS)) {
|
||||
terminatorActionQueue.add(TerminatorAction.DESTROY_FORCIBLY);
|
||||
executor.awaitTermination(1000, TimeUnit.DAYS);
|
||||
executor.awaitTermination(1, TimeUnit.DAYS);
|
||||
}
|
||||
this.status = null;
|
||||
this.doneFuture = null;
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
|
||||
package org.apache.kafka.trogdor.workload;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.node.IntNode;
|
||||
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
|
@ -109,7 +108,7 @@ public class ExternalCommandWorkerTest {
|
|||
}
|
||||
|
||||
/**
|
||||
* Test attempting to run an exeutable which doesn't exist.
|
||||
* Test attempting to run an executable which doesn't exist.
|
||||
* We use a path which starts with /dev/null, since that should never be a
|
||||
* directory in UNIX.
|
||||
*/
|
||||
|
@ -172,12 +171,7 @@ public class ExternalCommandWorkerTest {
|
|||
}
|
||||
}
|
||||
CompletableFuture<String> statusFuture = new CompletableFuture<>();
|
||||
final WorkerStatusTracker statusTracker = new WorkerStatusTracker() {
|
||||
@Override
|
||||
public void update(JsonNode status) {
|
||||
statusFuture .complete(status.textValue().toString());
|
||||
}
|
||||
};
|
||||
final WorkerStatusTracker statusTracker = status -> statusFuture .complete(status.textValue());
|
||||
ExternalCommandWorker worker = new ExternalCommandWorkerBuilder("testForceKillTask").
|
||||
shutdownGracePeriodMs(1).
|
||||
command("bash", tempFile.getAbsolutePath()).
|
||||
|
|
Loading…
Reference in New Issue