diff --git a/vagrant/base.sh b/vagrant/base.sh
index b302dd16744..4c0add543aa 100755
--- a/vagrant/base.sh
+++ b/vagrant/base.sh
@@ -57,15 +57,6 @@ if [ -h /opt/kafka-dev ]; then
 fi
 ln -s /vagrant /opt/kafka-dev
 
-# Verification to catch provisioning errors.
-if [[ ! -x /opt/kafka-dev/bin/kafka-run-class.sh ]]; then
-    echo "ERROR: kafka-run-class.sh not found/executable in /opt/kafka-dev/bin"
-    find /opt/kafka-dev
-    ls -la /opt/kafka-dev/bin/kafka-run-class.sh || true
-    exit 1
-fi
-
-
 
 get_kafka() {
     version=$1
diff --git a/vagrant/vagrant-up.sh b/vagrant/vagrant-up.sh
index b01c10db006..5b88144c0ac 100755
--- a/vagrant/vagrant-up.sh
+++ b/vagrant/vagrant-up.sh
@@ -226,8 +226,25 @@ function bring_up_aws {
 
         if [[ ! -z "$worker_machines" ]]; then
             echo "Bringing up test worker machines in parallel"
-            vagrant_batch_command "vagrant up $debug --provider=aws" "$worker_machines" "$max_parallel"
+            # Currently it seems that the AWS provider will always run
+            # rsync as part of vagrant up. However,
+            # https://github.com/mitchellh/vagrant/issues/7531 means
+            # it is not safe to do so. Since the bug doesn't seem to
+            # cause any direct errors, just missing data on some
+            # nodes, follow up with serial rsyncing to ensure we're in
+            # a clean state. Use custom TMPDIR values to ensure we're
+            # isolated from any other instances of this script that
+            # are running/ran recently and may cause different
+            # instances to sync to the wrong nodes
+            local vagrant_rsync_temp_dir=$(mktemp -d);
+            TMPDIR=$vagrant_rsync_temp_dir vagrant_batch_command "vagrant up $debug --provider=aws" "$worker_machines" "$max_parallel"
+            rm -rf $vagrant_rsync_temp_dir
             vagrant hostmanager
+            for worker in $worker_machines; do
+                local vagrant_rsync_temp_dir=$(mktemp -d);
+                TMPDIR=$vagrant_rsync_temp_dir vagrant rsync $worker;
+                rm -rf $vagrant_rsync_temp_dir
+            done
         fi
     else
         vagrant up --provider=aws --no-parallel --no-provision $debug
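
Note on the TMPDIR workaround in the vagrant-up.sh hunk: below is a minimal standalone sketch of the same isolation pattern, assuming only bash, mktemp, and a vagrant binary on PATH. The run_isolated helper and the worker names are hypothetical illustrations, not part of this patch.

    #!/usr/bin/env bash
    # Run a command with a private TMPDIR so concurrent vagrant invocations
    # cannot share rsync state under a common temp directory and sync data
    # to the wrong nodes (mitchellh/vagrant#7531).
    run_isolated() {   # hypothetical helper, for illustration only
        local tmp rc
        tmp=$(mktemp -d)
        TMPDIR=$tmp "$@"
        rc=$?
        rm -rf "$tmp"
        return $rc
    }

    # Mirror the patch: serial, per-worker rsync after the parallel bring-up,
    # each with its own throwaway TMPDIR.
    for worker in worker1 worker2 worker3; do   # hypothetical worker names
        run_isolated vagrant rsync "$worker"
    done

Each invocation gets a fresh mktemp -d directory that is removed immediately afterwards, so no two runs, concurrent or recent, ever observe each other's temp files.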