mirror of https://github.com/apache/kafka.git
				
				
				
			
		
			
				
	
	
		
			267 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			267 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
 | |
| 
 | |
set -o nounset  # treat expansion of an unset variable as an error
set -o errexit  # exit script if any command exits with nonzero value

# Script identity. Each value is assigned first and marked readonly afterwards
# so that a failing command substitution is not hidden by `readonly`'s own
# (always zero) exit status.
PROG_NAME=$(basename -- "$0")
readonly PROG_NAME
PROG_DIR=$(dirname -- "$(realpath -- "$0")")
readonly PROG_DIR
INVOKE_DIR=$(pwd)
readonly INVOKE_DIR
# All original arguments joined into one space-separated string.
readonly ARGS="$*"

# overrideable defaults
AWS=false
PARALLEL=true
MAX_PARALLEL=5
DEBUG=false

readonly USAGE="Usage: $PROG_NAME [-h | --help] [--debug] [--aws [--no-parallel] [--max-parallel MAX]]"
# The heredoc delimiter is unquoted on purpose: $MAX_PARALLEL is expanded so
# the help text shows the default value.
HELP="$(cat <<EOF
Tool to bring up a vagrant cluster on local machine or aws.

    -h | --help             Show this help message
    --aws                   Use if you are running in aws
    --no-parallel           Bring up machines not in parallel. Only applicable on aws
    --max-parallel  MAX     Maximum number of machines to bring up in parallel. Note: only applicable on test worker machines on aws. default: $MAX_PARALLEL
    --debug                 Enable debug information for vagrant
Approximately speaking, this wrapper script essentially wraps 2 commands:
    vagrant up
    vagrant hostmanager

The situation on aws is complicated by the fact that aws imposes a maximum request rate,
which effectively caps the number of machines we are able to bring up in parallel. Therefore, on aws,
this wrapper script attempts to bring up machines in small batches.

If you are seeing rate limit exceeded errors, you may need to use a reduced --max-parallel setting.

EOF
)"
readonly HELP
 | |
| 
 | |
# Print the usage line followed by the long help text, then exit successfully.
#
# Globals:  USAGE (read), HELP (read)
# Outputs:  usage and help text on stdout
# Returns:  never returns; terminates the script with status 0
function help {
    # printf '%s\n' prints the text verbatim, whatever characters it contains.
    printf '%s\n' "$USAGE"
    printf '%s\n' "$HELP"
    exit 0
}
 | |
| 
 | |
# Parse command-line options into the overrideable globals.
#
# Globals:   AWS, PARALLEL, MAX_PARALLEL, DEBUG (written)
# Arguments: the script's command-line arguments
# Outputs:   error messages on stderr
# Returns:   exits with status 1 on an unknown option or on a missing or
#            invalid --max-parallel value; -h/--help calls help
function parse_args {
    # Arithmetic comparison; [[ $# > 0 ]] would compare strings.
    while (( $# > 0 )); do
        case "$1" in
            -h | --help)
                help
                ;;
            --aws)
                AWS=true
                ;;
            --no-parallel)
                PARALLEL=false
                ;;
            --max-parallel)
                # Check the value here: a missing value would otherwise abort
                # on an unbound $2 under nounset, and a value of 0 would cause
                # a division by zero when machines are batched later.
                if (( $# < 2 )) || [[ ! "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 )); then
                    echo "--max-parallel requires a positive integer argument" >&2
                    exit 1
                fi
                MAX_PARALLEL="$2"
                shift # past the value
                ;;
            --debug)
                DEBUG=true
                ;;
            *)
                # unknown option
                echo "Unknown option $1" >&2
                exit 1
                ;;
        esac
        shift # past argument
    done
}

parse_args "$@"
 | |
| 
 | |
# Get a list of vagrant machines (in any state)
#
# Parses the output of `vagrant status`:
#   - lines before the first empty line are ignored;
#   - after the first empty line, the first space-delimited field of every
#     line is collected as a machine name;
#   - the next empty line, or end of output, ends the list.
#
# Outputs: the machine names on stdout, separated by single spaces
function read_vagrant_machines {
    local state="ignore"
    local machines=""
    local line

    # `read` without IFS= strips surrounding whitespace, so whitespace-only
    # lines count as empty. The `|| [[ -n ... ]]` clause keeps a final line
    # that has no trailing newline.
    while read -r line || [[ -n "$line" ]]; do
        if [[ -z "$line" ]]; then
            if [[ "$state" == "reading" ]]; then
                # second empty line: all done
                break
            fi
            state="reading"
            continue
        fi

        # Parse machine name while in reading state
        if [[ "$state" == "reading" ]]; then
            # ${line%% *} keeps everything before the first space.
            machines="${machines:+$machines }${line%% *}"
        fi
    done < <(vagrant status)

    # Emitted after the loop so the list is not lost when the output ends
    # without a closing empty line.
    echo "$machines"
}
 | |
| 
 | |
# Filter "list", returning a list of strings containing pattern as a substring
#
# Arguments:
#   $1 - whitespace-separated list of items
#   $2 - substring to look for (matched literally, not as a regex)
# Outputs: matching items on stdout, each preceded by a single space
#          (callers trim the result)
function filter {
    local list="$1"
    local pattern="$2"

    local result=""
    local item
    # $list is intentionally unquoted: word splitting yields the items.
    for item in $list; do
        # Quoting $pattern inside the glob makes it a literal substring match
        # and avoids forking grep for every item.
        if [[ "$item" == *"$pattern"* ]]; then
            result="$result $item"
        fi
    done
    echo "$result"
}
 | |
| 
 | |
# Given a list of machine names, return only test worker machines
#
# Arguments: $1 - whitespace-separated machine names
# Outputs:   names containing "worker", separated by single spaces
function worker {
    local machines="$1"
    # Declared separately from the assignment so that a failure of filter is
    # not masked by `local`'s exit status.
    local workers
    workers=$(filter "$machines" "worker")
    workers=$(echo "$workers" | xargs)  # trim leading/trailing whitespace
    echo "$workers"
}
 | |
| 
 | |
# Given a list of machine names, return only zookeeper and broker machines
#
# Arguments: $1 - whitespace-separated machine names
# Outputs:   names containing "zk" followed by names containing "broker",
#            separated by single spaces
function zk_broker {
    local machines="$1"
    # Declared separately from the assignments so that a failure of filter is
    # not masked by `local`'s exit status.
    local zk_list broker_list zk_broker_list
    zk_list=$(filter "$machines" "zk")
    broker_list=$(filter "$machines" "broker")
    zk_broker_list=$(echo "$zk_list $broker_list" | xargs)  # trim leading/trailing whitespace
    echo "$zk_broker_list"
}
 | |
| 
 | |
# Run a vagrant command on batches of machines of size $group_size
# This is annoying but necessary on aws to avoid errors due to AWS request rate
# throttling
#
# Arguments:
#   $1 - command line to run, as one string (it is word-split before running)
#   $2 - whitespace-separated machine names
#   $3 - batch size; must be a positive integer
# Returns: 1 (with a message on stderr) if the batch size is invalid
#
# Example
#   $ vagrant_batch_command "vagrant up" "m1 m2 m3 m4 m5" "2"
#
#   This is equivalent to running "vagrant up" on groups of machines of size 2 or less, i.e.:
#   $ vagrant up m1 m2
#   $ vagrant up m3 m4
#   $ vagrant up m5
#
# NOTE(review): an earlier comment here described a --provision flag, but this
# function adds no flags of its own; any flag must be part of $1.
function vagrant_batch_command {
    local vagrant_cmd="$1"
    local machines="$2"
    local group_size="$3"

    # Reject 0 and non-numeric sizes up front; they would otherwise fail
    # inside the modulo below.
    if [[ ! "$group_size" =~ ^[0-9]+$ ]] || (( 10#$group_size < 1 )); then
        echo "vagrant_batch_command: group size must be a positive integer, got '$group_size'" >&2
        return 1
    fi
    group_size=$((10#$group_size))  # force base 10 so leading zeros are harmless

    local count=0
    local m_group=""
    local machine
    # $machines, $vagrant_cmd and $m_group are intentionally unquoted below:
    # word splitting turns each string into separate arguments.
    for machine in $machines; do
        m_group="$m_group $machine"
        count=$((count + 1))

        if (( count % group_size == 0 )); then
            # We've reached a full group
            # Bring up this part of the cluster
            $vagrant_cmd $m_group
            m_group=""
        fi
    done

    # Take care of any leftover partially complete group
    if [[ -n "$m_group" ]]; then
        $vagrant_cmd $m_group
    fi
}
 | |
| 
 | |
# Bring the cluster up on the local machine.
#
# We assume vagrant-hostmanager is installed, but it may or may not be disabled
# during vagrant up. Running the three steps explicitly ensures hostmanager
# runs after the machines are up and before provisioning.
# This sequence of commands is necessary for example for bringing up a multi-node zookeeper cluster
function bring_up_local {
    vagrant up --no-provision
    vagrant hostmanager
    vagrant provision
}
 | |
| 
 | |
# Bring the cluster up on AWS.
#
# Arguments:
#   $1 - "true" to bring test worker machines up in batches via
#        vagrant_batch_command; any other value brings everything up with
#        --no-parallel
#   $2 - batch size handed to vagrant_batch_command
#   $3 - "true" to pass --debug to `vagrant up`; any other value (or no
#        value) disables it
# Outputs: progress messages on stdout, plus whatever vagrant prints
function bring_up_aws {
    local parallel="$1"
    local max_parallel="$2"
    # Default to no debug flag so an unexpected $3 cannot leave `debug` unset
    # (which would abort the script under nounset).
    local debug=""
    if [[ "${3:-false}" == "true" ]]; then
        debug="--debug"
    fi

    # Declared separately from the assignments so that command failures are
    # not masked by `local`, and so that nothing leaks into the global scope.
    local machines zk_broker_machines worker_machines
    local vagrant_rsync_temp_dir machine
    machines="$(read_vagrant_machines)"
    zk_broker_machines=$(zk_broker "$machines")
    worker_machines=$(worker "$machines")

    # The machine lists and $debug are intentionally unquoted below: word
    # splitting yields separate arguments, and an empty $debug yields none.
    if [[ "$parallel" == "true" ]]; then
        if [[ -n "$zk_broker_machines" ]]; then
            # We still have to bring up zookeeper/broker nodes serially
            echo "Bringing up zookeeper/broker machines serially"
            vagrant up --provider=aws --no-parallel --no-provision $zk_broker_machines $debug
            vagrant hostmanager --provider=aws
            vagrant provision
        fi

        if [[ -n "$worker_machines" ]]; then
            echo "Bringing up test worker machines in parallel"
            # Try to isolate this job in its own /tmp space. See note
            # below about vagrant issue
            vagrant_rsync_temp_dir=$(mktemp -d)
            TMPDIR="$vagrant_rsync_temp_dir" vagrant_batch_command "vagrant up $debug --provider=aws" "$worker_machines" "$max_parallel"
            rm -rf -- "$vagrant_rsync_temp_dir"
            vagrant hostmanager --provider=aws
        fi
    else
        vagrant up --provider=aws --no-parallel --no-provision $debug
        vagrant hostmanager --provider=aws
        vagrant provision
    fi

    # Currently it seems that the AWS provider will always run rsync
    # as part of vagrant up. However,
    # https://github.com/mitchellh/vagrant/issues/7531 means it is not
    # safe to do so. Since the bug doesn't seem to cause any direct
    # errors, just missing data on some nodes, follow up with serial
    # rsyncing to ensure we're in a clean state. Use custom TMPDIR
    # values to ensure we're isolated from any other instances of this
    # script that are running/ran recently and may cause different
    # instances to sync to the wrong nodes
    for machine in $worker_machines; do
        vagrant_rsync_temp_dir=$(mktemp -d)
        TMPDIR="$vagrant_rsync_temp_dir" vagrant rsync "$machine"
        rm -rf -- "$vagrant_rsync_temp_dir"
    done
}
 | |
| 
 | |
# Entry point: bring the cluster up on AWS or locally, depending on $AWS.
#
# Globals: AWS, PARALLEL, MAX_PARALLEL, DEBUG (read)
function main {
    if [[ "$AWS" == "true" ]]; then
        bring_up_aws "$PARALLEL" "$MAX_PARALLEL" "$DEBUG"
    else
        bring_up_local
    fi
}
 | |
| 
 | |
# Run the script; options have already been parsed into globals above.
main
 |