2012-08-30 02:07:58 +08:00
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
|
|
# or more contributor license agreements. See the NOTICE file
|
|
|
|
# distributed with this work for additional information
|
|
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
|
|
# to you under the Apache License, Version 2.0 (the
|
|
|
|
# "License"); you may not use this file except in compliance
|
|
|
|
# with the License. You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing,
|
|
|
|
# software distributed under the License is distributed on an
|
|
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
|
# KIND, either express or implied. See the License for the
|
|
|
|
# specific language governing permissions and limitations
|
|
|
|
# under the License.
|
2012-08-23 01:16:26 +08:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
# ===================================
|
|
|
|
# replica_basic_test.py
|
|
|
|
# ===================================
|
|
|
|
|
|
|
|
import inspect
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import signal
|
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
import time
|
2012-08-30 02:07:58 +08:00
|
|
|
import traceback
|
2012-08-23 01:16:26 +08:00
|
|
|
|
|
|
|
from system_test_env import SystemTestEnv
|
|
|
|
sys.path.append(SystemTestEnv.SYSTEM_TEST_UTIL_DIR)
|
2012-10-11 00:56:57 +08:00
|
|
|
|
2012-08-23 01:16:26 +08:00
|
|
|
from setup_utils import SetupUtils
|
2012-10-11 00:56:57 +08:00
|
|
|
from replication_utils import ReplicationUtils
|
2012-08-23 01:16:26 +08:00
|
|
|
import system_test_utils
|
|
|
|
from testcase_env import TestcaseEnv
|
|
|
|
|
|
|
|
# product specific: Kafka
|
|
|
|
import kafka_system_test_utils
|
2012-08-30 02:07:58 +08:00
|
|
|
import metrics
|
2012-08-23 01:16:26 +08:00
|
|
|
|
2012-10-11 00:56:57 +08:00
|
|
|
class ReplicaBasicTest(ReplicationUtils, SetupUtils):
    """Replication system test that runs each testcase directory of this suite.

    For every testcase, ``runTest`` starts zookeepers and brokers, creates
    topics, runs a producer in the background, looks up and validates the
    elected leader for a configured number of iterations (stopping and
    restarting the leader when "bounce_leader" is "true"), starts a consumer,
    stops all entities, collects the remote logs, validates the data and
    finally draws the metrics graphs and dashboards.
    """

    testModuleAbsPathName = os.path.realpath(__file__)
    testSuiteAbsPathName = os.path.abspath(os.path.dirname(testModuleAbsPathName))

    def __init__(self, systemTestEnv):
        """Store the cluster level environment and initialise the base classes.

        systemTestEnv -- SystemTestEnv instance; provides cluster level
                         environment settings such as entity_id, hostname,
                         kafka_home, java_home which are available in a list
                         of dictionary named "clusterEntityConfigDictList"
        """
        self.systemTestEnv = systemTestEnv

        # the base class constructor is handed this test instance as its argument
        super(ReplicaBasicTest, self).__init__(self)

        # NOTE(review): the original code built a *local* dict
        #     d = {'name_of_class': self.__class__.__name__}
        # here and never used it; "self.d" (read in this class for the logger
        # argument "extra") is presumably provided by a base class. The dead
        # local was dropped - confirm whether "self.d" was meant to be set here.

    def signal_handler(self, signal, frame):
        """SIGINT handler: stop all remote processes and exit with status 1.

        signal -- signal number passed in by the signal module (unused)
        frame  -- current stack frame passed in by the signal module (unused)
        """
        self.log_message("Interrupt detected - User pressed Ctrl+c")

        # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific
        self.log_message("stopping all entities - please wait ...")
        kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
        sys.exit(1)

    def _record_leader_election_latency_extremes(self):
        """Store the MIN / MAX leader election latency in validationStatusDict.

        The latencies are kept as strings formatted with two decimals, so they
        are compared by their numeric value (key=float); a plain min() / max()
        would compare them as text and rank "100.00" below "20.00". The stored
        value is still the formatted string, or None when no latency was
        recorded (e.g. the leader was never bounced).
        """
        latencyList = self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"]
        statusDict = self.testcaseEnv.validationStatusDict

        statusDict["Leader Election Latency MIN"] = min(latencyList, key=float) if latencyList else None
        statusDict["Leader Election Latency MAX"] = max(latencyList, key=float) if latencyList else None

    def _stop_background_producer(self):
        """Tell the background producer to stop and wait until it reports so.

        Sets "stopBackgroundProducer" to True, then polls
        "backgroundProducerStopped" until it becomes true. Every access to the
        shared dictionary is done while holding self.testcaseEnv.lock, and the
        lock is always released again - including when the polling loop is
        left (the original code exited the loop with the lock still held).
        """
        testcaseEnv = self.testcaseEnv

        # =============================================
        # tell producer to stop
        # =============================================
        testcaseEnv.lock.acquire()
        try:
            testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True
            time.sleep(1)
        finally:
            testcaseEnv.lock.release()
        time.sleep(1)

        # =============================================
        # wait for producer thread's update of
        # "backgroundProducerStopped" to be "True"
        # =============================================
        while True:
            testcaseEnv.lock.acquire()
            try:
                producerStopped = testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]
                self.logger.info("status of backgroundProducerStopped : [" +
                    str(producerStopped) + "]", extra=self.d)
                if producerStopped:
                    time.sleep(1)
                    self.logger.info("all producer threads completed", extra=self.d)
                    break
                time.sleep(1)
            finally:
                # runs on "break" as well, so the lock is never left acquired
                testcaseEnv.lock.release()
            time.sleep(2)

    def runTest(self):
        """Run all testcases found under this testsuite directory, one by one.

        A testcase is only described (not run) when
        systemTestEnv.printTestDescriptionsOnly is set, and it is skipped when
        systemTestEnv.isTestCaseToSkip() says so. Any exception raised by a
        testcase is logged and its traceback printed, then the next testcase
        is started. All remote processes are stopped after every testcase
        that was actually run.
        """

        # ======================================================================
        # get all testcase directories under this testsuite
        # ======================================================================
        testCasePathNameList = system_test_utils.get_dir_paths_with_prefix(
            self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX)
        testCasePathNameList.sort()

        # =============================================================
        # launch each testcase one by one: testcase_1, testcase_2, ...
        # =============================================================
        for testCasePathName in testCasePathNameList:

            skipThisTestCase = False

            try:
                # ======================================================================
                # A new instance of TestcaseEnv to keep track of this testcase's env vars
                # and initialize some env vars as testCasePathName is available now
                # ======================================================================
                self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self)
                self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName
                self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName)
                self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"]

                # ======================================================================
                # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json
                # ======================================================================
                testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"]

                # "continue" still goes through the "finally" clause below,
                # which does nothing for described-only / skipped testcases
                if self.systemTestEnv.printTestDescriptionsOnly:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    continue
                elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName):
                    self.log_message("Skipping : " + testcaseDirName)
                    skipThisTestCase = True
                    continue
                else:
                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
                    system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(self.systemTestEnv, testCasePathName)

                # ============================================================================== #
                # ============================================================================== #
                #                   Product Specific Testing Code Starts Here:                   #
                # ============================================================================== #
                # ============================================================================== #

                # initialize self.testcaseEnv with user-defined environment variables (product specific)
                self.testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = ""
                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False
                self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False
                self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"] = []

                # initialize signal handler
                signal.signal(signal.SIGINT, self.signal_handler)

                # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase
                # for collecting logs from remote machines
                kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv)

                # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file:
                #   system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data(
                    self.testcaseEnv.testcasePropJsonPathName)

                # TestcaseEnv - initialize producer & consumer config / log file pathnames
                kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv)

                # clean up data directories specified in zookeeper.properties and kafka_server_<n>.properties
                kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # generate remote hosts log/config dirs if not exist
                kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # generate properties files for zookeeper, kafka, producer, consumer:
                # 1. copy system_test/<suite_name>_testsuite/config/*.properties to
                #    system_test/<suite_name>_testsuite/testcase_<n>/config/
                # 2. update all properties files in system_test/<suite_name>_testsuite/testcase_<n>/config
                #    by overriding the settings specified in:
                #    system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json
                kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName,
                    self.testcaseEnv, self.systemTestEnv)

                # =============================================
                # preparing all entities to start the test
                # =============================================
                self.log_message("starting zookeepers")
                kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 2s")
                time.sleep(2)

                self.log_message("starting brokers")
                kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                self.log_message("creating topics")
                kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 5s")
                time.sleep(5)

                # =============================================
                # starting producer
                # =============================================
                self.log_message("starting producer in the background")
                kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False)
                msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"]
                self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages")
                time.sleep(int(msgProducingFreeTimeSec))

                # =============================================
                # A while-loop to bounce leader as specified
                # by "num_iteration" in testcase_n_properties.json
                # =============================================
                i = 1
                numIterations = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"])
                while i <= numIterations:

                    self.log_message("Iteration " + str(i) + " of " + str(numIterations))

                    self.log_message("looking up leader")
                    leaderDict = kafka_system_test_utils.get_leader_elected_log_line(
                        self.systemTestEnv, self.testcaseEnv, self.leaderAttributesDict)

                    # ==========================
                    # leaderDict looks like this:
                    # ==========================
                    #{'entity_id': u'3',
                    # 'partition': '0',
                    # 'timestamp': 1345050255.8280001,
                    # 'hostname': u'localhost',
                    # 'topic': 'test_1',
                    # 'brokerid': '3'}

                    # =============================================
                    # validate to see if leader election is successful
                    # =============================================
                    self.log_message("validating leader election")
                    kafka_system_test_utils.validate_leader_election_successful(
                        self.testcaseEnv, leaderDict, self.testcaseEnv.validationStatusDict)

                    # =============================================
                    # trigger leader re-election by stopping leader
                    # to get re-election latency
                    # =============================================
                    bounceLeaderFlag = self.testcaseEnv.testcaseArgumentsDict["bounce_leader"]
                    self.log_message("bounce_leader flag : " + bounceLeaderFlag)
                    if bounceLeaderFlag.lower() == "true":
                        reelectionLatency = kafka_system_test_utils.get_reelection_latency(
                            self.systemTestEnv, self.testcaseEnv, leaderDict, self.leaderAttributesDict)
                        latencyMs = "{0:.2f}".format(reelectionLatency * 1000)
                        latencyKeyName = "Leader Election Latency - iter " + str(i) + " brokerid " + leaderDict["brokerid"]
                        self.testcaseEnv.validationStatusDict[latencyKeyName] = latencyMs + " ms"
                        self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"].append(latencyMs)

                        # =============================================
                        # starting previously terminated broker
                        # =============================================
                        self.log_message("starting the previously terminated broker")
                        stoppedLeaderEntityId = leaderDict["entity_id"]
                        kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, stoppedLeaderEntityId)

                    self.anonLogger.info("sleeping for 15s")
                    time.sleep(15)
                    i += 1
                # while loop

                self._record_leader_election_latency_extremes()

                # tell producer to stop and wait until it has stopped
                self._stop_background_producer()

                # =============================================
                # starting consumer
                # =============================================
                self.log_message("starting consumer in the background")
                kafka_system_test_utils.start_console_consumer(self.systemTestEnv, self.testcaseEnv)
                self.anonLogger.info("sleeping for 10s")
                time.sleep(10)

                # =============================================
                # this testcase is completed - stop all entities
                # =============================================
                self.log_message("stopping all entities")
                for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items():
                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)

                # make sure all entities are stopped
                kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv)

                # =============================================
                # collect logs from remote hosts
                # =============================================
                kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv)

                # =============================================
                # validate the data matched and checksum
                # =============================================
                self.log_message("validating data matched")
                kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv)
                kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv)

                # =============================================
                # draw graphs
                # =============================================
                metrics.draw_all_graphs(self.systemTestEnv.METRICS_PATHNAME,
                                        self.testcaseEnv,
                                        self.systemTestEnv.clusterEntityConfigDictList)

                # build dashboard, one for each role
                metrics.build_all_dashboards(self.systemTestEnv.METRICS_PATHNAME,
                                             self.testcaseEnv.testCaseDashboardsDir,
                                             self.systemTestEnv.clusterEntityConfigDictList)

            except Exception as e:
                # a failing testcase must not prevent the remaining ones from running
                self.log_message("Exception while running test {0}".format(e))
                traceback.print_exc()

            finally:
                if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly:
                    self.log_message("stopping all entities - please wait ...")
                    kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv)
|
2012-08-30 02:07:58 +08:00
|
|
|
|