| 
									
										
										
										
											2012-08-30 02:07:58 +08:00
										 |  |  | # Licensed to the Apache Software Foundation (ASF) under one | 
					
						
							|  |  |  | # or more contributor license agreements.  See the NOTICE file | 
					
						
							|  |  |  | # distributed with this work for additional information | 
					
						
							|  |  |  | # regarding copyright ownership.  The ASF licenses this file | 
					
						
							|  |  |  | # to you under the Apache License, Version 2.0 (the | 
					
						
							|  |  |  | # "License"); you may not use this file except in compliance | 
					
						
							|  |  |  | # with the License.  You may obtain a copy of the License at | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #   http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Unless required by applicable law or agreed to in writing, | 
					
						
							|  |  |  | # software distributed under the License is distributed on an | 
					
						
							|  |  |  | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
					
						
							|  |  |  | # KIND, either express or implied.  See the License for the | 
					
						
							|  |  |  | # specific language governing permissions and limitations | 
					
						
							|  |  |  | # under the License. | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  | #!/usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # =================================== | 
					
						
							|  |  |  | # replica_basic_test.py | 
					
						
							|  |  |  | # =================================== | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import inspect | 
					
						
							|  |  |  | import logging | 
					
						
							|  |  |  | import os | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  | import pprint | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  | import signal | 
					
						
							|  |  |  | import subprocess | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import time | 
					
						
							| 
									
										
										
										
											2012-08-30 02:07:58 +08:00
										 |  |  | import traceback | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | from   system_test_env    import SystemTestEnv | 
					
						
							|  |  |  | sys.path.append(SystemTestEnv.SYSTEM_TEST_UTIL_DIR) | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  | from   setup_utils        import SetupUtils | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  | from   replication_utils  import ReplicationUtils | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  | import system_test_utils | 
					
						
							|  |  |  | from   testcase_env       import TestcaseEnv | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # product specific: Kafka | 
					
						
							|  |  |  | import kafka_system_test_utils | 
					
						
							| 
									
										
										
										
											2012-08-30 02:07:58 +08:00
										 |  |  | import metrics | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  | class ReplicaBasicTest(ReplicationUtils, SetupUtils): | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     testModuleAbsPathName = os.path.realpath(__file__) | 
					
						
							|  |  |  |     testSuiteAbsPathName  = os.path.abspath(os.path.dirname(testModuleAbsPathName)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, systemTestEnv): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # SystemTestEnv - provides cluster level environment settings | 
					
						
							|  |  |  |         #     such as entity_id, hostname, kafka_home, java_home which | 
					
						
							|  |  |  |         #     are available in a list of dictionary named  | 
					
						
							|  |  |  |         #     "clusterEntityConfigDictList" | 
					
						
							|  |  |  |         self.systemTestEnv = systemTestEnv | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |         super(ReplicaBasicTest, self).__init__(self) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |         # dict to pass user-defined attributes to logger argument: "extra" | 
					
						
							|  |  |  |         d = {'name_of_class': self.__class__.__name__} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def signal_handler(self, signal, frame): | 
					
						
							|  |  |  |         self.log_message("Interrupt detected - User pressed Ctrl+c") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |         # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific | 
					
						
							|  |  |  |         self.log_message("stopping all entities - please wait ...") | 
					
						
							|  |  |  |         kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv) | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |         sys.exit(1)  | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def runTest(self): | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |         # ====================================================================== | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |         # get all testcase directories under this testsuite | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |         # ====================================================================== | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |         testCasePathNameList = system_test_utils.get_dir_paths_with_prefix( | 
					
						
							|  |  |  |             self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX) | 
					
						
							|  |  |  |         testCasePathNameList.sort() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # ============================================================= | 
					
						
							|  |  |  |         # launch each testcase one by one: testcase_1, testcase_2, ... | 
					
						
							|  |  |  |         # ============================================================= | 
					
						
							|  |  |  |         for testCasePathName in testCasePathNameList: | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |             skipThisTestCase = False | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |             try:  | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # ====================================================================== | 
					
						
							|  |  |  |                 # A new instance of TestcaseEnv to keep track of this testcase's env vars | 
					
						
							|  |  |  |                 # and initialize some env vars as testCasePathName is available now | 
					
						
							|  |  |  |                 # ====================================================================== | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self) | 
					
						
							|  |  |  |                 self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName) | 
					
						
							|  |  |  |                 self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                 # ====================================================================== | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                 # ====================================================================== | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 if self.systemTestEnv.printTestDescriptionsOnly: | 
					
						
							|  |  |  |                     self.testcaseEnv.printTestCaseDescription(testcaseDirName) | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName): | 
					
						
							|  |  |  |                     self.log_message("Skipping : " + testcaseDirName) | 
					
						
							|  |  |  |                     skipThisTestCase = True | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     self.testcaseEnv.printTestCaseDescription(testcaseDirName) | 
					
						
							|  |  |  |                     system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(self.systemTestEnv, testCasePathName) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # ============================================================================== # | 
					
						
							|  |  |  |                 # ============================================================================== # | 
					
						
							|  |  |  |                 #                   Product Specific Testing Code Starts Here:                   # | 
					
						
							|  |  |  |                 # ============================================================================== # | 
					
						
							|  |  |  |                 # ============================================================================== # | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # get optional testcase arguments | 
					
						
							|  |  |  |                 logRetentionTest = "false" | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     logRetentionTest = self.testcaseEnv.testcaseArgumentsDict["log_retention_test"] | 
					
						
							|  |  |  |                 except: | 
					
						
							|  |  |  |                     pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # initialize self.testcaseEnv with user-defined environment variables (product specific) | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 self.testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = "" | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                 self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"]    = False | 
					
						
							|  |  |  |                 self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"] = [] | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 # initialize signal handler | 
					
						
							|  |  |  |                 signal.signal(signal.SIGINT, self.signal_handler) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |                 # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase | 
					
						
							|  |  |  |                 # for collecting logs from remote machines | 
					
						
							|  |  |  |                 kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |                 # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file: | 
					
						
							|  |  |  |                 #   system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data( | 
					
						
							|  |  |  |                     self.testcaseEnv.testcasePropJsonPathName) | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |      | 
					
						
							|  |  |  |                 # TestcaseEnv - initialize producer & consumer config / log file pathnames | 
					
						
							|  |  |  |                 kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv) | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                  | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 # clean up data directories specified in zookeeper.properties and kafka_server_<n>.properties | 
					
						
							|  |  |  |                 kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv) | 
					
						
							| 
									
										
										
										
											2012-08-31 07:47:55 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 # generate remote hosts log/config dirs if not exist | 
					
						
							|  |  |  |                 kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |                 # generate properties files for zookeeper, kafka, producer, consumer: | 
					
						
							|  |  |  |                 # 1. copy system_test/<suite_name>_testsuite/config/*.properties to  | 
					
						
							|  |  |  |                 #    system_test/<suite_name>_testsuite/testcase_<n>/config/ | 
					
						
							|  |  |  |                 # 2. update all properties files in system_test/<suite_name>_testsuite/testcase_<n>/config | 
					
						
							|  |  |  |                 #    by overriding the settings specified in: | 
					
						
							|  |  |  |                 #    system_test/<suite_name>_testsuite/testcase_<n>/testcase_<n>_properties.json | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName, | 
					
						
							|  |  |  |                     self.testcaseEnv, self.systemTestEnv) | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |     | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 # preparing all entities to start the test | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 self.log_message("starting zookeepers") | 
					
						
							|  |  |  |                 kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |                 self.anonLogger.info("sleeping for 2s") | 
					
						
							|  |  |  |                 time.sleep(2) | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |          | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 self.log_message("starting brokers") | 
					
						
							|  |  |  |                 kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv) | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 self.anonLogger.info("sleeping for 5s") | 
					
						
							|  |  |  |                 time.sleep(5) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 self.log_message("creating topics") | 
					
						
							|  |  |  |                 kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |                 self.anonLogger.info("sleeping for 5s") | 
					
						
							|  |  |  |                 time.sleep(5) | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 # start ConsoleConsumer if this is a Log Retention test                 | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 if logRetentionTest.lower() == "true": | 
					
						
							|  |  |  |                     self.log_message("starting consumer in the background") | 
					
						
							|  |  |  |                     kafka_system_test_utils.start_console_consumer(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |                     time.sleep(1) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-31 07:47:55 +08:00
										 |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 # starting producer  | 
					
						
							| 
									
										
										
										
											2012-08-31 07:47:55 +08:00
										 |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 self.log_message("starting producer in the background") | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False) | 
					
						
							|  |  |  |                 msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"] | 
					
						
							|  |  |  |                 self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages") | 
					
						
							|  |  |  |                 time.sleep(int(msgProducingFreeTimeSec)) | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 # A while-loop to bounce leader as specified | 
					
						
							|  |  |  |                 # by "num_iterations" in testcase_n_properties.json | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                 i = 1 | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                 numIterations    = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"]) | 
					
						
							|  |  |  |                 brokerType       = self.testcaseEnv.testcaseArgumentsDict["broker_type"] | 
					
						
							|  |  |  |                 bounceBrokerFlag = self.testcaseEnv.testcaseArgumentsDict["bounce_broker"] | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                  | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                 while i <= numIterations: | 
					
						
							|  |  |  |                     self.log_message("Iteration " + str(i) + " of " + str(numIterations)) | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                     self.log_message("bounce_broker flag : " + bounceBrokerFlag) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     leaderDict            = None | 
					
						
							|  |  |  |                     controllerDict        = None | 
					
						
							|  |  |  |                     stoppedBrokerEntityId = "" | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                     # ============================================== | 
					
						
							|  |  |  |                     # Find out the entity id for the stopping broker | 
					
						
							|  |  |  |                     # ============================================== | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                     if brokerType == "leader" or brokerType == "follower": | 
					
						
							|  |  |  |                         self.log_message("looking up leader") | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                         leaderDict = kafka_system_test_utils.get_leader_elected_log_line(self.systemTestEnv, self.testcaseEnv, self.leaderAttributesDict) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                         # ========================== | 
					
						
							|  |  |  |                         # leaderDict looks like this: | 
					
						
							|  |  |  |                         # ========================== | 
					
						
							|  |  |  |                         #{'entity_id': u'3', | 
					
						
							|  |  |  |                         # 'partition': '0', | 
					
						
							|  |  |  |                         # 'timestamp': 1345050255.8280001, | 
					
						
							|  |  |  |                         # 'hostname': u'localhost', | 
					
						
							|  |  |  |                         # 'topic': 'test_1', | 
					
						
							|  |  |  |                         # 'brokerid': '3'} | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                         if brokerType == "leader": | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                             stoppedBrokerEntityId = leaderDict["entity_id"] | 
					
						
							|  |  |  |                             self.log_message("Found leader with entity id: " + stoppedBrokerEntityId) | 
					
						
							|  |  |  |                         else: # Follower | 
					
						
							|  |  |  |                             self.log_message("looking up follower") | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                             # a list of all brokers | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                             brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts(self.systemTestEnv.clusterEntityConfigDictList, "role", "broker", "entity_id") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                             # we pick the first non-leader broker as the follower | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                             firstFollowerEntityId = None | 
					
						
							|  |  |  |                             for brokerEntityId in brokerEntityIdList: | 
					
						
							|  |  |  |                                 if brokerEntityId != leaderDict["entity_id"]: | 
					
						
							|  |  |  |                                     firstFollowerEntityId = brokerEntityId | 
					
						
							|  |  |  |                                     break | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                             stoppedBrokerEntityId  = firstFollowerEntityId | 
					
						
							|  |  |  |                             self.log_message("Found follower with entity id: " + stoppedBrokerEntityId) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                     elif brokerType == "controller": | 
					
						
							|  |  |  |                         self.log_message("looking up controller") | 
					
						
							|  |  |  |                         controllerDict = kafka_system_test_utils.get_controller_attributes(self.systemTestEnv, self.testcaseEnv) | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                         # ========================== | 
					
						
							|  |  |  |                         # controllerDict looks like this: | 
					
						
							|  |  |  |                         # ========================== | 
					
						
							|  |  |  |                         #{'entity_id': u'3', | 
					
						
							|  |  |  |                         # 'timestamp': 1345050255.8280001, | 
					
						
							|  |  |  |                         # 'hostname': u'localhost', | 
					
						
							|  |  |  |                         # 'brokerid': '3'} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                         stoppedBrokerEntityId = controllerDict["entity_id"] | 
					
						
							|  |  |  |                         self.log_message("Found controller with entity id: " + stoppedBrokerEntityId) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     # ============================================= | 
					
						
							|  |  |  |                     # Bounce the broker | 
					
						
							|  |  |  |                     # ============================================= | 
					
						
							|  |  |  |                     if bounceBrokerFlag.lower() == "true": | 
					
						
							|  |  |  |                         if brokerType == "leader": | 
					
						
							|  |  |  |                             # validate to see if leader election is successful | 
					
						
							|  |  |  |                             self.log_message("validating leader election") | 
					
						
							|  |  |  |                             kafka_system_test_utils.validate_leader_election_successful(self.testcaseEnv, leaderDict, self.testcaseEnv.validationStatusDict) | 
					
						
							|  |  |  |                  | 
					
						
							|  |  |  |                             # trigger leader re-election by stopping leader to get re-election latency | 
					
						
							|  |  |  |                             reelectionLatency = kafka_system_test_utils.get_reelection_latency(self.systemTestEnv, self.testcaseEnv, leaderDict, self.leaderAttributesDict) | 
					
						
							|  |  |  |                             latencyKeyName = "Leader Election Latency - iter " + str(i) + " brokerid " + leaderDict["brokerid"] | 
					
						
							|  |  |  |                             self.testcaseEnv.validationStatusDict[latencyKeyName] = str("{0:.2f}".format(reelectionLatency * 1000)) + " ms" | 
					
						
							|  |  |  |                             self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"].append("{0:.2f}".format(reelectionLatency * 1000)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                         elif brokerType == "follower": | 
					
						
							|  |  |  |                             # stopping Follower | 
					
						
							|  |  |  |                             self.log_message("stopping follower with entity id: " + firstFollowerEntityId) | 
					
						
							|  |  |  |                             kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, firstFollowerEntityId, self.testcaseEnv.entityBrokerParentPidDict[firstFollowerEntityId]) | 
					
						
							|  |  |  |     | 
					
						
							|  |  |  |                         elif brokerType == "controller": | 
					
						
							|  |  |  |                             # stopping Controller | 
					
						
							|  |  |  |                             self.log_message("stopping controller : " + controllerDict["brokerid"]) | 
					
						
							|  |  |  |                             kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, controllerDict["entity_id"], self.testcaseEnv.entityBrokerParentPidDict[controllerDict["entity_id"]]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                         brokerDownTimeInSec = 5 | 
					
						
							|  |  |  |                         try: | 
					
						
							|  |  |  |                             brokerDownTimeInSec = int(self.testcaseEnv.testcaseArgumentsDict["broker_down_time_in_sec"]) | 
					
						
							|  |  |  |                         except: | 
					
						
							|  |  |  |                             pass # take default | 
					
						
							|  |  |  |                         time.sleep(brokerDownTimeInSec) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                         # starting previously terminated broker  | 
					
						
							|  |  |  |                         self.log_message("starting the previously terminated broker") | 
					
						
							|  |  |  |                         kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, stoppedBrokerEntityId) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         # GC Pause simulation | 
					
						
							|  |  |  |                         pauseTime = None  | 
					
						
							|  |  |  |                         try: | 
					
						
							|  |  |  |                             hostname  = leaderDict["hostname"] | 
					
						
							|  |  |  |                             pauseTime = self.testcaseEnv.testcaseArgumentsDict["pause_time_in_seconds"] | 
					
						
							|  |  |  |                             parentPid = self.testcaseEnv.entityBrokerParentPidDict[leaderDict["entity_id"]] | 
					
						
							|  |  |  |                             pidStack  = system_test_utils.get_remote_child_processes(hostname, parentPid) | 
					
						
							|  |  |  |                             system_test_utils.simulate_garbage_collection_pause_in_remote_process(hostname, pidStack, pauseTime) | 
					
						
							|  |  |  |                         except: | 
					
						
							|  |  |  |                             pass | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                     self.anonLogger.info("sleeping for 15s") | 
					
						
							|  |  |  |                     time.sleep(15) | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                     i += 1 | 
					
						
							|  |  |  |                 # while loop | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                 # update Leader Election Latency MIN/MAX to testcaseEnv.validationStatusDict | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 self.testcaseEnv.validationStatusDict["Leader Election Latency MIN"] = None | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     self.testcaseEnv.validationStatusDict["Leader Election Latency MIN"] = \ | 
					
						
							|  |  |  |                         min(self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"]) | 
					
						
							|  |  |  |                 except: | 
					
						
							|  |  |  |                     pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 self.testcaseEnv.validationStatusDict["Leader Election Latency MAX"] = None | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     self.testcaseEnv.validationStatusDict["Leader Election Latency MAX"] = \ | 
					
						
							|  |  |  |                         max(self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"]) | 
					
						
							|  |  |  |                 except: | 
					
						
							|  |  |  |                     pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                 # tell producer to stop | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                 self.testcaseEnv.lock.acquire() | 
					
						
							|  |  |  |                 self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True | 
					
						
							|  |  |  |                 time.sleep(1) | 
					
						
							|  |  |  |                 self.testcaseEnv.lock.release() | 
					
						
							|  |  |  |                 time.sleep(1) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 # wait for producer thread's update of | 
					
						
							|  |  |  |                 # "backgroundProducerStopped" to be "True" | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                 while 1: | 
					
						
							|  |  |  |                     self.testcaseEnv.lock.acquire() | 
					
						
							|  |  |  |                     self.logger.info("status of backgroundProducerStopped : [" + \ | 
					
						
							|  |  |  |                         str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d) | 
					
						
							|  |  |  |                     if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]: | 
					
						
							|  |  |  |                         time.sleep(1) | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                         self.logger.info("all producer threads completed", extra=self.d) | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                         break | 
					
						
							|  |  |  |                     time.sleep(1) | 
					
						
							|  |  |  |                     self.testcaseEnv.lock.release() | 
					
						
							|  |  |  |                     time.sleep(2) | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-31 07:47:55 +08:00
										 |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                 # collect logs from remote hosts to find the | 
					
						
							|  |  |  |                 # minimum common offset of a certain log | 
					
						
							|  |  |  |                 # segment file among all replicas | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 minStartingOffsetDict = None | 
					
						
							|  |  |  |                 if logRetentionTest.lower() == "true": | 
					
						
							|  |  |  |                     self.anonLogger.info("sleeping for 10s before collecting logs") | 
					
						
							|  |  |  |                     time.sleep(10) | 
					
						
							|  |  |  |                     kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     minStartingOffsetDict = kafka_system_test_utils.getMinCommonStartingOffset(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |                     print | 
					
						
							|  |  |  |                     pprint.pprint(minStartingOffsetDict) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 # starting debug consumer | 
					
						
							| 
									
										
										
										
											2012-08-31 07:47:55 +08:00
										 |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-10-25 04:56:16 +08:00
										 |  |  |                 self.log_message("starting debug consumers in the background") | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  |                 kafka_system_test_utils.start_simple_consumer(self.systemTestEnv, self.testcaseEnv, minStartingOffsetDict) | 
					
						
							| 
									
										
										
										
											2012-08-31 07:47:55 +08:00
										 |  |  |                 self.anonLogger.info("sleeping for 10s") | 
					
						
							| 
									
										
										
										
											2012-08-30 02:07:58 +08:00
										 |  |  |                 time.sleep(10) | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  |                      | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 # this testcase is completed - stop all entities | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 self.log_message("stopping all entities") | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items(): | 
					
						
							|  |  |  |                     kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items(): | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                     kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid) | 
					
						
							| 
									
										
										
										
											2012-09-15 01:20:04 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # make sure all entities are stopped | 
					
						
							|  |  |  |                 kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 # collect logs from remote hosts | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |      | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 # validate the data matched and checksum | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							|  |  |  |                 self.log_message("validating data matched") | 
					
						
							| 
									
										
										
										
											2012-11-10 06:58:23 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 if logRetentionTest.lower() == "true": | 
					
						
							|  |  |  |                     kafka_system_test_utils.validate_simple_consumer_data_matched_across_replicas(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |                     kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     #kafka_system_test_utils.validate_simple_consumer_data_matched(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |                     kafka_system_test_utils.validate_simple_consumer_data_matched_across_replicas(self.systemTestEnv, self.testcaseEnv) | 
					
						
							|  |  |  |                     kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv) | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-08-30 02:07:58 +08:00
										 |  |  |                 # draw graphs | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 # ============================================= | 
					
						
							| 
									
										
										
										
											2012-08-30 02:07:58 +08:00
										 |  |  |                 metrics.draw_all_graphs(self.systemTestEnv.METRICS_PATHNAME,  | 
					
						
							|  |  |  |                                         self.testcaseEnv,  | 
					
						
							|  |  |  |                                         self.systemTestEnv.clusterEntityConfigDictList) | 
					
						
							|  |  |  |                  | 
					
						
							|  |  |  |                 # build dashboard, one for each role | 
					
						
							|  |  |  |                 metrics.build_all_dashboards(self.systemTestEnv.METRICS_PATHNAME, | 
					
						
							|  |  |  |                                              self.testcaseEnv.testCaseDashboardsDir, | 
					
						
							|  |  |  |                                              self.systemTestEnv.clusterEntityConfigDictList) | 
					
						
							| 
									
										
										
										
											2012-08-23 01:16:26 +08:00
										 |  |  |             except Exception as e: | 
					
						
							| 
									
										
										
										
											2012-08-30 02:07:58 +08:00
										 |  |  |                 self.log_message("Exception while running test {0}".format(e)) | 
					
						
							|  |  |  |                 traceback.print_exc() | 
					
						
							| 
									
										
										
										
											2012-08-31 07:47:55 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |             finally: | 
					
						
							| 
									
										
										
										
											2012-10-11 00:56:57 +08:00
										 |  |  |                 if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly: | 
					
						
							|  |  |  |                     self.log_message("stopping all entities - please wait ...") | 
					
						
							|  |  |  |                     kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv) | 
					
						
							| 
									
										
										
										
											2012-08-30 02:07:58 +08:00
										 |  |  | 
 |