mirror of https://github.com/apache/kafka.git
KAFKA-18307: Don't report on disabled/removed tests (#18318)
This patch ensures we only analyze trunk builds for the Develocity report. Some other minor fixes are also included Reviewers: David Arthur <mumrah@gmail.com>
This commit is contained in:
parent
9818e9db26
commit
e546b0225b
|
@ -204,7 +204,7 @@ class TestAnalyzer:
|
||||||
Returns:
|
Returns:
|
||||||
A formatted query string.
|
A formatted query string.
|
||||||
"""
|
"""
|
||||||
return f'project:{project} buildStartTime:[{chunk_start.isoformat()} TO {chunk_end.isoformat()}] gradle.requestedTasks:{test_type}'
|
return f'project:{project} buildStartTime:[{chunk_start.isoformat()} TO {chunk_end.isoformat()}] gradle.requestedTasks:{test_type} tag:github tag:trunk'
|
||||||
|
|
||||||
def process_chunk(
|
def process_chunk(
|
||||||
self,
|
self,
|
||||||
|
@ -212,7 +212,7 @@ class TestAnalyzer:
|
||||||
chunk_end: datetime,
|
chunk_end: datetime,
|
||||||
project: str,
|
project: str,
|
||||||
test_type: str,
|
test_type: str,
|
||||||
remaining_build_ids: set,
|
remaining_build_ids: set | None,
|
||||||
max_builds_per_request: int
|
max_builds_per_request: int
|
||||||
) -> Dict[str, BuildInfo]:
|
) -> Dict[str, BuildInfo]:
|
||||||
"""Helper method to process a single chunk of build information"""
|
"""Helper method to process a single chunk of build information"""
|
||||||
|
@ -225,7 +225,7 @@ class TestAnalyzer:
|
||||||
from_build = None
|
from_build = None
|
||||||
continue_chunk = True
|
continue_chunk = True
|
||||||
|
|
||||||
while continue_chunk and remaining_build_ids:
|
while continue_chunk and (remaining_build_ids is None or remaining_build_ids):
|
||||||
query_params = {
|
query_params = {
|
||||||
'query': query,
|
'query': query,
|
||||||
'models': ['gradle-attributes'],
|
'models': ['gradle-attributes'],
|
||||||
|
@ -273,7 +273,7 @@ class TestAnalyzer:
|
||||||
continue_chunk = False
|
continue_chunk = False
|
||||||
break
|
break
|
||||||
|
|
||||||
if build_id in remaining_build_ids:
|
if remaining_build_ids is None or build_id in remaining_build_ids:
|
||||||
if 'problem' not in gradle_attrs:
|
if 'problem' not in gradle_attrs:
|
||||||
chunk_builds[build_id] = BuildInfo(
|
chunk_builds[build_id] = BuildInfo(
|
||||||
id=build_id,
|
id=build_id,
|
||||||
|
@ -281,6 +281,8 @@ class TestAnalyzer:
|
||||||
duration=attrs.get('buildDuration'),
|
duration=attrs.get('buildDuration'),
|
||||||
has_failed=attrs.get('hasFailed', False)
|
has_failed=attrs.get('hasFailed', False)
|
||||||
)
|
)
|
||||||
|
if remaining_build_ids is not None:
|
||||||
|
remaining_build_ids.remove(build_id)
|
||||||
|
|
||||||
if continue_chunk and response_json:
|
if continue_chunk and response_json:
|
||||||
from_build = response_json[-1]['id']
|
from_build = response_json[-1]['id']
|
||||||
|
@ -290,38 +292,47 @@ class TestAnalyzer:
|
||||||
time.sleep(0.5) # Rate limiting between pagination requests
|
time.sleep(0.5) # Rate limiting between pagination requests
|
||||||
|
|
||||||
return chunk_builds
|
return chunk_builds
|
||||||
|
def get_build_info(self, build_ids: List[str] = None, project: str = None, test_type: str = None, query_days: int = None, bypass_cache: bool = False, fetch_all: bool = False) -> Dict[str, BuildInfo]:
|
||||||
def get_build_info(self, build_ids: List[str], project: str, test_type: str, query_days: int) -> Dict[str, BuildInfo]:
|
|
||||||
builds = {}
|
builds = {}
|
||||||
max_builds_per_request = 100
|
max_builds_per_request = 100
|
||||||
cutoff_date = datetime.now(pytz.UTC) - timedelta(days=query_days)
|
cutoff_date = datetime.now(pytz.UTC) - timedelta(days=query_days)
|
||||||
|
current_time = datetime.now(pytz.UTC)
|
||||||
|
|
||||||
# Get builds from cache if available
|
if not fetch_all and not build_ids:
|
||||||
if self.build_cache:
|
raise ValueError("Either build_ids must be provided or fetch_all must be True")
|
||||||
|
|
||||||
|
# Get builds from cache if available and bypass_cache is False
|
||||||
|
if not bypass_cache and self.build_cache:
|
||||||
cached_builds = self.build_cache.builds
|
cached_builds = self.build_cache.builds
|
||||||
cached_cutoff = self.build_cache.last_update - timedelta(days=query_days)
|
cached_cutoff = self.build_cache.last_update - timedelta(days=query_days)
|
||||||
|
|
||||||
# Use cached data for builds within the cache period
|
if fetch_all:
|
||||||
for build_id in build_ids:
|
# Use all cached builds within the time period
|
||||||
if build_id in cached_builds:
|
for build_id, build in cached_builds.items():
|
||||||
build = cached_builds[build_id]
|
|
||||||
if build.timestamp >= cached_cutoff:
|
if build.timestamp >= cached_cutoff:
|
||||||
builds[build_id] = build
|
builds[build_id] = build
|
||||||
|
else:
|
||||||
|
# Use cached data for specific builds within the cache period
|
||||||
|
for build_id in build_ids:
|
||||||
|
if build_id in cached_builds:
|
||||||
|
build = cached_builds[build_id]
|
||||||
|
if build.timestamp >= cached_cutoff:
|
||||||
|
builds[build_id] = build
|
||||||
|
|
||||||
# Update cutoff date to only fetch new data
|
# Update cutoff date to only fetch new data
|
||||||
cutoff_date = self.build_cache.last_update
|
cutoff_date = self.build_cache.last_update
|
||||||
logger.info(f"Using cached data up to {cutoff_date.isoformat()}")
|
logger.info(f"Using cached data up to {cutoff_date.isoformat()}")
|
||||||
|
|
||||||
# Remove already found builds from the search list
|
if not fetch_all:
|
||||||
build_ids = [bid for bid in build_ids if bid not in builds]
|
# Remove already found builds from the search list
|
||||||
|
build_ids = [bid for bid in build_ids if bid not in builds]
|
||||||
|
|
||||||
if not build_ids:
|
if not build_ids:
|
||||||
logger.info("All builds found in cache")
|
logger.info("All builds found in cache")
|
||||||
return builds
|
return builds
|
||||||
|
|
||||||
# Fetch remaining builds from API
|
# Fetch remaining builds from API
|
||||||
remaining_build_ids = set(build_ids)
|
remaining_build_ids = set(build_ids) if not fetch_all else None
|
||||||
current_time = datetime.now(pytz.UTC)
|
|
||||||
chunk_size = self.default_chunk_size
|
chunk_size = self.default_chunk_size
|
||||||
|
|
||||||
# Create time chunks
|
# Create time chunks
|
||||||
|
@ -343,7 +354,7 @@ class TestAnalyzer:
|
||||||
chunk[1],
|
chunk[1],
|
||||||
project,
|
project,
|
||||||
test_type,
|
test_type,
|
||||||
remaining_build_ids.copy(),
|
remaining_build_ids.copy() if remaining_build_ids else None,
|
||||||
max_builds_per_request
|
max_builds_per_request
|
||||||
): chunk for chunk in chunks
|
): chunk for chunk in chunks
|
||||||
}
|
}
|
||||||
|
@ -352,7 +363,8 @@ class TestAnalyzer:
|
||||||
try:
|
try:
|
||||||
chunk_builds = future.result()
|
chunk_builds = future.result()
|
||||||
builds.update(chunk_builds)
|
builds.update(chunk_builds)
|
||||||
remaining_build_ids -= set(chunk_builds.keys())
|
if remaining_build_ids:
|
||||||
|
remaining_build_ids -= set(chunk_builds.keys())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Chunk processing generated an exception: {str(e)}")
|
logger.error(f"Chunk processing generated an exception: {str(e)}")
|
||||||
|
|
||||||
|
@ -361,11 +373,11 @@ class TestAnalyzer:
|
||||||
f"\nBuild Info Performance:"
|
f"\nBuild Info Performance:"
|
||||||
f"\n Total Duration: {total_duration:.2f}s"
|
f"\n Total Duration: {total_duration:.2f}s"
|
||||||
f"\n Builds Retrieved: {len(builds)}"
|
f"\n Builds Retrieved: {len(builds)}"
|
||||||
f"\n Builds Not Found: {len(remaining_build_ids)}"
|
f"\n Builds Not Found: {len(remaining_build_ids) if remaining_build_ids else 0}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Update cache with new data
|
# Update cache with new data if not bypassing cache
|
||||||
if builds:
|
if builds and not bypass_cache:
|
||||||
if not self.build_cache:
|
if not self.build_cache:
|
||||||
self.build_cache = BuildCache(current_time, {})
|
self.build_cache = BuildCache(current_time, {})
|
||||||
self.build_cache.builds.update(builds)
|
self.build_cache.builds.update(builds)
|
||||||
|
@ -373,7 +385,6 @@ class TestAnalyzer:
|
||||||
self._save_cache()
|
self._save_cache()
|
||||||
|
|
||||||
return builds
|
return builds
|
||||||
|
|
||||||
def get_test_results(self, project: str, threshold_days: int, test_type: str = "quarantinedTest",
|
def get_test_results(self, project: str, threshold_days: int, test_type: str = "quarantinedTest",
|
||||||
outcomes: List[str] = None) -> List[TestResult]:
|
outcomes: List[str] = None) -> List[TestResult]:
|
||||||
"""Fetch test results with timeline information"""
|
"""Fetch test results with timeline information"""
|
||||||
|
@ -465,6 +476,11 @@ class TestAnalyzer:
|
||||||
result.timeline = sorted(timeline, key=lambda x: x.timestamp)
|
result.timeline = sorted(timeline, key=lambda x: x.timestamp)
|
||||||
logger.debug(f"Final timeline entries for {test_name}: {len(result.timeline)}")
|
logger.debug(f"Final timeline entries for {test_name}: {len(result.timeline)}")
|
||||||
|
|
||||||
|
# Print build details for debugging
|
||||||
|
logger.debug("Timeline entries:")
|
||||||
|
for entry in timeline:
|
||||||
|
logger.debug(f"Build ID: {entry.build_id}, Timestamp: {entry.timestamp}, Outcome: {entry.outcome}")
|
||||||
|
|
||||||
# Calculate recent failure rate
|
# Calculate recent failure rate
|
||||||
recent_cutoff = datetime.now(pytz.UTC) - timedelta(days=30)
|
recent_cutoff = datetime.now(pytz.UTC) - timedelta(days=30)
|
||||||
recent_runs = [t for t in timeline if t.timestamp >= recent_cutoff]
|
recent_runs = [t for t in timeline if t.timestamp >= recent_cutoff]
|
||||||
|
@ -768,25 +784,52 @@ class TestAnalyzer:
|
||||||
|
|
||||||
return cleared_tests
|
return cleared_tests
|
||||||
|
|
||||||
def get_develocity_class_link(class_name: str, threshold_days: int) -> str:
|
def update_cache(self, builds: Dict[str, BuildInfo]):
|
||||||
|
"""
|
||||||
|
Update the build cache with new build information.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
builds: Dictionary of build IDs to BuildInfo objects
|
||||||
|
"""
|
||||||
|
current_time = datetime.now(pytz.UTC)
|
||||||
|
|
||||||
|
# Initialize cache if it doesn't exist
|
||||||
|
if not self.build_cache:
|
||||||
|
self.build_cache = BuildCache(current_time, {})
|
||||||
|
|
||||||
|
# Update builds and last update time
|
||||||
|
self.build_cache.builds.update(builds)
|
||||||
|
self.build_cache.last_update = current_time
|
||||||
|
|
||||||
|
# Save to all cache providers
|
||||||
|
self._save_cache()
|
||||||
|
|
||||||
|
logger.info(f"Updated cache with {len(builds)} builds")
|
||||||
|
|
||||||
|
def get_develocity_class_link(class_name: str, threshold_days: int, test_type: str = None) -> str:
|
||||||
"""
|
"""
|
||||||
Generate Develocity link for a test class
|
Generate Develocity link for a test class
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
class_name: Name of the test class
|
class_name: Name of the test class
|
||||||
threshold_days: Number of days to look back in search
|
threshold_days: Number of days to look back in search
|
||||||
|
test_type: Type of test (e.g., "quarantinedTest", "test")
|
||||||
"""
|
"""
|
||||||
base_url = "https://ge.apache.org/scans/tests"
|
base_url = "https://ge.apache.org/scans/tests"
|
||||||
params = {
|
params = {
|
||||||
"search.rootProjectNames": "kafka",
|
"search.rootProjectNames": "kafka",
|
||||||
"search.tags": "github,trunk",
|
"search.tags": "github,trunk",
|
||||||
"search.timeZoneId": "America/New_York",
|
"search.timeZoneId": "UTC",
|
||||||
"search.relativeStartTime": f"P{threshold_days}D",
|
"search.relativeStartTime": f"P{threshold_days}D",
|
||||||
"tests.container": class_name
|
"tests.container": class_name
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if test_type:
|
||||||
|
params["search.tasks"] = test_type
|
||||||
|
|
||||||
return f"{base_url}?{'&'.join(f'{k}={requests.utils.quote(str(v))}' for k, v in params.items())}"
|
return f"{base_url}?{'&'.join(f'{k}={requests.utils.quote(str(v))}' for k, v in params.items())}"
|
||||||
|
|
||||||
def get_develocity_method_link(class_name: str, method_name: str, threshold_days: int) -> str:
|
def get_develocity_method_link(class_name: str, method_name: str, threshold_days: int, test_type: str = None) -> str:
|
||||||
"""
|
"""
|
||||||
Generate Develocity link for a test method
|
Generate Develocity link for a test method
|
||||||
|
|
||||||
|
@ -794,6 +837,7 @@ def get_develocity_method_link(class_name: str, method_name: str, threshold_days
|
||||||
class_name: Name of the test class
|
class_name: Name of the test class
|
||||||
method_name: Name of the test method
|
method_name: Name of the test method
|
||||||
threshold_days: Number of days to look back in search
|
threshold_days: Number of days to look back in search
|
||||||
|
test_type: Type of test (e.g., "quarantinedTest", "test")
|
||||||
"""
|
"""
|
||||||
base_url = "https://ge.apache.org/scans/tests"
|
base_url = "https://ge.apache.org/scans/tests"
|
||||||
|
|
||||||
|
@ -804,14 +848,18 @@ def get_develocity_method_link(class_name: str, method_name: str, threshold_days
|
||||||
params = {
|
params = {
|
||||||
"search.rootProjectNames": "kafka",
|
"search.rootProjectNames": "kafka",
|
||||||
"search.tags": "github,trunk",
|
"search.tags": "github,trunk",
|
||||||
"search.timeZoneId": "America/New_York",
|
"search.timeZoneId": "UTC",
|
||||||
"search.relativeStartTime": f"P{threshold_days}D",
|
"search.relativeStartTime": f"P{threshold_days}D",
|
||||||
"tests.container": class_name,
|
"tests.container": class_name,
|
||||||
"tests.test": method_name
|
"tests.test": method_name
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if test_type:
|
||||||
|
params["search.tasks"] = test_type
|
||||||
|
|
||||||
return f"{base_url}?{'&'.join(f'{k}={requests.utils.quote(str(v))}' for k, v in params.items())}"
|
return f"{base_url}?{'&'.join(f'{k}={requests.utils.quote(str(v))}' for k, v in params.items())}"
|
||||||
|
|
||||||
def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_days: int):
|
def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_days: int, test_type: str = None):
|
||||||
"""Print a summary of the most problematic tests"""
|
"""Print a summary of the most problematic tests"""
|
||||||
print("\n## Most Problematic Tests")
|
print("\n## Most Problematic Tests")
|
||||||
if not problematic_tests:
|
if not problematic_tests:
|
||||||
|
@ -827,7 +875,7 @@ def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_d
|
||||||
for test_name, details in sorted(problematic_tests.items(),
|
for test_name, details in sorted(problematic_tests.items(),
|
||||||
key=lambda x: x[1]['failure_rate'],
|
key=lambda x: x[1]['failure_rate'],
|
||||||
reverse=True):
|
reverse=True):
|
||||||
class_link = get_develocity_class_link(test_name, threshold_days)
|
class_link = get_develocity_class_link(test_name, threshold_days, test_type=test_type)
|
||||||
print(f"<tr><td colspan=\"4\">{test_name}</td><td><a href=\"{class_link}\">↗️</a></td></tr>")
|
print(f"<tr><td colspan=\"4\">{test_name}</td><td><a href=\"{class_link}\">↗️</a></td></tr>")
|
||||||
|
|
||||||
for test_case in sorted(details['test_cases'],
|
for test_case in sorted(details['test_cases'],
|
||||||
|
@ -836,7 +884,7 @@ def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_d
|
||||||
reverse=True):
|
reverse=True):
|
||||||
method_name = test_case.name.split('.')[-1]
|
method_name = test_case.name.split('.')[-1]
|
||||||
if method_name != 'N/A':
|
if method_name != 'N/A':
|
||||||
method_link = get_develocity_method_link(test_name, test_case.name, threshold_days)
|
method_link = get_develocity_method_link(test_name, test_case.name, threshold_days, test_type="quarantinedTest")
|
||||||
total_runs = test_case.outcome_distribution.total
|
total_runs = test_case.outcome_distribution.total
|
||||||
failure_rate = (test_case.outcome_distribution.failed + test_case.outcome_distribution.flaky) / total_runs if total_runs > 0 else 0
|
failure_rate = (test_case.outcome_distribution.failed + test_case.outcome_distribution.flaky) / total_runs if total_runs > 0 else 0
|
||||||
print(f"<tr><td></td><td>{method_name}</td>"
|
print(f"<tr><td></td><td>{method_name}</td>"
|
||||||
|
@ -925,7 +973,7 @@ def print_flaky_regressions(flaky_regressions: Dict[str, Dict], threshold_days:
|
||||||
|
|
||||||
print("</details>")
|
print("</details>")
|
||||||
|
|
||||||
def print_cleared_tests(cleared_tests: Dict[str, Dict], threshold_days: int):
|
def print_cleared_tests(cleared_tests: Dict[str, Dict], threshold_days: int, test_type: str = None):
|
||||||
"""Print tests that are ready to be unquarantined"""
|
"""Print tests that are ready to be unquarantined"""
|
||||||
print("\n## Cleared Tests (Ready for Unquarantine)")
|
print("\n## Cleared Tests (Ready for Unquarantine)")
|
||||||
if not cleared_tests:
|
if not cleared_tests:
|
||||||
|
@ -945,7 +993,7 @@ def print_cleared_tests(cleared_tests: Dict[str, Dict], threshold_days: int):
|
||||||
for test_name, details in sorted(cleared_tests.items(),
|
for test_name, details in sorted(cleared_tests.items(),
|
||||||
key=lambda x: x[1]['success_rate'],
|
key=lambda x: x[1]['success_rate'],
|
||||||
reverse=True):
|
reverse=True):
|
||||||
class_link = get_develocity_class_link(test_name, threshold_days)
|
class_link = get_develocity_class_link(test_name, threshold_days, test_type=test_type)
|
||||||
print(f"<tr><td colspan=\"5\">{test_name}</td><td><a href=\"{class_link}\">↗️</a></td></tr>")
|
print(f"<tr><td colspan=\"5\">{test_name}</td><td><a href=\"{class_link}\">↗️</a></td></tr>")
|
||||||
print(f"<tr><td></td><td>Class Overall</td>"
|
print(f"<tr><td></td><td>Class Overall</td>"
|
||||||
f"<td>{details['success_rate']:.2%}</td>"
|
f"<td>{details['success_rate']:.2%}</td>"
|
||||||
|
@ -1015,6 +1063,12 @@ def main():
|
||||||
analyzer = TestAnalyzer(BASE_URL, token)
|
analyzer = TestAnalyzer(BASE_URL, token)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
quarantined_builds = analyzer.get_build_info([], PROJECT, "quarantinedTest", 7, bypass_cache=True, fetch_all=True)
|
||||||
|
regular_builds = analyzer.get_build_info([], PROJECT, "test", 7, bypass_cache=True, fetch_all=True)
|
||||||
|
|
||||||
|
analyzer.update_cache(quarantined_builds)
|
||||||
|
analyzer.update_cache(regular_builds)
|
||||||
|
|
||||||
# Get test results
|
# Get test results
|
||||||
quarantined_results = analyzer.get_test_results(
|
quarantined_results = analyzer.get_test_results(
|
||||||
PROJECT,
|
PROJECT,
|
||||||
|
@ -1054,9 +1108,9 @@ def main():
|
||||||
print(f"This report was run on {datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')} UTC")
|
print(f"This report was run on {datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')} UTC")
|
||||||
|
|
||||||
# Print each section
|
# Print each section
|
||||||
print_most_problematic_tests(problematic_tests, QUARANTINE_THRESHOLD_DAYS)
|
print_most_problematic_tests(problematic_tests, QUARANTINE_THRESHOLD_DAYS, test_type="quarantinedTest")
|
||||||
print_flaky_regressions(flaky_regressions, QUARANTINE_THRESHOLD_DAYS)
|
print_flaky_regressions(flaky_regressions, QUARANTINE_THRESHOLD_DAYS)
|
||||||
print_cleared_tests(cleared_tests, QUARANTINE_THRESHOLD_DAYS)
|
print_cleared_tests(cleared_tests, QUARANTINE_THRESHOLD_DAYS, test_type="quarantinedTest")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Error occurred during report generation")
|
logger.exception("Error occurred during report generation")
|
||||||
|
|
Loading…
Reference in New Issue