Fix multi-indexing runs for checkVec (#130408)

We can now run with multiple query-time parameters, but that change broke
running multiple indexing loads.

This fixes running multiple indexing and search tests with the KnnIndexTester.

Example output:

```
index_name                           index_type  num_docs  index_time(ms)  force_merge_time(ms)  num_segments
-----------------------------------  ----------  --------  --------------  --------------------  ------------
corpus-quora-E5-small.fvec.flat             ivf    100000            4386                  4522             0
cohere-wikipedia-docs-768d.vec              ivf    100000            7784                  8331             0
corpus-dbpedia-entity-arctic-0.fvec         ivf    100000            7513                  8969             0

index_name                           index_type  n_probe  latency(ms)  net_cpu_time(ms)  avg_cpu_count      QPS  recall   visited
-----------------------------------  ----------  -------  -----------  ----------------  -------------  -------  ------  --------
corpus-quora-E5-small.fvec.flat             ivf       10         0.87              0.00           0.00  1149.43    0.88   5511.60
corpus-quora-E5-small.fvec.flat             ivf       20         0.73              0.00           0.00  1369.86    0.94  10101.70
corpus-quora-E5-small.fvec.flat             ivf       30         0.76              0.00           0.00  1315.79    0.95  14382.15
corpus-quora-E5-small.fvec.flat             ivf       40         0.84              0.00           0.00  1190.48    0.96  18554.68
corpus-quora-E5-small.fvec.flat             ivf       50         0.99              0.00           0.00  1010.10    0.97  22609.69
cohere-wikipedia-docs-768d.vec              ivf       10         0.39              0.00           0.00  2564.10    0.61   2567.00
cohere-wikipedia-docs-768d.vec              ivf       20         0.53              0.00           0.00  1886.79    0.78   5017.25
cohere-wikipedia-docs-768d.vec              ivf       30         0.66              0.00           0.00  1515.15    0.84   7486.77
cohere-wikipedia-docs-768d.vec              ivf       40         0.80              0.00           0.00  1250.00    0.88  10177.80
cohere-wikipedia-docs-768d.vec              ivf       50         0.95              0.00           0.00  1052.63    0.90  12744.67
corpus-dbpedia-entity-arctic-0.fvec         ivf       10         0.48              0.00           0.00  2083.33    0.62   3302.07
corpus-dbpedia-entity-arctic-0.fvec         ivf       20         0.66              0.00           0.00  1515.15    0.74   6333.91
corpus-dbpedia-entity-arctic-0.fvec         ivf       30         0.78              0.00           0.00  1282.05    0.81   9545.16
corpus-dbpedia-entity-arctic-0.fvec         ivf       40         0.93              0.00           0.00  1075.27    0.85  12786.62
corpus-dbpedia-entity-arctic-0.fvec         ivf       50         1.08              0.00           0.00   925.93    0.87  15749.40
```
This commit is contained in:
Benjamin Trent 2025-07-01 15:58:32 -04:00 committed by GitHub
parent 2144baeb8c
commit c1a4f8ae68
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 42 additions and 35 deletions

View File

@ -177,9 +177,11 @@ public class KnnIndexTester {
int[] nProbes = cmdLineArgs.indexType().equals(IndexType.IVF) && cmdLineArgs.numQueries() > 0
? cmdLineArgs.nProbes()
: new int[] { 0 };
String indexType = cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT);
Results indexResults = new Results(cmdLineArgs.docVectors().getFileName().toString(), indexType, cmdLineArgs.numDocs());
Results[] results = new Results[nProbes.length];
for (int i = 0; i < nProbes.length; i++) {
results[i] = new Results(cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT), cmdLineArgs.numDocs());
results[i] = new Results(cmdLineArgs.docVectors().getFileName().toString(), indexType, cmdLineArgs.numDocs());
}
logger.info("Running KNN index tester with arguments: " + cmdLineArgs);
Codec codec = createCodec(cmdLineArgs);
@ -199,12 +201,12 @@ public class KnnIndexTester {
throw new IllegalArgumentException("Index path does not exist: " + indexPath);
}
if (cmdLineArgs.reindex()) {
knnIndexer.createIndex(results[0]);
knnIndexer.createIndex(indexResults);
}
if (cmdLineArgs.forceMerge()) {
knnIndexer.forceMerge(results[0]);
knnIndexer.forceMerge(indexResults);
} else {
knnIndexer.numSegments(results[0]);
knnIndexer.numSegments(indexResults);
}
}
if (cmdLineArgs.queryVectors() != null && cmdLineArgs.numQueries() > 0) {
@ -214,24 +216,27 @@ public class KnnIndexTester {
knnSearcher.runSearch(results[i], cmdLineArgs.earlyTermination());
}
}
formattedResults.results.addAll(List.of(results));
formattedResults.queryResults.addAll(List.of(results));
formattedResults.indexResults.add(indexResults);
}
logger.info("Results: \n" + formattedResults);
}
static class FormattedResults {
List<Results> results = new ArrayList<>();
List<Results> indexResults = new ArrayList<>();
List<Results> queryResults = new ArrayList<>();
@Override
public String toString() {
if (results.isEmpty()) {
if (indexResults.isEmpty() && queryResults.isEmpty()) {
return "No results available.";
}
String[] indexingHeaders = { "index_type", "num_docs", "index_time(ms)", "force_merge_time(ms)", "num_segments" };
String[] indexingHeaders = { "index_name", "index_type", "num_docs", "index_time(ms)", "force_merge_time(ms)", "num_segments" };
// Define column headers
String[] searchHeaders = {
"index_name",
"index_type",
"n_probe",
"latency(ms)",
@ -245,33 +250,34 @@ public class KnnIndexTester {
StringBuilder sb = new StringBuilder();
Results indexResult = results.get(0); // Assuming all results have the same index type and numDocs
String[] indexData = {
indexResult.indexType,
Integer.toString(indexResult.numDocs),
Long.toString(indexResult.indexTimeMS),
Long.toString(indexResult.forceMergeTimeMS),
Integer.toString(indexResult.numSegments) };
printBlock(sb, indexingHeaders, new String[][] { indexData });
String[][] searchData = new String[results.size()][];
// Format and append each row of data
for (int i = 0; i < results.size(); i++) {
Results result = results.get(i);
searchData[i] = new String[] {
result.indexType,
Integer.toString(result.nProbe),
String.format(Locale.ROOT, "%.2f", result.avgLatency),
String.format(Locale.ROOT, "%.2f", result.netCpuTimeMS),
String.format(Locale.ROOT, "%.2f", result.avgCpuCount),
String.format(Locale.ROOT, "%.2f", result.qps),
String.format(Locale.ROOT, "%.2f", result.avgRecall),
String.format(Locale.ROOT, "%.2f", result.averageVisited) };
String[][] indexResultsArray = new String[indexResults.size()][];
for (int i = 0; i < indexResults.size(); i++) {
Results indexResult = indexResults.get(i);
indexResultsArray[i] = new String[] {
indexResult.indexName,
indexResult.indexType,
Integer.toString(indexResult.numDocs),
Long.toString(indexResult.indexTimeMS),
Long.toString(indexResult.forceMergeTimeMS),
Integer.toString(indexResult.numSegments) };
}
printBlock(sb, indexingHeaders, indexResultsArray);
String[][] queryResultsArray = new String[queryResults.size()][];
for (int i = 0; i < queryResults.size(); i++) {
Results queryResult = queryResults.get(i);
queryResultsArray[i] = new String[] {
queryResult.indexName,
queryResult.indexType,
Integer.toString(queryResult.nProbe),
String.format(Locale.ROOT, "%.2f", queryResult.avgLatency),
String.format(Locale.ROOT, "%.2f", queryResult.netCpuTimeMS),
String.format(Locale.ROOT, "%.2f", queryResult.avgCpuCount),
String.format(Locale.ROOT, "%.2f", queryResult.qps),
String.format(Locale.ROOT, "%.2f", queryResult.avgRecall),
String.format(Locale.ROOT, "%.2f", queryResult.averageVisited) };
}
printBlock(sb, searchHeaders, searchData);
printBlock(sb, searchHeaders, queryResultsArray);
return sb.toString();
}
@ -331,7 +337,7 @@ public class KnnIndexTester {
}
static class Results {
final String indexType;
final String indexType, indexName;
final int numDocs;
long indexTimeMS;
long forceMergeTimeMS;
@ -344,7 +350,8 @@ public class KnnIndexTester {
double netCpuTimeMS;
double avgCpuCount;
Results(String indexType, int numDocs) {
Results(String indexName, String indexType, int numDocs) {
this.indexName = indexName;
this.indexType = indexType;
this.numDocs = numDocs;
}