ESQL: Pragma to load from stored fields (#122891)

This creates a `pragma` you can use to request that fields load from a
stored field rather than doc values. It implements that pragma for
`keyword` and number fields.

We expect that, for some disk configuration and some number of fields,
it's faster to load those fields from _source or stored fields than
it is to use doc values. Our default is doc values and on my laptop it's
*always* faster to use doc values. But we don't ship my laptop to every
cluster.

This will let us experiment and debug slow queries by trying to load
fields a different way.

You access this pragma with:
```
curl -HContent-Type:application/json -XPOST localhost:9200/_query?pretty -d '{
    "query": "FROM foo",
    "pragma": {
        "field_extract_preference": "STORED"
    }
}'
```

On a release build you'll need to add `"accept_pragma_risks": true`.
This commit is contained in:
Nik Everett 2025-03-12 09:40:42 -04:00 committed by GitHub
parent 690454000c
commit 50aaa1c2a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 319 additions and 86 deletions

View File

@ -0,0 +1,5 @@
pr: 122891
summary: Pragma to load from stored fields
area: ES|QL
type: enhancement
issues: []

View File

@ -320,7 +320,7 @@ public class ScaledFloatFieldMapper extends FieldMapper {
// Counters are not supported by ESQL so we load them as null
return BlockLoader.CONSTANT_NULLS;
}
if (hasDocValues()) {
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSource)) {
return new BlockDocValuesReader.DoublesBlockLoader(name(), l -> l / scalingFactor);
}
if (isSyntheticSource) {
@ -333,7 +333,8 @@ public class ScaledFloatFieldMapper extends FieldMapper {
}
ValueFetcher valueFetcher = sourceValueFetcher(blContext.sourcePaths(name()));
BlockSourceReader.LeafIteratorLookup lookup = isStored() || isIndexed()
BlockSourceReader.LeafIteratorLookup lookup = hasDocValues() == false && (isStored() || isIndexed())
// We only write the field names field if there aren't doc values or norms
? BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())
: BlockSourceReader.lookupMatchingAll();
return new BlockSourceReader.DoublesBlockLoader(valueFetcher, lookup);

View File

@ -18,8 +18,8 @@ import java.util.List;
import java.util.Map;
public class ScaledFloatFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Double> {
public ScaledFloatFieldBlockLoaderTests() {
super(FieldType.SCALED_FLOAT);
public ScaledFloatFieldBlockLoaderTests(Params params) {
super(FieldType.SCALED_FLOAT, params);
}
@Override

View File

@ -748,7 +748,7 @@ public final class KeywordFieldMapper extends FieldMapper {
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (hasDocValues()) {
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSource)) {
return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name());
}
if (isStored()) {
@ -806,7 +806,8 @@ public final class KeywordFieldMapper extends FieldMapper {
if (getTextSearchInfo().hasNorms()) {
return BlockSourceReader.lookupFromNorms(name());
}
if (isIndexed() || isStored()) {
if (hasDocValues() == false && (isIndexed() || isStored())) {
// We only write the field names field if there aren't doc values or norms
return BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name());
}
return BlockSourceReader.lookupMatchingAll();

View File

@ -692,8 +692,13 @@ public abstract class MappedFieldType {
/**
* No preference. Leave the choice of where to load the field from up to the FieldType.
*/
NONE;
NONE,
/**
* Prefer loading from stored fields like {@code _source} because we're
* loading many fields. The {@link MappedFieldType} can choose a different
* method to load the field if it needs to.
*/
STORED;
}
/**

View File

@ -1912,7 +1912,7 @@ public class NumberFieldMapper extends FieldMapper {
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (hasDocValues()) {
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSource)) {
return type.blockLoaderFromDocValues(name());
}
@ -1920,7 +1920,8 @@ public class NumberFieldMapper extends FieldMapper {
return type.blockLoaderFromFallbackSyntheticSource(name(), nullValue, coerce);
}
BlockSourceReader.LeafIteratorLookup lookup = isStored() || isIndexed()
BlockSourceReader.LeafIteratorLookup lookup = hasDocValues() == false && (isStored() || isIndexed())
// We only write the field names field if there aren't doc values or norms
? BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())
: BlockSourceReader.lookupMatchingAll();
return type.blockLoaderFromSource(sourceValueFetcher(blContext.sourcePaths(name())), lookup);

View File

@ -17,13 +17,13 @@ import java.util.Map;
import java.util.Objects;
public class BooleanFieldBlockLoaderTests extends BlockLoaderTestCase {
public BooleanFieldBlockLoaderTests() {
super(FieldType.BOOLEAN);
public BooleanFieldBlockLoaderTests(Params params) {
super(FieldType.BOOLEAN, params);
}
@Override
@SuppressWarnings("unchecked")
protected Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource) {
protected Object expected(Map<String, Object> fieldMapping, Object value) {
var nullValue = switch (fieldMapping.get("null_value")) {
case Boolean b -> b;
case String s -> Boolean.parseBoolean(s);

View File

@ -15,8 +15,8 @@ import org.elasticsearch.logsdb.datageneration.FieldType;
import java.util.Map;
public class ByteFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Integer> {
public ByteFieldBlockLoaderTests() {
super(FieldType.BYTE);
public ByteFieldBlockLoaderTests(Params params) {
super(FieldType.BYTE, params);
}
@Override

View File

@ -23,13 +23,13 @@ import java.util.Map;
import java.util.Objects;
public class DateFieldBlockLoaderTests extends BlockLoaderTestCase {
public DateFieldBlockLoaderTests() {
super(FieldType.DATE);
public DateFieldBlockLoaderTests(Params params) {
super(FieldType.DATE, params);
}
@Override
@SuppressWarnings("unchecked")
protected Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource) {
protected Object expected(Map<String, Object> fieldMapping, Object value) {
var format = (String) fieldMapping.get("format");
var nullValue = fieldMapping.get("null_value") != null ? format(fieldMapping.get("null_value"), format) : null;

View File

@ -15,8 +15,8 @@ import org.elasticsearch.logsdb.datageneration.FieldType;
import java.util.Map;
public class DoubleFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Double> {
public DoubleFieldBlockLoaderTests() {
super(FieldType.DOUBLE);
public DoubleFieldBlockLoaderTests(Params params) {
super(FieldType.DOUBLE, params);
}
@Override

View File

@ -15,8 +15,8 @@ import org.elasticsearch.logsdb.datageneration.FieldType;
import java.util.Map;
public class FloatFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Double> {
public FloatFieldBlockLoaderTests() {
super(FieldType.FLOAT);
public FloatFieldBlockLoaderTests(Params params) {
super(FieldType.FLOAT, params);
}
@Override

View File

@ -16,8 +16,8 @@ import org.elasticsearch.logsdb.datageneration.FieldType;
import java.util.Map;
public class HalfFloatFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Double> {
public HalfFloatFieldBlockLoaderTests() {
super(FieldType.HALF_FLOAT);
public HalfFloatFieldBlockLoaderTests(Params params) {
super(FieldType.HALF_FLOAT, params);
}
@Override

View File

@ -15,8 +15,8 @@ import org.elasticsearch.logsdb.datageneration.FieldType;
import java.util.Map;
public class IntegerFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Integer> {
public IntegerFieldBlockLoaderTests() {
super(FieldType.INTEGER);
public IntegerFieldBlockLoaderTests(Params params) {
super(FieldType.INTEGER, params);
}
@Override

View File

@ -11,6 +11,7 @@ package org.elasticsearch.index.mapper.blockloader;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.mapper.BlockLoaderTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.logsdb.datageneration.FieldType;
import java.util.List;
@ -20,13 +21,13 @@ import java.util.function.Function;
import java.util.stream.Stream;
public class KeywordFieldBlockLoaderTests extends BlockLoaderTestCase {
public KeywordFieldBlockLoaderTests() {
super(FieldType.KEYWORD);
public KeywordFieldBlockLoaderTests(Params params) {
super(FieldType.KEYWORD, params);
}
@SuppressWarnings("unchecked")
@Override
protected Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource) {
protected Object expected(Map<String, Object> fieldMapping, Object value) {
var nullValue = (String) fieldMapping.get("null_value");
var ignoreAbove = fieldMapping.get("ignore_above") == null
@ -44,7 +45,9 @@ public class KeywordFieldBlockLoaderTests extends BlockLoaderTestCase {
Function<Stream<String>, Stream<BytesRef>> convertValues = s -> s.map(v -> convert(v, nullValue, ignoreAbove))
.filter(Objects::nonNull);
if ((boolean) fieldMapping.getOrDefault("doc_values", false)) {
boolean hasDocValues = hasDocValues(fieldMapping, false);
boolean useDocValues = params.preference() == MappedFieldType.FieldExtractPreference.NONE || params.syntheticSource();
if (hasDocValues && useDocValues) {
// Sorted and no duplicates
var resultList = convertValues.andThen(Stream::distinct)
.andThen(Stream::sorted)

View File

@ -15,8 +15,8 @@ import org.elasticsearch.logsdb.datageneration.FieldType;
import java.util.Map;
public class LongFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Long> {
public LongFieldBlockLoaderTests() {
super(FieldType.LONG);
public LongFieldBlockLoaderTests(Params params) {
super(FieldType.LONG, params);
}
@Override

View File

@ -15,8 +15,8 @@ import org.elasticsearch.logsdb.datageneration.FieldType;
import java.util.Map;
public class ShortFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Integer> {
public ShortFieldBlockLoaderTests() {
super(FieldType.SHORT);
public ShortFieldBlockLoaderTests(Params params) {
super(FieldType.SHORT, params);
}
@Override

View File

@ -9,6 +9,8 @@
package org.elasticsearch.index.mapper;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
@ -40,13 +42,33 @@ import java.util.Set;
import java.util.stream.Stream;
public abstract class BlockLoaderTestCase extends MapperServiceTestCase {
private static final MappedFieldType.FieldExtractPreference[] PREFERENCES = new MappedFieldType.FieldExtractPreference[] {
MappedFieldType.FieldExtractPreference.NONE,
MappedFieldType.FieldExtractPreference.STORED };
@ParametersFactory(argumentFormatting = "preference=%s")
public static List<Object[]> args() {
List<Object[]> args = new ArrayList<>();
for (boolean syntheticSource : new boolean[] { false, true }) {
for (MappedFieldType.FieldExtractPreference preference : PREFERENCES) {
args.add(new Object[] { new Params(syntheticSource, preference) });
}
}
return args;
}
public record Params(boolean syntheticSource, MappedFieldType.FieldExtractPreference preference) {}
private final FieldType fieldType;
protected final Params params;
private final String fieldName;
private final MappingGenerator mappingGenerator;
private final DocumentGenerator documentGenerator;
protected BlockLoaderTestCase(FieldType fieldType) {
protected BlockLoaderTestCase(FieldType fieldType, Params params) {
this.fieldType = fieldType;
this.params = params;
this.fieldName = randomAlphaOfLengthBetween(5, 10);
var specification = DataGeneratorSpecification.builder()
@ -83,10 +105,9 @@ public abstract class BlockLoaderTestCase extends MapperServiceTestCase {
public void testBlockLoader() throws IOException {
var template = new Template(Map.of(fieldName, new Template.Leaf(fieldName, fieldType)));
var syntheticSource = randomBoolean();
var mapping = mappingGenerator.generate(template);
runTest(template, mapping, syntheticSource, fieldName);
runTest(template, mapping, fieldName);
}
@SuppressWarnings("unchecked")
@ -110,34 +131,34 @@ public abstract class BlockLoaderTestCase extends MapperServiceTestCase {
currentLevel.put(fieldName, new Template.Leaf(fieldName, fieldType));
var template = new Template(top);
var syntheticSource = randomBoolean();
var mapping = mappingGenerator.generate(template);
if (syntheticSource && randomBoolean()) {
if (params.syntheticSource && randomBoolean()) {
// force fallback synthetic source in the hierarchy
var docMapping = (Map<String, Object>) mapping.raw().get("_doc");
var topLevelMapping = (Map<String, Object>) ((Map<String, Object>) docMapping.get("properties")).get("top");
topLevelMapping.put("synthetic_source_keep", "all");
}
runTest(template, mapping, syntheticSource, fullFieldName.toString());
runTest(template, mapping, fullFieldName.toString());
}
private void runTest(Template template, Mapping mapping, boolean syntheticSource, String fieldName) throws IOException {
private void runTest(Template template, Mapping mapping, String fieldName) throws IOException {
var mappingXContent = XContentBuilder.builder(XContentType.JSON.xContent()).map(mapping.raw());
var mapperService = syntheticSource ? createSytheticSourceMapperService(mappingXContent) : createMapperService(mappingXContent);
var mapperService = params.syntheticSource
? createSytheticSourceMapperService(mappingXContent)
: createMapperService(mappingXContent);
var document = documentGenerator.generate(template, mapping);
var documentXContent = XContentBuilder.builder(XContentType.JSON.xContent()).map(document);
Object blockLoaderResult = setupAndInvokeBlockLoader(mapperService, documentXContent, fieldName);
Object expected = expected(mapping.lookup().get(fieldName), getFieldValue(document, fieldName), syntheticSource);
Object expected = expected(mapping.lookup().get(fieldName), getFieldValue(document, fieldName));
assertEquals(expected, blockLoaderResult);
}
protected abstract Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource);
protected abstract Object expected(Map<String, Object> fieldMapping, Object value);
private Object getFieldValue(Map<String, Object> document, String fieldName) {
var rawValues = new ArrayList<>();
@ -258,8 +279,7 @@ public abstract class BlockLoaderTestCase extends MapperServiceTestCase {
@Override
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
// TODO randomize when adding support for fields that care about this
return MappedFieldType.FieldExtractPreference.NONE;
return params.preference;
}
@Override
@ -283,4 +303,8 @@ public abstract class BlockLoaderTestCase extends MapperServiceTestCase {
}
});
}
protected static boolean hasDocValues(Map<String, Object> fieldMapping, boolean defaultValue) {
return (boolean) fieldMapping.getOrDefault("doc_values", defaultValue);
}
}

View File

@ -16,20 +16,22 @@ import java.util.Map;
import java.util.Objects;
public abstract class NumberFieldBlockLoaderTestCase<T extends Number> extends BlockLoaderTestCase {
public NumberFieldBlockLoaderTestCase(FieldType fieldType) {
super(fieldType);
public NumberFieldBlockLoaderTestCase(FieldType fieldType, Params params) {
super(fieldType, params);
}
@Override
@SuppressWarnings("unchecked")
protected Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource) {
protected Object expected(Map<String, Object> fieldMapping, Object value) {
var nullValue = fieldMapping.get("null_value") != null ? convert((Number) fieldMapping.get("null_value"), fieldMapping) : null;
if (value instanceof List<?> == false) {
return convert(value, nullValue, fieldMapping);
}
if ((boolean) fieldMapping.getOrDefault("doc_values", false)) {
boolean hasDocValues = hasDocValues(fieldMapping, true);
boolean useDocValues = params.preference() == MappedFieldType.FieldExtractPreference.NONE || params.syntheticSource();
if (hasDocValues && useDocValues) {
// Sorted
var resultList = ((List<Object>) value).stream()
.map(v -> convert(v, nullValue, fieldMapping))

View File

@ -66,11 +66,15 @@ import org.elasticsearch.compute.test.SequenceLongBlockSourceOperator;
import org.elasticsearch.compute.test.TestDriverFactory;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperServiceTestCase;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
@ -193,7 +197,7 @@ public class OperatorTests extends MapperServiceTestCase {
});
operators.add(
new OrdinalsGroupingOperator(
shardIdx -> new KeywordFieldMapper.KeywordFieldType("g").blockLoader(null),
shardIdx -> new KeywordFieldMapper.KeywordFieldType("g").blockLoader(mockBlContext()),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
ElementType.BYTES_REF,
0,
@ -396,4 +400,43 @@ public class OperatorTests extends MapperServiceTestCase {
false // no scoring
);
}
private MappedFieldType.BlockLoaderContext mockBlContext() {
return new MappedFieldType.BlockLoaderContext() {
@Override
public String indexName() {
throw new UnsupportedOperationException();
}
@Override
public IndexSettings indexSettings() {
throw new UnsupportedOperationException();
}
@Override
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
return MappedFieldType.FieldExtractPreference.NONE;
}
@Override
public SearchLookup lookup() {
throw new UnsupportedOperationException();
}
@Override
public Set<String> sourcePaths(String name) {
throw new UnsupportedOperationException();
}
@Override
public String parentField(String field) {
throw new UnsupportedOperationException();
}
@Override
public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
throw new UnsupportedOperationException();
}
};
}
}

View File

@ -225,7 +225,7 @@ public class ValueSourceReaderTypeConversionTests extends AnyOperatorTestCase {
MappedFieldType ft,
ElementType elementType
) {
return factory(shardContexts, ft.name(), elementType, ft.blockLoader(null));
return factory(shardContexts, ft.name(), elementType, ft.blockLoader(blContext()));
}
private static Operator.OperatorFactory factory(

View File

@ -138,7 +138,7 @@ public class ValuesSourceReaderOperatorTests extends OperatorTestCase {
}
public static Operator.OperatorFactory factory(IndexReader reader, MappedFieldType ft, ElementType elementType) {
return factory(reader, ft.name(), elementType, ft.blockLoader(null));
return factory(reader, ft.name(), elementType, ft.blockLoader(blContext()));
}
static Operator.OperatorFactory factory(IndexReader reader, String name, ElementType elementType, BlockLoader loader) {

View File

@ -9,18 +9,35 @@ package org.elasticsearch.xpack.esql.qa.mixed;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.test.TestClustersThreadFilter;
import org.elasticsearch.test.cluster.ElasticsearchCluster;
import org.elasticsearch.xpack.esql.qa.rest.FieldExtractorTestCase;
import org.hamcrest.Matcher;
import org.junit.ClassRule;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.nullValue;
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
public class FieldExtractorIT extends FieldExtractorTestCase {
@ClassRule
public static ElasticsearchCluster cluster = Clusters.mixedVersionCluster();
public FieldExtractorIT(MappedFieldType.FieldExtractPreference preference) {
super(preference);
}
@Override
protected String getTestRestCluster() {
return cluster.getHttpAddresses();
}
@Override
protected Matcher<Integer> pidMatcher() {
// TODO these should all always return null because the parent is nested
return preference == MappedFieldType.FieldExtractPreference.STORED ? anyOf(equalTo(111), nullValue()) : nullValue(Integer.class);
}
}

View File

@ -9,6 +9,7 @@ package org.elasticsearch.xpack.esql.qa.multi_node;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.test.TestClustersThreadFilter;
import org.elasticsearch.test.cluster.ElasticsearchCluster;
import org.elasticsearch.xpack.esql.qa.rest.FieldExtractorTestCase;
@ -19,6 +20,10 @@ public class FieldExtractorIT extends FieldExtractorTestCase {
@ClassRule
public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> {});
public FieldExtractorIT(MappedFieldType.FieldExtractPreference preference) {
super(preference);
}
@Override
protected String getTestRestCluster() {
return cluster.getHttpAddresses();

View File

@ -9,6 +9,7 @@ package org.elasticsearch.xpack.esql.qa.single_node;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.test.TestClustersThreadFilter;
import org.elasticsearch.test.cluster.ElasticsearchCluster;
import org.elasticsearch.xpack.esql.qa.rest.FieldExtractorTestCase;
@ -19,6 +20,10 @@ public class FieldExtractorIT extends FieldExtractorTestCase {
@ClassRule
public static ElasticsearchCluster cluster = Clusters.testCluster();
public FieldExtractorIT(MappedFieldType.FieldExtractPreference preference) {
super(preference);
}
@Override
protected String getTestRestCluster() {
return cluster.getHttpAddresses();

View File

@ -7,6 +7,8 @@
package org.elasticsearch.xpack.esql.qa.rest;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.Version;
import org.elasticsearch.client.Request;
@ -19,6 +21,7 @@ import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.geo.GeometryTestUtils;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.test.ESTestCase;
@ -28,6 +31,7 @@ import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
import org.hamcrest.Matcher;
import org.junit.Before;
@ -50,6 +54,8 @@ import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.entityToMap;
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.runEsqlSync;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.nullValue;
/**
* Creates indices with many different mappings and fetches values from them to make sure
@ -60,6 +66,21 @@ import static org.hamcrest.Matchers.containsString;
public abstract class FieldExtractorTestCase extends ESRestTestCase {
private static final Logger logger = LogManager.getLogger(FieldExtractorTestCase.class);
@ParametersFactory(argumentFormatting = "%s")
public static List<Object[]> args() throws Exception {
return List.of(
new Object[] { null },
new Object[] { MappedFieldType.FieldExtractPreference.NONE },
new Object[] { MappedFieldType.FieldExtractPreference.STORED }
);
}
protected final MappedFieldType.FieldExtractPreference preference;
protected FieldExtractorTestCase(MappedFieldType.FieldExtractPreference preference) {
this.preference = preference;
}
@Before
public void notOld() {
assumeTrue(
@ -1269,6 +1290,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
{"Responses.process": 222,"process.parent.command_line":"run2.bat"}""");
Map<String, Object> result = runEsql("FROM test* | SORT process.parent.command_line");
// If we're loading from _source we load the nested field.
assertResultMap(
result,
List.of(
@ -1278,7 +1300,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
columnInfo("process.parent.command_line.text", "text")
),
List.of(
matchesList().item(null).item(null).item("run1.bat").item("run1.bat"),
matchesList().item(null).item(pidMatcher()).item("run1.bat").item("run1.bat"),
matchesList().item(222).item(222).item("run2.bat").item("run2.bat")
)
);
@ -1307,7 +1329,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
columnInfo("process.parent.command_line.text", "text")
),
List.of(
matchesList().item(null).item(null).item("run1.bat").item("run1.bat"),
matchesList().item(null).item(pidMatcher()).item("run1.bat").item("run1.bat"),
matchesList().item(222).item(222).item("run2.bat").item("run2.bat")
)
);
@ -1325,10 +1347,15 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
columnInfo("process.parent.command_line", "keyword"),
columnInfo("process.parent.command_line.text", "text")
),
List.of(matchesList().item(null).item(null).item("run1.bat").item("run1.bat"))
List.of(matchesList().item(null).item(pidMatcher()).item("run1.bat").item("run1.bat"))
);
}
protected Matcher<Integer> pidMatcher() {
// TODO these should all always return null because the parent is nested
return preference == MappedFieldType.FieldExtractPreference.STORED ? equalTo(111) : nullValue(Integer.class);
}
private void assumeIndexResolverNestedFieldsNameClashFixed() throws IOException {
// especially for BWC tests but also for regular tests
var capsName = EsqlCapabilities.Cap.FIX_NESTED_FIELDS_NAME_CLASH_IN_INDEXRESOLVER.name().toLowerCase(Locale.ROOT);
@ -1440,7 +1467,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
private record StoreAndDocValues(Boolean store, Boolean docValues) {}
private static class Test {
private class Test {
private final String type;
private final Map<String, Test> subFields = new TreeMap<>();
@ -1716,8 +1743,13 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
return err.replaceAll("\\\\\n\s+\\\\", "");
}
private static Map<String, Object> runEsql(String query) throws IOException {
return runEsqlSync(new RestEsqlTestCase.RequestObjectBuilder().query(query));
private Map<String, Object> runEsql(String query) throws IOException {
RestEsqlTestCase.RequestObjectBuilder request = new RestEsqlTestCase.RequestObjectBuilder().query(query);
if (preference != null) {
request = request.pragmas(
Settings.builder().put(QueryPragmas.FIELD_EXTRACT_PREFERENCE.getKey(), preference.toString()).build()
);
}
return runEsqlSync(request);
}
}

View File

@ -37,14 +37,19 @@ import org.elasticsearch.compute.test.TestDriverFactory;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.internal.AliasFilter;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.ShardSearchRequest;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.tasks.CancellableTask;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xpack.core.async.AsyncExecutionId;
import org.elasticsearch.xpack.esql.core.expression.Alias;
@ -64,6 +69,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import static org.elasticsearch.test.ListMatcher.matchesList;
@ -193,7 +199,7 @@ public class LookupFromIndexIT extends AbstractEsqlIntegTestCase {
new ValuesSourceReaderOperator.FieldInfo(
"key",
PlannerUtils.toElementType(keyType),
shard -> searchContext.getSearchExecutionContext().getFieldType("key").blockLoader(null)
shard -> searchContext.getSearchExecutionContext().getFieldType("key").blockLoader(blContext())
)
),
List.of(new ValuesSourceReaderOperator.ShardContext(searchContext.getSearchExecutionContext().getIndexReader(), () -> {
@ -291,4 +297,46 @@ public class LookupFromIndexIT extends AbstractEsqlIntegTestCase {
assertTrue(driverContext.isFinished());
assertThat(driverContext.getSnapshot().releasables(), empty());
}
private static MappedFieldType.BlockLoaderContext blContext() {
return new MappedFieldType.BlockLoaderContext() {
@Override
public String indexName() {
return "test_index";
}
@Override
public IndexSettings indexSettings() {
var imd = IndexMetadata.builder("test_index")
.settings(ESTestCase.indexSettings(IndexVersion.current(), 1, 1).put(Settings.EMPTY))
.build();
return new IndexSettings(imd, Settings.EMPTY);
}
@Override
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
return MappedFieldType.FieldExtractPreference.NONE;
}
@Override
public SearchLookup lookup() {
throw new UnsupportedOperationException();
}
@Override
public Set<String> sourcePaths(String name) {
return Set.of(name);
}
@Override
public String parentField(String field) {
return null;
}
@Override
public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
return FieldNamesFieldMapper.FieldNamesFieldType.get(true);
}
};
}
}

View File

@ -104,11 +104,11 @@ public class SpatialCentroid extends SpatialAggregateFunction implements ToAggre
return switch (type) {
case DataType.GEO_POINT -> switch (fieldExtractPreference) {
case DOC_VALUES -> new SpatialCentroidGeoPointDocValuesAggregatorFunctionSupplier();
case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier();
case NONE, EXTRACT_SPATIAL_BOUNDS, STORED -> new SpatialCentroidGeoPointSourceValuesAggregatorFunctionSupplier();
};
case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) {
case DOC_VALUES -> new SpatialCentroidCartesianPointDocValuesAggregatorFunctionSupplier();
case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier();
case NONE, EXTRACT_SPATIAL_BOUNDS, STORED -> new SpatialCentroidCartesianPointSourceValuesAggregatorFunctionSupplier();
};
default -> throw EsqlIllegalArgumentException.illegalDataType(type);
};

View File

@ -108,20 +108,20 @@ public final class SpatialExtent extends SpatialAggregateFunction implements ToA
return switch (type) {
case DataType.GEO_POINT -> switch (fieldExtractPreference) {
case DOC_VALUES -> new SpatialExtentGeoPointDocValuesAggregatorFunctionSupplier();
case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier();
case NONE, EXTRACT_SPATIAL_BOUNDS, STORED -> new SpatialExtentGeoPointSourceValuesAggregatorFunctionSupplier();
};
case DataType.CARTESIAN_POINT -> switch (fieldExtractPreference) {
case DOC_VALUES -> new SpatialExtentCartesianPointDocValuesAggregatorFunctionSupplier();
case NONE, EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier();
case NONE, EXTRACT_SPATIAL_BOUNDS, STORED -> new SpatialExtentCartesianPointSourceValuesAggregatorFunctionSupplier();
};
case DataType.GEO_SHAPE -> switch (fieldExtractPreference) {
case EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentGeoShapeDocValuesAggregatorFunctionSupplier();
case NONE -> new SpatialExtentGeoShapeSourceValuesAggregatorFunctionSupplier();
case NONE, STORED -> new SpatialExtentGeoShapeSourceValuesAggregatorFunctionSupplier();
case DOC_VALUES -> throw new EsqlIllegalArgumentException("Illegal field extract preference: " + fieldExtractPreference);
};
case DataType.CARTESIAN_SHAPE -> switch (fieldExtractPreference) {
case EXTRACT_SPATIAL_BOUNDS -> new SpatialExtentCartesianShapeDocValuesAggregatorFunctionSupplier();
case NONE -> new SpatialExtentCartesianShapeSourceValuesAggregatorFunctionSupplier();
case NONE, STORED -> new SpatialExtentCartesianShapeSourceValuesAggregatorFunctionSupplier();
case DOC_VALUES -> throw new EsqlIllegalArgumentException("Illegal field extract preference: " + fieldExtractPreference);
};
default -> throw EsqlIllegalArgumentException.illegalDataType(type);

View File

@ -11,13 +11,14 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext;
import org.elasticsearch.xpack.esql.optimizer.PhysicalOptimizerRules;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.ProjectAwayColumns;
import org.elasticsearch.xpack.esql.plan.physical.AggregateExec;
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec;
import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec;
import org.elasticsearch.xpack.esql.plan.physical.LeafExec;
import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;
import org.elasticsearch.xpack.esql.rule.Rule;
import java.util.ArrayList;
import java.util.LinkedHashSet;
@ -33,10 +34,10 @@ import java.util.Set;
*
* @see ProjectAwayColumns
*/
public class InsertFieldExtraction extends Rule<PhysicalPlan, PhysicalPlan> {
public class InsertFieldExtraction extends PhysicalOptimizerRules.ParameterizedOptimizerRule<PhysicalPlan, LocalPhysicalOptimizerContext> {
@Override
public PhysicalPlan apply(PhysicalPlan plan) {
public PhysicalPlan rule(PhysicalPlan plan, LocalPhysicalOptimizerContext context) {
// apply the plan locally, adding a field extractor right before data is loaded
// by going bottom-up
plan = plan.transformUp(p -> {
@ -67,7 +68,12 @@ public class InsertFieldExtraction extends Rule<PhysicalPlan, PhysicalPlan> {
if (child.outputSet().stream().anyMatch(EsQueryExec::isSourceAttribute)) {
found = true;
// collect source attributes and add the extractor
child = new FieldExtractExec(p.source(), child, List.copyOf(missing));
child = new FieldExtractExec(
p.source(),
child,
List.copyOf(missing),
context.configuration().pragmas().fieldExtractPreference()
);
}
}
newChildren.add(child);

View File

@ -36,6 +36,12 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize {
private final List<Attribute> attributesToExtract;
private final @Nullable Attribute sourceAttribute;
/**
* The default for {@link #fieldExtractPreference} if the plan doesn't require
* a preference.
*/
private final MappedFieldType.FieldExtractPreference defaultPreference;
/**
* Attributes that may be extracted as doc values even if that makes them
* less accurate. This is mostly used for geo fields which lose a lot of
@ -57,14 +63,20 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize {
private List<Attribute> lazyOutput;
public FieldExtractExec(Source source, PhysicalPlan child, List<Attribute> attributesToExtract) {
this(source, child, attributesToExtract, Set.of(), Set.of());
public FieldExtractExec(
Source source,
PhysicalPlan child,
List<Attribute> attributesToExtract,
MappedFieldType.FieldExtractPreference defaultPreference
) {
this(source, child, attributesToExtract, defaultPreference, Set.of(), Set.of());
}
private FieldExtractExec(
Source source,
PhysicalPlan child,
List<Attribute> attributesToExtract,
MappedFieldType.FieldExtractPreference defaultPreference,
Set<Attribute> docValuesAttributes,
Set<Attribute> boundsAttributes
) {
@ -73,14 +85,17 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize {
this.sourceAttribute = extractSourceAttributesFrom(child);
this.docValuesAttributes = docValuesAttributes;
this.boundsAttributes = boundsAttributes;
this.defaultPreference = defaultPreference;
}
private FieldExtractExec(StreamInput in) throws IOException {
this(
Source.readFrom((PlanStreamInput) in),
in.readNamedWriteable(PhysicalPlan.class),
in.readNamedWriteableCollectionAsList(Attribute.class)
in.readNamedWriteableCollectionAsList(Attribute.class),
MappedFieldType.FieldExtractPreference.NONE
);
// defaultPreference is only used on the data node and never serialized.
// docValueAttributes and boundsAttributes are only used on the data node and never serialized.
}
@ -89,6 +104,7 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize {
Source.EMPTY.writeTo(out);
out.writeNamedWriteable(child());
out.writeNamedWriteableCollection(attributesToExtract());
// defaultPreference is only used on the data node and never serialized.
// docValueAttributes and boundsAttributes are only used on the data node and never serialized.
}
@ -113,20 +129,20 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize {
@Override
protected NodeInfo<FieldExtractExec> info() {
return NodeInfo.create(this, FieldExtractExec::new, child(), attributesToExtract);
return NodeInfo.create(this, FieldExtractExec::new, child(), attributesToExtract, defaultPreference);
}
@Override
public UnaryExec replaceChild(PhysicalPlan newChild) {
return new FieldExtractExec(source(), newChild, attributesToExtract, docValuesAttributes, boundsAttributes);
return new FieldExtractExec(source(), newChild, attributesToExtract, defaultPreference, docValuesAttributes, boundsAttributes);
}
public FieldExtractExec withDocValuesAttributes(Set<Attribute> docValuesAttributes) {
return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundsAttributes);
return new FieldExtractExec(source(), child(), attributesToExtract, defaultPreference, docValuesAttributes, boundsAttributes);
}
public FieldExtractExec withBoundsAttributes(Set<Attribute> boundsAttributes) {
return new FieldExtractExec(source(), child(), attributesToExtract, docValuesAttributes, boundsAttributes);
return new FieldExtractExec(source(), child(), attributesToExtract, defaultPreference, docValuesAttributes, boundsAttributes);
}
public List<Attribute> attributesToExtract() {
@ -202,6 +218,6 @@ public class FieldExtractExec extends UnaryExec implements EstimatesRowSize {
if (docValuesAttributes.contains(attr)) {
return MappedFieldType.FieldExtractPreference.DOC_VALUES;
}
return MappedFieldType.FieldExtractPreference.NONE;
return defaultPreference;
}
}

View File

@ -18,6 +18,7 @@ import org.elasticsearch.compute.lucene.DataPartitioning;
import org.elasticsearch.compute.operator.Driver;
import org.elasticsearch.compute.operator.DriverStatus;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xpack.esql.core.expression.Expression;
@ -64,6 +65,12 @@ public final class QueryPragmas implements Writeable {
public static final Setting<ByteSizeValue> FOLD_LIMIT = Setting.memorySizeSetting("fold_limit", "5%");
public static final Setting<MappedFieldType.FieldExtractPreference> FIELD_EXTRACT_PREFERENCE = Setting.enumSetting(
MappedFieldType.FieldExtractPreference.class,
"field_extract_preference",
MappedFieldType.FieldExtractPreference.NONE
);
public static final QueryPragmas EMPTY = new QueryPragmas(Settings.EMPTY);
private final Settings settings;
@ -159,6 +166,16 @@ public final class QueryPragmas implements Writeable {
return FOLD_LIMIT.get(settings);
}
/**
* The default preference for extracting fields, defaults to {@code NONE}. Some fields must
* be extracted in some special way because of how they are used in the plan. But most fields
* can be loaded in many ways so they pick the ways that they think are most efficient in their
* {@link MappedFieldType#blockLoader} method. This can influence their choice.
*/
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
return FIELD_EXTRACT_PREFERENCE.get(settings);
}
public boolean isEmpty() {
return settings.isEmpty();
}

View File

@ -7,6 +7,7 @@
package org.elasticsearch.xpack.esql.plan.physical;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.tree.Source;
@ -18,7 +19,7 @@ public class FieldExtractExecSerializationTests extends AbstractPhysicalPlanSeri
Source source = randomSource();
PhysicalPlan child = randomChild(depth);
List<Attribute> attributesToExtract = randomFieldAttributes(1, 4, false);
return new FieldExtractExec(source, child, attributesToExtract);
return new FieldExtractExec(source, child, attributesToExtract, MappedFieldType.FieldExtractPreference.NONE);
}
@Override
@ -35,7 +36,7 @@ public class FieldExtractExecSerializationTests extends AbstractPhysicalPlanSeri
} else {
attributesToExtract = randomValueOtherThan(attributesToExtract, () -> randomFieldAttributes(1, 4, false));
}
return new FieldExtractExec(instance.source(), child, attributesToExtract);
return new FieldExtractExec(instance.source(), child, attributesToExtract, MappedFieldType.FieldExtractPreference.NONE);
}
@Override

View File

@ -329,7 +329,7 @@ public class UnsignedLongFieldMapper extends FieldMapper {
// Counters are not supported by ESQL so we load them in null
return BlockLoader.CONSTANT_NULLS;
}
if (hasDocValues()) {
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSource)) {
return new BlockDocValuesReader.LongsBlockLoader(name());
}
if (isSyntheticSource) {
@ -354,7 +354,8 @@ public class UnsignedLongFieldMapper extends FieldMapper {
return unsignedToSortableSignedLong(parseUnsignedLong(value));
}
};
BlockSourceReader.LeafIteratorLookup lookup = isStored() || isIndexed()
BlockSourceReader.LeafIteratorLookup lookup = hasDocValues() == false && (isStored() || isIndexed())
// We only write the field names field if there aren't doc values or norms
? BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())
: BlockSourceReader.lookupMatchingAll();
return new BlockSourceReader.LongsBlockLoader(valueFetcher, lookup);

View File

@ -18,8 +18,8 @@ import java.util.Map;
public class UnsignedLongFieldBlockLoaderTests extends NumberFieldBlockLoaderTestCase<Long> {
private static final long MASK_2_63 = 0x8000000000000000L;
public UnsignedLongFieldBlockLoaderTests() {
super(FieldType.UNSIGNED_LONG);
public UnsignedLongFieldBlockLoaderTests(Params params) {
super(FieldType.UNSIGNED_LONG, params);
}
@Override