Fix match_only_text bugs if defined as multi-field (#130188)

* Fix match_only_text bugs if defined as multi-field

These bugs started to occur after #129126 was merged.

Closes #129737
This commit is contained in:
Martijn van Groningen 2025-06-30 09:35:54 +02:00 committed by GitHub
parent cbdafbac90
commit 15c0028c04
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 337 additions and 23 deletions

View File

@ -14,6 +14,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -40,7 +41,6 @@ import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.BlockSourceReader;
@ -133,7 +133,9 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
tsi,
indexAnalyzer,
context.isSourceSynthetic(),
meta.getValue()
meta.getValue(),
withinMultiField,
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField()
);
return ft;
}
@ -163,17 +165,24 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final TextFieldType textFieldType;
private final String originalName;
private final boolean withinMultiField;
private final boolean hasCompatibleMultiFields;
public MatchOnlyTextFieldType(
String name,
TextSearchInfo tsi,
Analyzer indexAnalyzer,
boolean isSyntheticSource,
Map<String, String> meta
Map<String, String> meta,
boolean withinMultiField,
boolean hasCompatibleMultiFields
) {
super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
this.textFieldType = new TextFieldType(name, isSyntheticSource);
this.originalName = isSyntheticSource ? name + "._original" : null;
this.withinMultiField = withinMultiField;
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
}
public MatchOnlyTextFieldType(String name) {
@ -182,7 +191,9 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
Lucene.STANDARD_ANALYZER,
false,
Collections.emptyMap()
Collections.emptyMap(),
false,
false
);
}
@ -209,16 +220,34 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
"Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled."
);
}
if (searchExecutionContext.isSourceSynthetic()) {
if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
String parentField = searchExecutionContext.parentPath(name());
var parent = searchExecutionContext.lookup().fieldType(parentField);
if (parent.isStored()) {
return storedFieldFetcher(parentField);
} else if (parent.hasDocValues()) {
return docValuesFieldFetcher(parentField);
} else {
assert false : "parent field should either be stored or have doc values";
}
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
if (kwd != null) {
var fieldType = kwd.fieldType();
if (fieldType.isStored()) {
return storedFieldFetcher(fieldType.name());
} else if (fieldType.hasDocValues()) {
return docValuesFieldFetcher(fieldType.name());
} else {
assert false : "multi field should either be stored or have doc values";
}
} else {
assert false : "multi field of type keyword should exist";
}
} else if (searchExecutionContext.isSourceSynthetic()) {
String name = storedFieldNameForSyntheticSource();
StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name));
return context -> {
LeafStoredFieldLoader leafLoader = loader.getLoader(context, null);
return docId -> {
leafLoader.advanceTo(docId);
return leafLoader.storedFields().get(name);
};
};
return storedFieldFetcher(name);
}
return context -> {
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
@ -234,6 +263,35 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
};
}
/**
 * Creates a per-segment value fetcher backed by the sorted-set doc values of the field {@code name}.
 * <p>
 * For each segment the doc values are obtained once via {@link DocValues#getSortedSet}; the returned
 * per-document function then yields every value of the requested document converted to a
 * {@code String}, or an empty list when the document has no value for the field.
 *
 * @param name the name of the field whose doc values should be read
 * @return a function that, given a leaf context, produces a doc-id to values lookup
 */
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(String name) {
    return leafContext -> {
        var docValues = DocValues.getSortedSet(leafContext.reader(), name);
        return docId -> {
            // Guard clause: nothing to read when the iterator cannot be positioned on this document.
            if (docValues.advanceExact(docId) == false) {
                return List.of();
            }
            int valueCount = docValues.docValueCount();
            List<Object> collected = new ArrayList<>(valueCount);
            // Consume exactly as many ordinals as the document reports and resolve each to its term.
            for (int remaining = valueCount; remaining > 0; remaining--) {
                long ordinal = docValues.nextOrd();
                collected.add(docValues.lookupOrd(ordinal).utf8ToString());
            }
            return collected;
        };
    };
}
/**
 * Creates a per-segment value fetcher backed by the stored field {@code name}.
 * <p>
 * A single {@link StoredFieldLoader} restricted to that one field is created up front and shared by
 * all segments. For every segment a leaf loader is derived from it; the per-document function
 * advances that leaf loader to the requested document and returns the entry its stored-fields map
 * holds for {@code name}.
 *
 * @param name the name of the stored field to load
 * @return a function that, given a leaf context, produces a doc-id to values lookup
 */
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String name) {
    // Built once, outside the lambdas, so every segment reuses the same loader definition.
    var fieldsToLoad = Set.of(name);
    var storedFieldLoader = StoredFieldLoader.create(false, fieldsToLoad);
    return leafContext -> {
        var perLeafLoader = storedFieldLoader.getLoader(leafContext, null);
        return docId -> {
            perLeafLoader.advanceTo(docId);
            var loadedFields = perLeafLoader.storedFields();
            return loadedFields.get(name);
        };
    };
}
private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) {
return new ConstantScoreQuery(
new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)
@ -506,6 +564,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
@Override
protected SyntheticSourceSupport syntheticSourceSupport() {
if (storeSource) {
return new SyntheticSourceSupport.Native(
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
@Override
@ -519,5 +578,13 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
}
}
);
} else {
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
if (kwd != null) {
return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName()));
}
assert false : "there should be a suite field mapper with native synthetic source support";
return super.syntheticSourceSupport();
}
}
}

View File

@ -394,4 +394,170 @@ synthetic_source with copy_to:
- match:
hits.hits.0.fields.copy.0: "Apache Lucene powers Elasticsearch"
---
# Scenario: a keyword field `foo` carries a match_only_text sub-field `foo.text` in an index
# whose source mode is synthetic. A phrase query against the sub-field must match the document
# and the `_source` in the response must contain the originally indexed value of `foo`.
synthetic_source match_only_text as multi-field:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"
# Create the index with synthetic source and the keyword -> match_only_text mapping.
- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: keyword
fields:
text:
type: match_only_text
# Index a single document and refresh so that it is searchable right away.
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache Lucene powers Elasticsearch"
# Run a phrase query on the match_only_text sub-field.
- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo.text: apache lucene
# Exactly one hit is expected, with `foo` present in the returned _source.
- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
---
# Scenario: same layout as a keyword parent with a match_only_text sub-field, but the parent
# keyword is configured with `store: true` and `doc_values: false`. A phrase query on the
# sub-field must still match and `_source.foo` must be returned in a synthetic-source index.
synthetic_source match_only_text as multi-field with stored keyword as parent:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"
# Create the index; the parent keyword is stored and has doc values disabled.
- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: keyword
store: true
doc_values: false
fields:
text:
type: match_only_text
# Index a single document and refresh so that it is searchable right away.
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache Lucene powers Elasticsearch"
# Run a phrase query on the match_only_text sub-field.
- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo.text: apache lucene
# Exactly one hit is expected, with `foo` present in the returned _source.
- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
---
# Scenario: the roles are reversed - `foo` itself is match_only_text and owns a keyword
# sub-field `foo.raw`. In a synthetic-source index a phrase query on `foo` must match the
# document and `_source.foo` must contain the originally indexed value.
synthetic_source match_only_text with multi-field:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"
# Create the index with synthetic source and the match_only_text -> keyword mapping.
- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: match_only_text
fields:
raw:
type: keyword
# Index a single document and refresh so that it is searchable right away.
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache Lucene powers Elasticsearch"
# Run a phrase query directly on the match_only_text field.
- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: apache lucene
# Exactly one hit is expected, with `foo` present in the returned _source.
- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
---
# Scenario: `foo` is match_only_text with a keyword sub-field `foo.raw` that is configured with
# `store: true` and `doc_values: false`. In a synthetic-source index a phrase query on `foo`
# must match the document and `_source.foo` must contain the originally indexed value.
synthetic_source match_only_text with stored multi-field:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"
# Create the index; the keyword sub-field is stored and has doc values disabled.
- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: match_only_text
fields:
raw:
type: keyword
store: true
doc_values: false
# Index a single document and refresh so that it is searchable right away.
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache Lucene powers Elasticsearch"
# Run a phrase query directly on the match_only_text field.
- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: apache lucene
# Exactly one hit is expected, with `foo` present in the returned _source.
- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"

View File

@ -0,0 +1,81 @@
---
# Scenario: a keyword field `foo` carries a `text` sub-field `foo.text` in an index whose source
# mode is synthetic. A phrase query against the sub-field must match the document and the
# `_source` in the response must contain the originally indexed value of `foo`.
synthetic_source text as multi-field:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"
# Create the index with synthetic source and the keyword -> text mapping.
- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: keyword
fields:
text:
type: text
# Index a single document and refresh so that it is searchable right away.
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache Lucene powers Elasticsearch"
# Run a phrase query on the text sub-field.
- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo.text: apache lucene
# Exactly one hit is expected, with `foo` present in the returned _source.
- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
---
# Scenario: `foo` is a `text` field that owns a keyword sub-field `foo.raw`. In a
# synthetic-source index a phrase query on `foo` must match the document and `_source.foo`
# must contain the originally indexed value.
synthetic_source text with multi-field:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"
# Create the index with synthetic source and the text -> keyword mapping.
- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: text
fields:
raw:
type: keyword
# Index a single document and refresh so that it is searchable right away.
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache Lucene powers Elasticsearch"
# Run a phrase query directly on the text field.
- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: apache lucene
# Exactly one hit is expected, with `foo` present in the returned _source.
- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"