Fix match_only_text bugs if defined as multi-field (#130188)
* Fix match_only_text bugs if defined as multi-field. The bugs started to occur when #129126 was merged. Closes #129737
This commit is contained in:
parent
cbdafbac90
commit
15c0028c04
|
@ -14,6 +14,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -40,7 +41,6 @@ import org.elasticsearch.index.fielddata.FieldDataContext;
|
|||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
|
||||
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
|
||||
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
|
||||
import org.elasticsearch.index.mapper.BlockLoader;
|
||||
import org.elasticsearch.index.mapper.BlockSourceReader;
|
||||
|
@ -133,7 +133,9 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
tsi,
|
||||
indexAnalyzer,
|
||||
context.isSourceSynthetic(),
|
||||
meta.getValue()
|
||||
meta.getValue(),
|
||||
withinMultiField,
|
||||
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField()
|
||||
);
|
||||
return ft;
|
||||
}
|
||||
|
@ -163,17 +165,24 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
private final TextFieldType textFieldType;
|
||||
private final String originalName;
|
||||
|
||||
private final boolean withinMultiField;
|
||||
private final boolean hasCompatibleMultiFields;
|
||||
|
||||
public MatchOnlyTextFieldType(
|
||||
String name,
|
||||
TextSearchInfo tsi,
|
||||
Analyzer indexAnalyzer,
|
||||
boolean isSyntheticSource,
|
||||
Map<String, String> meta
|
||||
Map<String, String> meta,
|
||||
boolean withinMultiField,
|
||||
boolean hasCompatibleMultiFields
|
||||
) {
|
||||
super(name, true, false, false, tsi, meta);
|
||||
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
|
||||
this.textFieldType = new TextFieldType(name, isSyntheticSource);
|
||||
this.originalName = isSyntheticSource ? name + "._original" : null;
|
||||
this.withinMultiField = withinMultiField;
|
||||
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
|
||||
}
|
||||
|
||||
public MatchOnlyTextFieldType(String name) {
|
||||
|
@ -182,7 +191,9 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
|
||||
Lucene.STANDARD_ANALYZER,
|
||||
false,
|
||||
Collections.emptyMap()
|
||||
Collections.emptyMap(),
|
||||
false,
|
||||
false
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -209,16 +220,34 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
"Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled."
|
||||
);
|
||||
}
|
||||
if (searchExecutionContext.isSourceSynthetic()) {
|
||||
if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
|
||||
String parentField = searchExecutionContext.parentPath(name());
|
||||
var parent = searchExecutionContext.lookup().fieldType(parentField);
|
||||
if (parent.isStored()) {
|
||||
return storedFieldFetcher(parentField);
|
||||
} else if (parent.hasDocValues()) {
|
||||
return docValuesFieldFetcher(parentField);
|
||||
} else {
|
||||
assert false : "parent field should either be stored or have doc values";
|
||||
}
|
||||
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
|
||||
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
|
||||
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
|
||||
if (kwd != null) {
|
||||
var fieldType = kwd.fieldType();
|
||||
if (fieldType.isStored()) {
|
||||
return storedFieldFetcher(fieldType.name());
|
||||
} else if (fieldType.hasDocValues()) {
|
||||
return docValuesFieldFetcher(fieldType.name());
|
||||
} else {
|
||||
assert false : "multi field should either be stored or have doc values";
|
||||
}
|
||||
} else {
|
||||
assert false : "multi field of type keyword should exist";
|
||||
}
|
||||
} else if (searchExecutionContext.isSourceSynthetic()) {
|
||||
String name = storedFieldNameForSyntheticSource();
|
||||
StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name));
|
||||
return context -> {
|
||||
LeafStoredFieldLoader leafLoader = loader.getLoader(context, null);
|
||||
return docId -> {
|
||||
leafLoader.advanceTo(docId);
|
||||
return leafLoader.storedFields().get(name);
|
||||
};
|
||||
};
|
||||
return storedFieldFetcher(name);
|
||||
}
|
||||
return context -> {
|
||||
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
|
||||
|
@ -234,6 +263,35 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
};
|
||||
}
|
||||
|
||||
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(String name) {
|
||||
return context -> {
|
||||
var sortedDocValues = DocValues.getSortedSet(context.reader(), name);
|
||||
return docId -> {
|
||||
if (sortedDocValues.advanceExact(docId)) {
|
||||
var values = new ArrayList<>(sortedDocValues.docValueCount());
|
||||
for (int i = 0; i < sortedDocValues.docValueCount(); i++) {
|
||||
long ord = sortedDocValues.nextOrd();
|
||||
values.add(sortedDocValues.lookupOrd(ord).utf8ToString());
|
||||
}
|
||||
return values;
|
||||
} else {
|
||||
return List.of();
|
||||
}
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String name) {
|
||||
var loader = StoredFieldLoader.create(false, Set.of(name));
|
||||
return context -> {
|
||||
var leafLoader = loader.getLoader(context, null);
|
||||
return docId -> {
|
||||
leafLoader.advanceTo(docId);
|
||||
return leafLoader.storedFields().get(name);
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) {
|
||||
return new ConstantScoreQuery(
|
||||
new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)
|
||||
|
@ -506,18 +564,27 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
|
|||
|
||||
@Override
|
||||
protected SyntheticSourceSupport syntheticSourceSupport() {
|
||||
return new SyntheticSourceSupport.Native(
|
||||
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
|
||||
@Override
|
||||
protected void write(XContentBuilder b, Object value) throws IOException {
|
||||
if (value instanceof BytesRef valueBytes) {
|
||||
b.value(valueBytes.utf8ToString());
|
||||
} else {
|
||||
assert value instanceof String;
|
||||
b.value(value.toString());
|
||||
if (storeSource) {
|
||||
return new SyntheticSourceSupport.Native(
|
||||
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
|
||||
@Override
|
||||
protected void write(XContentBuilder b, Object value) throws IOException {
|
||||
if (value instanceof BytesRef valueBytes) {
|
||||
b.value(valueBytes.utf8ToString());
|
||||
} else {
|
||||
assert value instanceof String;
|
||||
b.value(value.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
} else {
|
||||
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
|
||||
if (kwd != null) {
|
||||
return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName()));
|
||||
}
|
||||
);
|
||||
assert false : "there should be a suite field mapper with native synthetic source support";
|
||||
return super.syntheticSourceSupport();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -394,4 +394,170 @@ synthetic_source with copy_to:
|
|||
- match:
|
||||
hits.hits.0.fields.copy.0: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
---
|
||||
synthetic_source match_only_text as multi-field:
|
||||
- requires:
|
||||
cluster_features: [ "mapper.source.mode_from_index_setting" ]
|
||||
reason: "Source mode configured through index setting"
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
settings:
|
||||
index:
|
||||
mapping.source.mode: synthetic
|
||||
mappings:
|
||||
properties:
|
||||
foo:
|
||||
type: keyword
|
||||
fields:
|
||||
text:
|
||||
type: match_only_text
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: synthetic_source_test
|
||||
id: "1"
|
||||
refresh: true
|
||||
body:
|
||||
foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
query:
|
||||
match_phrase:
|
||||
foo.text: apache lucene
|
||||
|
||||
- match: { "hits.total.value": 1 }
|
||||
- match:
|
||||
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
---
|
||||
synthetic_source match_only_text as multi-field with stored keyword as parent:
|
||||
- requires:
|
||||
cluster_features: [ "mapper.source.mode_from_index_setting" ]
|
||||
reason: "Source mode configured through index setting"
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
settings:
|
||||
index:
|
||||
mapping.source.mode: synthetic
|
||||
mappings:
|
||||
properties:
|
||||
foo:
|
||||
type: keyword
|
||||
store: true
|
||||
doc_values: false
|
||||
fields:
|
||||
text:
|
||||
type: match_only_text
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: synthetic_source_test
|
||||
id: "1"
|
||||
refresh: true
|
||||
body:
|
||||
foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
query:
|
||||
match_phrase:
|
||||
foo.text: apache lucene
|
||||
|
||||
- match: { "hits.total.value": 1 }
|
||||
- match:
|
||||
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
---
|
||||
synthetic_source match_only_text with multi-field:
|
||||
- requires:
|
||||
cluster_features: [ "mapper.source.mode_from_index_setting" ]
|
||||
reason: "Source mode configured through index setting"
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
settings:
|
||||
index:
|
||||
mapping.source.mode: synthetic
|
||||
mappings:
|
||||
properties:
|
||||
foo:
|
||||
type: match_only_text
|
||||
fields:
|
||||
raw:
|
||||
type: keyword
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: synthetic_source_test
|
||||
id: "1"
|
||||
refresh: true
|
||||
body:
|
||||
foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
query:
|
||||
match_phrase:
|
||||
foo: apache lucene
|
||||
|
||||
- match: { "hits.total.value": 1 }
|
||||
- match:
|
||||
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
---
|
||||
synthetic_source match_only_text with stored multi-field:
|
||||
- requires:
|
||||
cluster_features: [ "mapper.source.mode_from_index_setting" ]
|
||||
reason: "Source mode configured through index setting"
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
settings:
|
||||
index:
|
||||
mapping.source.mode: synthetic
|
||||
mappings:
|
||||
properties:
|
||||
foo:
|
||||
type: match_only_text
|
||||
fields:
|
||||
raw:
|
||||
type: keyword
|
||||
store: true
|
||||
doc_values: false
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: synthetic_source_test
|
||||
id: "1"
|
||||
refresh: true
|
||||
body:
|
||||
foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
query:
|
||||
match_phrase:
|
||||
foo: apache lucene
|
||||
|
||||
- match: { "hits.total.value": 1 }
|
||||
- match:
|
||||
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
---
|
||||
synthetic_source text as multi-field:
|
||||
- requires:
|
||||
cluster_features: [ "mapper.source.mode_from_index_setting" ]
|
||||
reason: "Source mode configured through index setting"
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
settings:
|
||||
index:
|
||||
mapping.source.mode: synthetic
|
||||
mappings:
|
||||
properties:
|
||||
foo:
|
||||
type: keyword
|
||||
fields:
|
||||
text:
|
||||
type: text
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: synthetic_source_test
|
||||
id: "1"
|
||||
refresh: true
|
||||
body:
|
||||
foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
query:
|
||||
match_phrase:
|
||||
foo.text: apache lucene
|
||||
|
||||
- match: { "hits.total.value": 1 }
|
||||
- match:
|
||||
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
---
|
||||
synthetic_source text with multi-field:
|
||||
- requires:
|
||||
cluster_features: [ "mapper.source.mode_from_index_setting" ]
|
||||
reason: "Source mode configured through index setting"
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
settings:
|
||||
index:
|
||||
mapping.source.mode: synthetic
|
||||
mappings:
|
||||
properties:
|
||||
foo:
|
||||
type: text
|
||||
fields:
|
||||
raw:
|
||||
type: keyword
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: synthetic_source_test
|
||||
id: "1"
|
||||
refresh: true
|
||||
body:
|
||||
foo: "Apache Lucene powers Elasticsearch"
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: synthetic_source_test
|
||||
body:
|
||||
query:
|
||||
match_phrase:
|
||||
foo: apache lucene
|
||||
|
||||
- match: { "hits.total.value": 1 }
|
||||
- match:
|
||||
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
|
Loading…
Reference in New Issue