Add index version for match_only_text stored field in binary format (#130363)

Follow-up to #130049 to gate using the binary format for the stored field
in match_only_text fields behind an index version.
This commit is contained in:
Jordan Powers 2025-07-01 18:19:18 -07:00 committed by GitHub
parent 7d4ba31681
commit a69c48477f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 56 additions and 10 deletions

View File

@ -35,6 +35,7 @@ import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.UTF8DecodingReader;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext;
@ -105,8 +106,15 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final TextParams.Analyzers analyzers;
private final boolean withinMultiField;
private final boolean storedFieldInBinaryFormat;
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean withinMultiField,
boolean storedFieldInBinaryFormat
) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
@ -116,6 +124,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
indexCreatedVersion
);
this.withinMultiField = withinMultiField;
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
}
@Override
@ -135,7 +144,8 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
context.isSourceSynthetic(),
meta.getValue(),
withinMultiField,
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField()
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(),
storedFieldInBinaryFormat
);
return ft;
}
@ -155,8 +165,22 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
}
}
/**
 * Decides whether the stored field that backs synthetic source for a {@code match_only_text}
 * field uses the binary ({@code BytesRef}) format rather than a string, based on the version
 * the index was created with.
 *
 * The binary format applies to two version ranges:
 * <ul>
 *   <li>the main line, from {@link IndexVersions#MATCH_ONLY_TEXT_STORED_AS_BYTES} onwards;</li>
 *   <li>the 8.x backport line, from {@link IndexVersions#MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X}
 *       up to {@link IndexVersions#UPGRADE_TO_LUCENE_10_0_0} (the first 9.x index version).</li>
 * </ul>
 * Indices created with any other version keep the string format, so both the reader and the
 * writer must be gated on this result.
 *
 * @param indexCreatedVersion the version the index was created with
 * @return {@code true} if the stored field is in binary format for that index version
 */
private static boolean isSyntheticSourceStoredFieldInBinaryFormat(IndexVersion indexCreatedVersion) {
    if (indexCreatedVersion.onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES)) {
        return true;
    }
    // The 8.x range must start at the backport of THIS feature. Starting at an earlier, unrelated
    // backport version would classify older 8.x indices (which stored strings) as binary.
    // NOTE(review): assumes between() treats the upper bound as exclusive — confirm against IndexVersion.
    return indexCreatedVersion.between(
        IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X,
        IndexVersions.UPGRADE_TO_LUCENE_10_0_0
    );
}
public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
(n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
c.isWithinMultiField(),
isSyntheticSourceStoredFieldInBinaryFormat(c.indexVersionCreated())
)
);
public static class MatchOnlyTextFieldType extends StringFieldType {
@ -167,6 +191,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final boolean withinMultiField;
private final boolean hasCompatibleMultiFields;
private final boolean storedFieldInBinaryFormat;
public MatchOnlyTextFieldType(
String name,
@ -175,7 +200,8 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
boolean isSyntheticSource,
Map<String, String> meta,
boolean withinMultiField,
boolean hasCompatibleMultiFields
boolean hasCompatibleMultiFields,
boolean storedFieldInBinaryFormat
) {
super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
@ -183,6 +209,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
this.originalName = isSyntheticSource ? name + "._original" : null;
this.withinMultiField = withinMultiField;
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
}
public MatchOnlyTextFieldType(String name) {
@ -193,6 +220,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
false,
Collections.emptyMap(),
false,
false,
false
);
}
@ -451,8 +479,12 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (textFieldType.isSyntheticSource()) {
if (storedFieldInBinaryFormat) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
} else {
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
}
}
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
// MatchOnlyText never has norms, so we have to use the field names field
BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name());
@ -502,6 +534,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final boolean storeSource;
private final FieldType fieldType;
private final boolean withinMultiField;
private final boolean storedFieldInBinaryFormat;
private MatchOnlyTextFieldMapper(
String simpleName,
@ -521,6 +554,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource;
this.withinMultiField = builder.withinMultiField;
this.storedFieldInBinaryFormat = builder.storedFieldInBinaryFormat;
}
@Override
@ -530,7 +564,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField, storedFieldInBinaryFormat).init(this);
}
@Override
@ -547,8 +581,12 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
context.addToFieldNames(fieldType().name());
if (storeSource) {
if (storedFieldInBinaryFormat) {
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
} else {
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value.string()));
}
}
}

View File

@ -26,8 +26,10 @@ import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
@ -356,10 +358,14 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
}
public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException {
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
var mappings = mapping(b -> {
b.startObject("field1").field("type", "match_only_text").endObject();
b.startObject("field2").field("type", "match_only_text").endObject();
})).documentMapper();
});
var settings = Settings.builder().put("index.mapping.source.mode", "synthetic").build();
DocumentMapper mapper = createMapperService(IndexVersions.UPGRADE_TO_LUCENE_10_2_2, settings, () -> true, mappings)
.documentMapper();
try (Directory directory = newDirectory()) {
RandomIndexWriter iw = indexWriterForSyntheticSource(directory);

View File

@ -145,6 +145,7 @@ public class IndexVersions {
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion UPGRADE_TO_LUCENE_9_12_2 = def(8_534_0_00, Version.LUCENE_9_12_2);
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_535_0_00, Version.LUCENE_9_12_2);
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X = def(8_536_0_00, Version.LUCENE_9_12_2);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0);
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0);
public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, Version.LUCENE_10_0_0);
@ -178,6 +179,7 @@ public class IndexVersions {
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2);
/*
* STOP! READ THIS FIRST! No, really,