From a69c48477f2fbca93115613c2b512e8b469142e3 Mon Sep 17 00:00:00 2001 From: Jordan Powers Date: Tue, 1 Jul 2025 18:19:18 -0700 Subject: [PATCH] Add index version for match_only_text stored field in binary format (#130363) Follow-up to #130049 to gate using the binary format for the stored field in match_only_text fields behind an index version. --- .../extras/MatchOnlyTextFieldMapper.java | 54 ++++++++++++++++--- .../extras/MatchOnlyTextFieldMapperTests.java | 10 +++- .../elasticsearch/index/IndexVersions.java | 2 + 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 387477570ece..500b51415a45 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.text.UTF8DecodingReader; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.FieldDataContext; @@ -105,8 +106,15 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { private final TextParams.Analyzers analyzers; private final boolean withinMultiField; + private final boolean storedFieldInBinaryFormat; - public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) { + public Builder( + String name, + IndexVersion indexCreatedVersion, + IndexAnalyzers indexAnalyzers, + boolean withinMultiField, + boolean storedFieldInBinaryFormat + ) 
{ super(name); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( @@ -116,6 +124,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { indexCreatedVersion ); this.withinMultiField = withinMultiField; + this.storedFieldInBinaryFormat = storedFieldInBinaryFormat; } @Override @@ -135,7 +144,8 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { context.isSourceSynthetic(), meta.getValue(), withinMultiField, - multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() + multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(), + storedFieldInBinaryFormat ); return ft; } @@ -155,8 +165,22 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { } } + private static boolean isSyntheticSourceStoredFieldInBinaryFormat(IndexVersion indexCreatedVersion) { + return indexCreatedVersion.onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES) + || indexCreatedVersion.between( + IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X, + IndexVersions.UPGRADE_TO_LUCENE_10_0_0 + ); + } + public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField()) + (n, c) -> new Builder( + n, + c.indexVersionCreated(), + c.getIndexAnalyzers(), + c.isWithinMultiField(), + isSyntheticSourceStoredFieldInBinaryFormat(c.indexVersionCreated()) + ) ); public static class MatchOnlyTextFieldType extends StringFieldType { @@ -167,6 +191,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { private final boolean withinMultiField; private final boolean hasCompatibleMultiFields; + private final boolean storedFieldInBinaryFormat; public MatchOnlyTextFieldType( String name, @@ -175,7 +200,8 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { boolean isSyntheticSource, Map meta, boolean withinMultiField, - boolean hasCompatibleMultiFields + boolean hasCompatibleMultiFields, + boolean storedFieldInBinaryFormat ) { 
super(name, true, false, false, tsi, meta); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); @@ -183,6 +209,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { this.originalName = isSyntheticSource ? name + "._original" : null; this.withinMultiField = withinMultiField; this.hasCompatibleMultiFields = hasCompatibleMultiFields; + this.storedFieldInBinaryFormat = storedFieldInBinaryFormat; } public MatchOnlyTextFieldType(String name) { @@ -193,6 +220,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { false, Collections.emptyMap(), false, + false, false ); } @@ -451,7 +479,11 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (textFieldType.isSyntheticSource()) { - return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource()); + if (storedFieldInBinaryFormat) { + return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource()); + } else { + return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource()); + } } SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name())); // MatchOnlyText never has norms, so we have to use the field names field @@ -502,6 +534,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { private final boolean storeSource; private final FieldType fieldType; private final boolean withinMultiField; + private final boolean storedFieldInBinaryFormat; private MatchOnlyTextFieldMapper( String simpleName, @@ -521,6 +554,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); this.storeSource = storeSource; this.withinMultiField = builder.withinMultiField; + this.storedFieldInBinaryFormat = builder.storedFieldInBinaryFormat; } @Override @@ -530,7 +564,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { 
@Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this); + return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField, storedFieldInBinaryFormat).init(this); } @Override @@ -547,8 +581,12 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { context.addToFieldNames(fieldType().name()); if (storeSource) { - final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); - context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef)); + if (storedFieldInBinaryFormat) { + final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); + context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef)); + } else { + context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value.string())); + } } } diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java index cfbf3a338f69..ef72e234f8d6 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java @@ -26,8 +26,10 @@ import org.apache.lucene.tests.analysis.Token; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import 
org.elasticsearch.index.mapper.LuceneDocument; @@ -356,10 +358,14 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase { } public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException { - DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> { + var mappings = mapping(b -> { b.startObject("field1").field("type", "match_only_text").endObject(); b.startObject("field2").field("type", "match_only_text").endObject(); - })).documentMapper(); + }); + var settings = Settings.builder().put("index.mapping.source.mode", "synthetic").build(); + DocumentMapper mapper = createMapperService(IndexVersions.UPGRADE_TO_LUCENE_10_2_2, settings, () -> true, mappings) + .documentMapper(); + try (Directory directory = newDirectory()) { RandomIndexWriter iw = indexWriterForSyntheticSource(directory); diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 6ff33cf05d51..2e464afa72b7 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -145,6 +145,7 @@ public class IndexVersions { public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1); public static final IndexVersion UPGRADE_TO_LUCENE_9_12_2 = def(8_534_0_00, Version.LUCENE_9_12_2); public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_535_0_00, Version.LUCENE_9_12_2); + public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X = def(8_536_0_00, Version.LUCENE_9_12_2); public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0); public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0); public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, 
Version.LUCENE_10_0_0); @@ -178,6 +179,7 @@ public class IndexVersions { public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2); public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2); public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2); + public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2); /* * STOP! READ THIS FIRST! No, really,