Add index version for match_only_text stored field in binary format (#130363)

Follow-up to #130049 to gate using the binary format for the stored field
in match_only_text fields behind an index version.
This commit is contained in:
Jordan Powers 2025-07-01 18:19:18 -07:00 committed by GitHub
parent 7d4ba31681
commit a69c48477f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 56 additions and 10 deletions

View File

@ -35,6 +35,7 @@ import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.UTF8DecodingReader; import org.elasticsearch.common.text.UTF8DecodingReader;
import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.FieldDataContext;
@ -105,8 +106,15 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final TextParams.Analyzers analyzers; private final TextParams.Analyzers analyzers;
private final boolean withinMultiField; private final boolean withinMultiField;
private final boolean storedFieldInBinaryFormat;
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) { public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean withinMultiField,
boolean storedFieldInBinaryFormat
) {
super(name); super(name);
this.indexCreatedVersion = indexCreatedVersion; this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers( this.analyzers = new TextParams.Analyzers(
@ -116,6 +124,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
indexCreatedVersion indexCreatedVersion
); );
this.withinMultiField = withinMultiField; this.withinMultiField = withinMultiField;
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
} }
@Override @Override
@ -135,7 +144,8 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
context.isSourceSynthetic(), context.isSourceSynthetic(),
meta.getValue(), meta.getValue(),
withinMultiField, withinMultiField,
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(),
storedFieldInBinaryFormat
); );
return ft; return ft;
} }
@ -155,8 +165,22 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
} }
} }
/**
 * Decides whether the synthetic-source stored field of a {@code match_only_text} field uses the
 * binary format for an index created at the given version.
 *
 * <p>The binary format applies to indices created on or after
 * {@link IndexVersions#MATCH_ONLY_TEXT_STORED_AS_BYTES}, and to 8.x indices in the range that starts at
 * {@link IndexVersions#MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X} and is bounded by
 * {@link IndexVersions#UPGRADE_TO_LUCENE_10_0_0}. Every other index keeps the string format, so the
 * reader chosen for an index always matches what that index wrote.
 *
 * @param indexCreatedVersion the version the index was created with
 * @return {@code true} if the stored field is in binary format, {@code false} if it is a string
 */
private static boolean isSyntheticSourceStoredFieldInBinaryFormat(IndexVersion indexCreatedVersion) {
    return indexCreatedVersion.onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES)
        // The 8.x range must start at the backport constant dedicated to this change; starting at an
        // unrelated, earlier constant would misclassify indices that stored the field as a string.
        || indexCreatedVersion.between(
            IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X,
            IndexVersions.UPGRADE_TO_LUCENE_10_0_0
        );
}
public static final TypeParser PARSER = new TypeParser( public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField()) (n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
c.isWithinMultiField(),
isSyntheticSourceStoredFieldInBinaryFormat(c.indexVersionCreated())
)
); );
public static class MatchOnlyTextFieldType extends StringFieldType { public static class MatchOnlyTextFieldType extends StringFieldType {
@ -167,6 +191,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final boolean withinMultiField; private final boolean withinMultiField;
private final boolean hasCompatibleMultiFields; private final boolean hasCompatibleMultiFields;
private final boolean storedFieldInBinaryFormat;
public MatchOnlyTextFieldType( public MatchOnlyTextFieldType(
String name, String name,
@ -175,7 +200,8 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
boolean isSyntheticSource, boolean isSyntheticSource,
Map<String, String> meta, Map<String, String> meta,
boolean withinMultiField, boolean withinMultiField,
boolean hasCompatibleMultiFields boolean hasCompatibleMultiFields,
boolean storedFieldInBinaryFormat
) { ) {
super(name, true, false, false, tsi, meta); super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
@ -183,6 +209,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
this.originalName = isSyntheticSource ? name + "._original" : null; this.originalName = isSyntheticSource ? name + "._original" : null;
this.withinMultiField = withinMultiField; this.withinMultiField = withinMultiField;
this.hasCompatibleMultiFields = hasCompatibleMultiFields; this.hasCompatibleMultiFields = hasCompatibleMultiFields;
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
} }
public MatchOnlyTextFieldType(String name) { public MatchOnlyTextFieldType(String name) {
@ -193,6 +220,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
false, false,
Collections.emptyMap(), Collections.emptyMap(),
false, false,
false,
false false
); );
} }
@ -451,7 +479,11 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
@Override @Override
public BlockLoader blockLoader(BlockLoaderContext blContext) { public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (textFieldType.isSyntheticSource()) { if (textFieldType.isSyntheticSource()) {
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource()); if (storedFieldInBinaryFormat) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
} else {
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
}
} }
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name())); SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
// MatchOnlyText never has norms, so we have to use the field names field // MatchOnlyText never has norms, so we have to use the field names field
@ -502,6 +534,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
private final boolean storeSource; private final boolean storeSource;
private final FieldType fieldType; private final FieldType fieldType;
private final boolean withinMultiField; private final boolean withinMultiField;
private final boolean storedFieldInBinaryFormat;
private MatchOnlyTextFieldMapper( private MatchOnlyTextFieldMapper(
String simpleName, String simpleName,
@ -521,6 +554,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource; this.storeSource = storeSource;
this.withinMultiField = builder.withinMultiField; this.withinMultiField = builder.withinMultiField;
this.storedFieldInBinaryFormat = builder.storedFieldInBinaryFormat;
} }
@Override @Override
@ -530,7 +564,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
@Override @Override
public FieldMapper.Builder getMergeBuilder() { public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this); return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField, storedFieldInBinaryFormat).init(this);
} }
@Override @Override
@ -547,8 +581,12 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
context.addToFieldNames(fieldType().name()); context.addToFieldNames(fieldType().name());
if (storeSource) { if (storeSource) {
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); if (storedFieldInBinaryFormat) {
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef)); final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
} else {
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value.string()));
}
} }
} }

View File

@ -26,8 +26,10 @@ import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.Tuple; import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.LuceneDocument;
@ -356,10 +358,14 @@ public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
} }
public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException { public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException {
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> { var mappings = mapping(b -> {
b.startObject("field1").field("type", "match_only_text").endObject(); b.startObject("field1").field("type", "match_only_text").endObject();
b.startObject("field2").field("type", "match_only_text").endObject(); b.startObject("field2").field("type", "match_only_text").endObject();
})).documentMapper(); });
var settings = Settings.builder().put("index.mapping.source.mode", "synthetic").build();
DocumentMapper mapper = createMapperService(IndexVersions.UPGRADE_TO_LUCENE_10_2_2, settings, () -> true, mappings)
.documentMapper();
try (Directory directory = newDirectory()) { try (Directory directory = newDirectory()) {
RandomIndexWriter iw = indexWriterForSyntheticSource(directory); RandomIndexWriter iw = indexWriterForSyntheticSource(directory);

View File

@ -145,6 +145,7 @@ public class IndexVersions {
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1); public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion UPGRADE_TO_LUCENE_9_12_2 = def(8_534_0_00, Version.LUCENE_9_12_2); public static final IndexVersion UPGRADE_TO_LUCENE_9_12_2 = def(8_534_0_00, Version.LUCENE_9_12_2);
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_535_0_00, Version.LUCENE_9_12_2); public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_535_0_00, Version.LUCENE_9_12_2);
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X = def(8_536_0_00, Version.LUCENE_9_12_2);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0); public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0);
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0); public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0);
public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, Version.LUCENE_10_0_0); public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, Version.LUCENE_10_0_0);
@ -178,6 +179,7 @@ public class IndexVersions {
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2); public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2); public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2); public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2);
/* /*
* STOP! READ THIS FIRST! No, really, * STOP! READ THIS FIRST! No, really,