mirror of https://github.com/apache/kafka.git
KAFKA-6684: Support casting Connect values with bytes schema to string
Allow to cast LogicalType to string by calling the serialized (Java) object's toString(). Added tests for `BigDecimal` and `Date` as whole record and as fields. Author: Amit Sela <amitsela33@gmail.com> Reviewers: Randall Hauch <rhauch@gmail.com>, Robert Yokota <rayokota@gmail.com>, Ewen Cheslack-Postava <ewen@confluent.io> Closes #4820 from amitsela/cast-transform-bytes
This commit is contained in:
parent
c2af356724
commit
aaa71d7e01
|
|
@ -707,7 +707,7 @@ public class Values {
|
||||||
return value.replaceAll("\\\\", "\\\\\\\\").replaceAll("\"", "\\\\\"");
|
return value.replaceAll("\\\\", "\\\\\\\\").replaceAll("\"", "\\\\\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static DateFormat dateFormatFor(java.util.Date value) {
|
public static DateFormat dateFormatFor(java.util.Date value) {
|
||||||
if (value.getTime() < MILLIS_PER_DAY) {
|
if (value.getTime() < MILLIS_PER_DAY) {
|
||||||
return new SimpleDateFormat(ISO_8601_TIME_FORMAT_PATTERN);
|
return new SimpleDateFormat(ISO_8601_TIME_FORMAT_PATTERN);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ import org.apache.kafka.connect.data.Field;
|
||||||
import org.apache.kafka.connect.data.Schema;
|
import org.apache.kafka.connect.data.Schema;
|
||||||
import org.apache.kafka.connect.data.SchemaBuilder;
|
import org.apache.kafka.connect.data.SchemaBuilder;
|
||||||
import org.apache.kafka.connect.data.Struct;
|
import org.apache.kafka.connect.data.Struct;
|
||||||
|
import org.apache.kafka.connect.data.Values;
|
||||||
import org.apache.kafka.connect.errors.DataException;
|
import org.apache.kafka.connect.errors.DataException;
|
||||||
import org.apache.kafka.connect.transforms.util.SchemaUtil;
|
import org.apache.kafka.connect.transforms.util.SchemaUtil;
|
||||||
import org.apache.kafka.connect.transforms.util.SimpleConfig;
|
import org.apache.kafka.connect.transforms.util.SimpleConfig;
|
||||||
|
|
@ -78,9 +79,16 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
|
|
||||||
private static final String PURPOSE = "cast types";
|
private static final String PURPOSE = "cast types";
|
||||||
|
|
||||||
private static final Set<Schema.Type> SUPPORTED_CAST_TYPES = EnumSet.of(
|
private static final Set<Schema.Type> SUPPORTED_CAST_INPUT_TYPES = EnumSet.of(
|
||||||
Schema.Type.INT8, Schema.Type.INT16, Schema.Type.INT32, Schema.Type.INT64,
|
Schema.Type.INT8, Schema.Type.INT16, Schema.Type.INT32, Schema.Type.INT64,
|
||||||
Schema.Type.FLOAT32, Schema.Type.FLOAT64, Schema.Type.BOOLEAN, Schema.Type.STRING
|
Schema.Type.FLOAT32, Schema.Type.FLOAT64, Schema.Type.BOOLEAN,
|
||||||
|
Schema.Type.STRING, Schema.Type.BYTES
|
||||||
|
);
|
||||||
|
|
||||||
|
private static final Set<Schema.Type> SUPPORTED_CAST_OUTPUT_TYPES = EnumSet.of(
|
||||||
|
Schema.Type.INT8, Schema.Type.INT16, Schema.Type.INT32, Schema.Type.INT64,
|
||||||
|
Schema.Type.FLOAT32, Schema.Type.FLOAT64, Schema.Type.BOOLEAN,
|
||||||
|
Schema.Type.STRING
|
||||||
);
|
);
|
||||||
|
|
||||||
// As a special case for casting the entire value (e.g. the incoming key is a int64 but you know it could be an
|
// As a special case for casting the entire value (e.g. the incoming key is a int64 but you know it could be an
|
||||||
|
|
@ -120,14 +128,14 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
|
|
||||||
private R applySchemaless(R record) {
|
private R applySchemaless(R record) {
|
||||||
if (wholeValueCastType != null) {
|
if (wholeValueCastType != null) {
|
||||||
return newRecord(record, null, castValueToType(operatingValue(record), wholeValueCastType));
|
return newRecord(record, null, castValueToType(null, operatingValue(record), wholeValueCastType));
|
||||||
}
|
}
|
||||||
|
|
||||||
final Map<String, Object> value = requireMap(operatingValue(record), PURPOSE);
|
final Map<String, Object> value = requireMap(operatingValue(record), PURPOSE);
|
||||||
final HashMap<String, Object> updatedValue = new HashMap<>(value);
|
final HashMap<String, Object> updatedValue = new HashMap<>(value);
|
||||||
for (Map.Entry<String, Schema.Type> fieldSpec : casts.entrySet()) {
|
for (Map.Entry<String, Schema.Type> fieldSpec : casts.entrySet()) {
|
||||||
String field = fieldSpec.getKey();
|
String field = fieldSpec.getKey();
|
||||||
updatedValue.put(field, castValueToType(value.get(field), fieldSpec.getValue()));
|
updatedValue.put(field, castValueToType(null, value.get(field), fieldSpec.getValue()));
|
||||||
}
|
}
|
||||||
return newRecord(record, null, updatedValue);
|
return newRecord(record, null, updatedValue);
|
||||||
}
|
}
|
||||||
|
|
@ -138,7 +146,7 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
|
|
||||||
// Whole-record casting
|
// Whole-record casting
|
||||||
if (wholeValueCastType != null)
|
if (wholeValueCastType != null)
|
||||||
return newRecord(record, updatedSchema, castValueToType(operatingValue(record), wholeValueCastType));
|
return newRecord(record, updatedSchema, castValueToType(valueSchema, operatingValue(record), wholeValueCastType));
|
||||||
|
|
||||||
// Casting within a struct
|
// Casting within a struct
|
||||||
final Struct value = requireStruct(operatingValue(record), PURPOSE);
|
final Struct value = requireStruct(operatingValue(record), PURPOSE);
|
||||||
|
|
@ -147,7 +155,8 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
for (Field field : value.schema().fields()) {
|
for (Field field : value.schema().fields()) {
|
||||||
final Object origFieldValue = value.get(field);
|
final Object origFieldValue = value.get(field);
|
||||||
final Schema.Type targetType = casts.get(field.name());
|
final Schema.Type targetType = casts.get(field.name());
|
||||||
final Object newFieldValue = targetType != null ? castValueToType(origFieldValue, targetType) : origFieldValue;
|
final Object newFieldValue = targetType != null ? castValueToType(field.schema(), origFieldValue, targetType) : origFieldValue;
|
||||||
|
log.trace("Cast field '{}' from '{}' to '{}'", field.name(), origFieldValue, newFieldValue);
|
||||||
updatedValue.put(updatedSchema.field(field.name()), newFieldValue);
|
updatedValue.put(updatedSchema.field(field.name()), newFieldValue);
|
||||||
}
|
}
|
||||||
return newRecord(record, updatedSchema, updatedValue);
|
return newRecord(record, updatedSchema, updatedValue);
|
||||||
|
|
@ -168,8 +177,10 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
SchemaBuilder fieldBuilder = convertFieldType(casts.get(field.name()));
|
SchemaBuilder fieldBuilder = convertFieldType(casts.get(field.name()));
|
||||||
if (field.schema().isOptional())
|
if (field.schema().isOptional())
|
||||||
fieldBuilder.optional();
|
fieldBuilder.optional();
|
||||||
if (field.schema().defaultValue() != null)
|
if (field.schema().defaultValue() != null) {
|
||||||
fieldBuilder.defaultValue(castValueToType(field.schema().defaultValue(), fieldBuilder.type()));
|
Schema fieldSchema = field.schema();
|
||||||
|
fieldBuilder.defaultValue(castValueToType(fieldSchema, fieldSchema.defaultValue(), fieldBuilder.type()));
|
||||||
|
}
|
||||||
builder.field(field.name(), fieldBuilder.build());
|
builder.field(field.name(), fieldBuilder.build());
|
||||||
} else {
|
} else {
|
||||||
builder.field(field.name(), field.schema());
|
builder.field(field.name(), field.schema());
|
||||||
|
|
@ -181,7 +192,7 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
if (valueSchema.isOptional())
|
if (valueSchema.isOptional())
|
||||||
builder.optional();
|
builder.optional();
|
||||||
if (valueSchema.defaultValue() != null)
|
if (valueSchema.defaultValue() != null)
|
||||||
builder.defaultValue(castValueToType(valueSchema.defaultValue(), builder.type()));
|
builder.defaultValue(castValueToType(valueSchema, valueSchema.defaultValue(), builder.type()));
|
||||||
|
|
||||||
updatedSchema = builder.build();
|
updatedSchema = builder.build();
|
||||||
schemaUpdateCache.put(valueSchema, updatedSchema);
|
schemaUpdateCache.put(valueSchema, updatedSchema);
|
||||||
|
|
@ -212,11 +223,12 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Object castValueToType(Object value, Schema.Type targetType) {
|
private static Object castValueToType(Schema schema, Object value, Schema.Type targetType) {
|
||||||
try {
|
try {
|
||||||
if (value == null) return null;
|
if (value == null) return null;
|
||||||
|
|
||||||
Schema.Type inferredType = ConnectSchema.schemaType(value.getClass());
|
Schema.Type inferredType = schema == null ? ConnectSchema.schemaType(value.getClass()) :
|
||||||
|
schema.type();
|
||||||
if (inferredType == null) {
|
if (inferredType == null) {
|
||||||
throw new DataException("Cast transformation was passed a value of type " + value.getClass()
|
throw new DataException("Cast transformation was passed a value of type " + value.getClass()
|
||||||
+ " which is not supported by Connect's data API");
|
+ " which is not supported by Connect's data API");
|
||||||
|
|
@ -327,7 +339,12 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String castToString(Object value) {
|
private static String castToString(Object value) {
|
||||||
return value.toString();
|
if (value instanceof java.util.Date) {
|
||||||
|
java.util.Date dateValue = (java.util.Date) value;
|
||||||
|
return Values.dateFormatFor(dateValue).format(dateValue);
|
||||||
|
} else {
|
||||||
|
return value.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract Schema operatingSchema(R record);
|
protected abstract Schema operatingSchema(R record);
|
||||||
|
|
@ -370,15 +387,19 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Schema.Type validCastType(Schema.Type type, FieldType fieldType) {
|
private static Schema.Type validCastType(Schema.Type type, FieldType fieldType) {
|
||||||
if (!SUPPORTED_CAST_TYPES.contains(type)) {
|
switch (fieldType) {
|
||||||
String message = "Cast transformation does not support casting to/from " + type
|
case INPUT:
|
||||||
+ "; supported types are " + SUPPORTED_CAST_TYPES;
|
if (!SUPPORTED_CAST_INPUT_TYPES.contains(type)) {
|
||||||
switch (fieldType) {
|
throw new DataException("Cast transformation does not support casting from " +
|
||||||
case INPUT:
|
type + "; supported types are " + SUPPORTED_CAST_INPUT_TYPES);
|
||||||
throw new DataException(message);
|
}
|
||||||
case OUTPUT:
|
break;
|
||||||
throw new ConfigException(message);
|
case OUTPUT:
|
||||||
}
|
if (!SUPPORTED_CAST_OUTPUT_TYPES.contains(type)) {
|
||||||
|
throw new ConfigException("Cast transformation does not support casting to " +
|
||||||
|
type + "; supported types are " + SUPPORTED_CAST_OUTPUT_TYPES);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,15 +18,18 @@
|
||||||
package org.apache.kafka.connect.transforms;
|
package org.apache.kafka.connect.transforms;
|
||||||
|
|
||||||
import org.apache.kafka.common.config.ConfigException;
|
import org.apache.kafka.common.config.ConfigException;
|
||||||
|
import org.apache.kafka.connect.data.Decimal;
|
||||||
import org.apache.kafka.connect.data.Schema;
|
import org.apache.kafka.connect.data.Schema;
|
||||||
import org.apache.kafka.connect.data.SchemaBuilder;
|
import org.apache.kafka.connect.data.SchemaBuilder;
|
||||||
import org.apache.kafka.connect.data.Struct;
|
import org.apache.kafka.connect.data.Struct;
|
||||||
import org.apache.kafka.connect.data.Timestamp;
|
import org.apache.kafka.connect.data.Timestamp;
|
||||||
|
import org.apache.kafka.connect.data.Values;
|
||||||
import org.apache.kafka.connect.errors.DataException;
|
import org.apache.kafka.connect.errors.DataException;
|
||||||
import org.apache.kafka.connect.source.SourceRecord;
|
import org.apache.kafka.connect.source.SourceRecord;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
@ -39,6 +42,7 @@ import static org.junit.Assert.assertTrue;
|
||||||
public class CastTest {
|
public class CastTest {
|
||||||
private final Cast<SourceRecord> xformKey = new Cast.Key<>();
|
private final Cast<SourceRecord> xformKey = new Cast.Key<>();
|
||||||
private final Cast<SourceRecord> xformValue = new Cast.Value<>();
|
private final Cast<SourceRecord> xformValue = new Cast.Value<>();
|
||||||
|
private static final long MILLIS_PER_DAY = 24 * 60 * 60 * 1000;
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void teardown() {
|
public void teardown() {
|
||||||
|
|
@ -61,6 +65,11 @@ public class CastTest {
|
||||||
xformKey.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "foo:array"));
|
xformKey.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "foo:array"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(expected = ConfigException.class)
|
||||||
|
public void testUnsupportedTargetType() {
|
||||||
|
xformKey.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "foo:bytes"));
|
||||||
|
}
|
||||||
|
|
||||||
@Test(expected = ConfigException.class)
|
@Test(expected = ConfigException.class)
|
||||||
public void testConfigInvalidMap() {
|
public void testConfigInvalidMap() {
|
||||||
xformKey.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "foo:int8:extra"));
|
xformKey.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "foo:int8:extra"));
|
||||||
|
|
@ -171,6 +180,28 @@ public class CastTest {
|
||||||
assertEquals("42", transformed.value());
|
assertEquals("42", transformed.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void castWholeBigDecimalRecordValueWithSchemaString() {
|
||||||
|
BigDecimal bigDecimal = new BigDecimal(42);
|
||||||
|
xformValue.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "string"));
|
||||||
|
SourceRecord transformed = xformValue.apply(new SourceRecord(null, null, "topic", 0,
|
||||||
|
Decimal.schema(bigDecimal.scale()), bigDecimal));
|
||||||
|
|
||||||
|
assertEquals(Schema.Type.STRING, transformed.valueSchema().type());
|
||||||
|
assertEquals("42", transformed.value());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void castWholeDateRecordValueWithSchemaString() {
|
||||||
|
Date timestamp = new Date(MILLIS_PER_DAY + 1); // day + 1msec to get a timestamp formatting.
|
||||||
|
xformValue.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "string"));
|
||||||
|
SourceRecord transformed = xformValue.apply(new SourceRecord(null, null, "topic", 0,
|
||||||
|
Timestamp.SCHEMA, timestamp));
|
||||||
|
|
||||||
|
assertEquals(Schema.Type.STRING, transformed.valueSchema().type());
|
||||||
|
assertEquals(Values.dateFormatFor(timestamp).format(timestamp), transformed.value());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void castWholeRecordDefaultValue() {
|
public void castWholeRecordDefaultValue() {
|
||||||
// Validate default value in schema is correctly converted
|
// Validate default value in schema is correctly converted
|
||||||
|
|
@ -292,7 +323,8 @@ public class CastTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void castFieldsWithSchema() {
|
public void castFieldsWithSchema() {
|
||||||
xformValue.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "int8:int16,int16:int32,int32:int64,int64:boolean,float32:float64,float64:boolean,boolean:int8,string:int32,optional:int32"));
|
Date day = new Date(MILLIS_PER_DAY);
|
||||||
|
xformValue.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "int8:int16,int16:int32,int32:int64,int64:boolean,float32:float64,float64:boolean,boolean:int8,string:int32,bigdecimal:string,date:string,optional:int32"));
|
||||||
|
|
||||||
// Include an optional fields and fields with defaults to validate their values are passed through properly
|
// Include an optional fields and fields with defaults to validate their values are passed through properly
|
||||||
SchemaBuilder builder = SchemaBuilder.struct();
|
SchemaBuilder builder = SchemaBuilder.struct();
|
||||||
|
|
@ -305,6 +337,8 @@ public class CastTest {
|
||||||
builder.field("float64", SchemaBuilder.float64().defaultValue(-1.125).build());
|
builder.field("float64", SchemaBuilder.float64().defaultValue(-1.125).build());
|
||||||
builder.field("boolean", Schema.BOOLEAN_SCHEMA);
|
builder.field("boolean", Schema.BOOLEAN_SCHEMA);
|
||||||
builder.field("string", Schema.STRING_SCHEMA);
|
builder.field("string", Schema.STRING_SCHEMA);
|
||||||
|
builder.field("bigdecimal", Decimal.schema(new BigDecimal(42).scale()));
|
||||||
|
builder.field("date", Timestamp.SCHEMA);
|
||||||
builder.field("optional", Schema.OPTIONAL_FLOAT32_SCHEMA);
|
builder.field("optional", Schema.OPTIONAL_FLOAT32_SCHEMA);
|
||||||
builder.field("timestamp", Timestamp.SCHEMA);
|
builder.field("timestamp", Timestamp.SCHEMA);
|
||||||
Schema supportedTypesSchema = builder.build();
|
Schema supportedTypesSchema = builder.build();
|
||||||
|
|
@ -317,6 +351,8 @@ public class CastTest {
|
||||||
recordValue.put("float32", 32.f);
|
recordValue.put("float32", 32.f);
|
||||||
recordValue.put("float64", -64.);
|
recordValue.put("float64", -64.);
|
||||||
recordValue.put("boolean", true);
|
recordValue.put("boolean", true);
|
||||||
|
recordValue.put("bigdecimal", new BigDecimal(42));
|
||||||
|
recordValue.put("date", day);
|
||||||
recordValue.put("string", "42");
|
recordValue.put("string", "42");
|
||||||
recordValue.put("timestamp", new Date(0));
|
recordValue.put("timestamp", new Date(0));
|
||||||
// optional field intentionally omitted
|
// optional field intentionally omitted
|
||||||
|
|
@ -335,8 +371,25 @@ public class CastTest {
|
||||||
assertEquals(true, ((Struct) transformed.value()).schema().field("float64").schema().defaultValue());
|
assertEquals(true, ((Struct) transformed.value()).schema().field("float64").schema().defaultValue());
|
||||||
assertEquals((byte) 1, ((Struct) transformed.value()).get("boolean"));
|
assertEquals((byte) 1, ((Struct) transformed.value()).get("boolean"));
|
||||||
assertEquals(42, ((Struct) transformed.value()).get("string"));
|
assertEquals(42, ((Struct) transformed.value()).get("string"));
|
||||||
|
assertEquals("42", ((Struct) transformed.value()).get("bigdecimal"));
|
||||||
|
assertEquals(Values.dateFormatFor(day).format(day), ((Struct) transformed.value()).get("date"));
|
||||||
assertEquals(new Date(0), ((Struct) transformed.value()).get("timestamp"));
|
assertEquals(new Date(0), ((Struct) transformed.value()).get("timestamp"));
|
||||||
assertNull(((Struct) transformed.value()).get("optional"));
|
assertNull(((Struct) transformed.value()).get("optional"));
|
||||||
|
|
||||||
|
Schema transformedSchema = ((Struct) transformed.value()).schema();
|
||||||
|
assertEquals(Schema.INT16_SCHEMA.type(), transformedSchema.field("int8").schema().type());
|
||||||
|
assertEquals(Schema.OPTIONAL_INT32_SCHEMA.type(), transformedSchema.field("int16").schema().type());
|
||||||
|
assertEquals(Schema.INT64_SCHEMA.type(), transformedSchema.field("int32").schema().type());
|
||||||
|
assertEquals(Schema.BOOLEAN_SCHEMA.type(), transformedSchema.field("int64").schema().type());
|
||||||
|
assertEquals(Schema.FLOAT64_SCHEMA.type(), transformedSchema.field("float32").schema().type());
|
||||||
|
assertEquals(Schema.BOOLEAN_SCHEMA.type(), transformedSchema.field("float64").schema().type());
|
||||||
|
assertEquals(Schema.INT8_SCHEMA.type(), transformedSchema.field("boolean").schema().type());
|
||||||
|
assertEquals(Schema.INT32_SCHEMA.type(), transformedSchema.field("string").schema().type());
|
||||||
|
assertEquals(Schema.STRING_SCHEMA.type(), transformedSchema.field("bigdecimal").schema().type());
|
||||||
|
assertEquals(Schema.STRING_SCHEMA.type(), transformedSchema.field("date").schema().type());
|
||||||
|
assertEquals(Schema.OPTIONAL_INT32_SCHEMA.type(), transformedSchema.field("optional").schema().type());
|
||||||
|
// The following fields are not changed
|
||||||
|
assertEquals(Timestamp.SCHEMA.type(), transformedSchema.field("timestamp").schema().type());
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue