mirror of https://github.com/apache/kafka.git
KAFKA-6684: Support casting Connect values with bytes schema to string

Allow casting values with logical types to string by calling the serialized (Java) object's toString(). Added tests for `BigDecimal` and `Date`, both as the whole record value and as individual fields.

Author: Amit Sela <amitsela33@gmail.com>
Reviewers: Randall Hauch <rhauch@gmail.com>, Robert Yokota <rayokota@gmail.com>, Ewen Cheslack-Postava <ewen@confluent.io>

Closes #4820 from amitsela/cast-transform-bytes
This commit is contained in:
parent c2af356724
commit aaa71d7e01
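To make the behavior concrete, here is a minimal sketch of what this change enables. It is not part of the commit and simply mirrors the new test cases in the diff below; the class name and topic are placeholders. It applies the value-side Cast transformation with spec "string" to a record whose value carries the Decimal logical type (a BYTES schema).

import java.math.BigDecimal;
import java.util.Collections;

import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.transforms.Cast;

// Illustrative driver class (not part of the commit).
public class CastBytesToStringExample {
    public static void main(String[] args) {
        // Cast the whole record value to a string, as a connector config would
        // do with "transforms.<name>.spec" set to "string".
        Cast<SourceRecord> cast = new Cast.Value<>();
        cast.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "string"));

        // A record whose value uses the Decimal logical type (BYTES schema).
        BigDecimal amount = new BigDecimal(42);
        SourceRecord record = new SourceRecord(null, null, "topic", 0,
                Decimal.schema(amount.scale()), amount);

        // Before this commit, BYTES was rejected as an input type; now the value
        // is rendered via toString() (or Values.dateFormatFor() for dates).
        SourceRecord transformed = cast.apply(record);
        System.out.println(transformed.value());               // prints "42"
        System.out.println(transformed.valueSchema().type());  // STRING
    }
}

Field-level casts use the same "field:type" spec syntax, e.g. "bigdecimal:string,date:string" as exercised by the updated castFieldsWithSchema test.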
org/apache/kafka/connect/data/Values.java
@@ -707,7 +707,7 @@ public class Values {
         return value.replaceAll("\\\\", "\\\\\\\\").replaceAll("\"", "\\\\\"");
     }
 
-    protected static DateFormat dateFormatFor(java.util.Date value) {
+    public static DateFormat dateFormatFor(java.util.Date value) {
         if (value.getTime() < MILLIS_PER_DAY) {
             return new SimpleDateFormat(ISO_8601_TIME_FORMAT_PATTERN);
         }
org/apache/kafka/connect/transforms/Cast.java
@@ -28,6 +28,7 @@ import org.apache.kafka.connect.data.Field;
 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaBuilder;
 import org.apache.kafka.connect.data.Struct;
+import org.apache.kafka.connect.data.Values;
 import org.apache.kafka.connect.errors.DataException;
 import org.apache.kafka.connect.transforms.util.SchemaUtil;
 import org.apache.kafka.connect.transforms.util.SimpleConfig;
@@ -78,9 +79,16 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
 
     private static final String PURPOSE = "cast types";
 
-    private static final Set<Schema.Type> SUPPORTED_CAST_TYPES = EnumSet.of(
+    private static final Set<Schema.Type> SUPPORTED_CAST_INPUT_TYPES = EnumSet.of(
             Schema.Type.INT8, Schema.Type.INT16, Schema.Type.INT32, Schema.Type.INT64,
-                    Schema.Type.FLOAT32, Schema.Type.FLOAT64, Schema.Type.BOOLEAN, Schema.Type.STRING
+                    Schema.Type.FLOAT32, Schema.Type.FLOAT64, Schema.Type.BOOLEAN,
+                            Schema.Type.STRING, Schema.Type.BYTES
     );
 
+    private static final Set<Schema.Type> SUPPORTED_CAST_OUTPUT_TYPES = EnumSet.of(
+            Schema.Type.INT8, Schema.Type.INT16, Schema.Type.INT32, Schema.Type.INT64,
+                    Schema.Type.FLOAT32, Schema.Type.FLOAT64, Schema.Type.BOOLEAN,
+                            Schema.Type.STRING
+    );
+
     // As a special case for casting the entire value (e.g. the incoming key is a int64 but you know it could be an
@@ -120,14 +128,14 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
 
     private R applySchemaless(R record) {
         if (wholeValueCastType != null) {
-            return newRecord(record, null, castValueToType(operatingValue(record), wholeValueCastType));
+            return newRecord(record, null, castValueToType(null, operatingValue(record), wholeValueCastType));
         }
 
         final Map<String, Object> value = requireMap(operatingValue(record), PURPOSE);
         final HashMap<String, Object> updatedValue = new HashMap<>(value);
         for (Map.Entry<String, Schema.Type> fieldSpec : casts.entrySet()) {
             String field = fieldSpec.getKey();
-            updatedValue.put(field, castValueToType(value.get(field), fieldSpec.getValue()));
+            updatedValue.put(field, castValueToType(null, value.get(field), fieldSpec.getValue()));
         }
         return newRecord(record, null, updatedValue);
     }
@@ -138,7 +146,7 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
 
         // Whole-record casting
         if (wholeValueCastType != null)
-            return newRecord(record, updatedSchema, castValueToType(operatingValue(record), wholeValueCastType));
+            return newRecord(record, updatedSchema, castValueToType(valueSchema, operatingValue(record), wholeValueCastType));
 
         // Casting within a struct
         final Struct value = requireStruct(operatingValue(record), PURPOSE);
@@ -147,7 +155,8 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
         for (Field field : value.schema().fields()) {
             final Object origFieldValue = value.get(field);
             final Schema.Type targetType = casts.get(field.name());
-            final Object newFieldValue = targetType != null ? castValueToType(origFieldValue, targetType) : origFieldValue;
+            final Object newFieldValue = targetType != null ? castValueToType(field.schema(), origFieldValue, targetType) : origFieldValue;
+            log.trace("Cast field '{}' from '{}' to '{}'", field.name(), origFieldValue, newFieldValue);
             updatedValue.put(updatedSchema.field(field.name()), newFieldValue);
         }
         return newRecord(record, updatedSchema, updatedValue);
@@ -168,8 +177,10 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
                     SchemaBuilder fieldBuilder = convertFieldType(casts.get(field.name()));
                     if (field.schema().isOptional())
                         fieldBuilder.optional();
-                    if (field.schema().defaultValue() != null)
-                        fieldBuilder.defaultValue(castValueToType(field.schema().defaultValue(), fieldBuilder.type()));
+                    if (field.schema().defaultValue() != null) {
+                        Schema fieldSchema = field.schema();
+                        fieldBuilder.defaultValue(castValueToType(fieldSchema, fieldSchema.defaultValue(), fieldBuilder.type()));
+                    }
                     builder.field(field.name(), fieldBuilder.build());
                 } else {
                     builder.field(field.name(), field.schema());
@@ -181,7 +192,7 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
         if (valueSchema.isOptional())
             builder.optional();
         if (valueSchema.defaultValue() != null)
-            builder.defaultValue(castValueToType(valueSchema.defaultValue(), builder.type()));
+            builder.defaultValue(castValueToType(valueSchema, valueSchema.defaultValue(), builder.type()));
 
         updatedSchema = builder.build();
         schemaUpdateCache.put(valueSchema, updatedSchema);
@@ -212,11 +223,12 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
 
     }
 
-    private static Object castValueToType(Object value, Schema.Type targetType) {
+    private static Object castValueToType(Schema schema, Object value, Schema.Type targetType) {
         try {
             if (value == null) return null;
 
-            Schema.Type inferredType = ConnectSchema.schemaType(value.getClass());
+            Schema.Type inferredType = schema == null ? ConnectSchema.schemaType(value.getClass()) :
+                    schema.type();
             if (inferredType == null) {
                 throw new DataException("Cast transformation was passed a value of type " + value.getClass()
                         + " which is not supported by Connect's data API");
@@ -327,7 +339,12 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
     }
 
     private static String castToString(Object value) {
-        return value.toString();
+        if (value instanceof java.util.Date) {
+            java.util.Date dateValue = (java.util.Date) value;
+            return Values.dateFormatFor(dateValue).format(dateValue);
+        } else {
+            return value.toString();
+        }
     }
 
     protected abstract Schema operatingSchema(R record);
@@ -370,15 +387,19 @@ public abstract class Cast<R extends ConnectRecord<R>> implements Transformation
     }
 
     private static Schema.Type validCastType(Schema.Type type, FieldType fieldType) {
-        if (!SUPPORTED_CAST_TYPES.contains(type)) {
-            String message = "Cast transformation does not support casting to/from " + type
-                    + "; supported types are " + SUPPORTED_CAST_TYPES;
-            switch (fieldType) {
-                case INPUT:
-                    throw new DataException(message);
-                case OUTPUT:
-                    throw new ConfigException(message);
-            }
+        switch (fieldType) {
+            case INPUT:
+                if (!SUPPORTED_CAST_INPUT_TYPES.contains(type)) {
+                    throw new DataException("Cast transformation does not support casting from " +
+                        type + "; supported types are " + SUPPORTED_CAST_INPUT_TYPES);
+                }
+                break;
+            case OUTPUT:
+                if (!SUPPORTED_CAST_OUTPUT_TYPES.contains(type)) {
+                    throw new ConfigException("Cast transformation does not support casting to " +
+                        type + "; supported types are " + SUPPORTED_CAST_OUTPUT_TYPES);
+                }
+                break;
         }
         return type;
     }
org/apache/kafka/connect/transforms/CastTest.java
@@ -18,15 +18,18 @@
 package org.apache.kafka.connect.transforms;
 
 import org.apache.kafka.common.config.ConfigException;
+import org.apache.kafka.connect.data.Decimal;
 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaBuilder;
 import org.apache.kafka.connect.data.Struct;
 import org.apache.kafka.connect.data.Timestamp;
+import org.apache.kafka.connect.data.Values;
 import org.apache.kafka.connect.errors.DataException;
 import org.apache.kafka.connect.source.SourceRecord;
 import org.junit.After;
 import org.junit.Test;
 
+import java.math.BigDecimal;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
@@ -39,6 +42,7 @@ import static org.junit.Assert.assertTrue;
 public class CastTest {
     private final Cast<SourceRecord> xformKey = new Cast.Key<>();
     private final Cast<SourceRecord> xformValue = new Cast.Value<>();
+    private static final long MILLIS_PER_DAY = 24 * 60 * 60 * 1000;
 
     @After
     public void teardown() {
@@ -61,6 +65,11 @@ public class CastTest {
         xformKey.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "foo:array"));
     }
 
+    @Test(expected = ConfigException.class)
+    public void testUnsupportedTargetType() {
+        xformKey.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "foo:bytes"));
+    }
+
     @Test(expected = ConfigException.class)
     public void testConfigInvalidMap() {
         xformKey.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "foo:int8:extra"));
@@ -171,6 +180,28 @@ public class CastTest {
         assertEquals("42", transformed.value());
     }
 
+    @Test
+    public void castWholeBigDecimalRecordValueWithSchemaString() {
+        BigDecimal bigDecimal = new BigDecimal(42);
+        xformValue.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "string"));
+        SourceRecord transformed = xformValue.apply(new SourceRecord(null, null, "topic", 0,
+                Decimal.schema(bigDecimal.scale()), bigDecimal));
+
+        assertEquals(Schema.Type.STRING, transformed.valueSchema().type());
+        assertEquals("42", transformed.value());
+    }
+
+    @Test
+    public void castWholeDateRecordValueWithSchemaString() {
+        Date timestamp = new Date(MILLIS_PER_DAY + 1); // day + 1msec to get a timestamp formatting.
+        xformValue.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "string"));
+        SourceRecord transformed = xformValue.apply(new SourceRecord(null, null, "topic", 0,
+                Timestamp.SCHEMA, timestamp));
+
+        assertEquals(Schema.Type.STRING, transformed.valueSchema().type());
+        assertEquals(Values.dateFormatFor(timestamp).format(timestamp), transformed.value());
+    }
+
     @Test
     public void castWholeRecordDefaultValue() {
         // Validate default value in schema is correctly converted
@@ -292,7 +323,8 @@ public class CastTest {
 
     @Test
     public void castFieldsWithSchema() {
-        xformValue.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "int8:int16,int16:int32,int32:int64,int64:boolean,float32:float64,float64:boolean,boolean:int8,string:int32,optional:int32"));
+        Date day = new Date(MILLIS_PER_DAY);
+        xformValue.configure(Collections.singletonMap(Cast.SPEC_CONFIG, "int8:int16,int16:int32,int32:int64,int64:boolean,float32:float64,float64:boolean,boolean:int8,string:int32,bigdecimal:string,date:string,optional:int32"));
 
         // Include an optional fields and fields with defaults to validate their values are passed through properly
         SchemaBuilder builder = SchemaBuilder.struct();
@@ -305,6 +337,8 @@ public class CastTest {
         builder.field("float64", SchemaBuilder.float64().defaultValue(-1.125).build());
         builder.field("boolean", Schema.BOOLEAN_SCHEMA);
         builder.field("string", Schema.STRING_SCHEMA);
+        builder.field("bigdecimal", Decimal.schema(new BigDecimal(42).scale()));
+        builder.field("date", Timestamp.SCHEMA);
         builder.field("optional", Schema.OPTIONAL_FLOAT32_SCHEMA);
         builder.field("timestamp", Timestamp.SCHEMA);
         Schema supportedTypesSchema = builder.build();
@@ -317,6 +351,8 @@ public class CastTest {
         recordValue.put("float32", 32.f);
         recordValue.put("float64", -64.);
         recordValue.put("boolean", true);
+        recordValue.put("bigdecimal", new BigDecimal(42));
+        recordValue.put("date", day);
         recordValue.put("string", "42");
         recordValue.put("timestamp", new Date(0));
         // optional field intentionally omitted
@@ -335,8 +371,25 @@ public class CastTest {
         assertEquals(true, ((Struct) transformed.value()).schema().field("float64").schema().defaultValue());
         assertEquals((byte) 1, ((Struct) transformed.value()).get("boolean"));
         assertEquals(42, ((Struct) transformed.value()).get("string"));
+        assertEquals("42", ((Struct) transformed.value()).get("bigdecimal"));
+        assertEquals(Values.dateFormatFor(day).format(day), ((Struct) transformed.value()).get("date"));
         assertEquals(new Date(0), ((Struct) transformed.value()).get("timestamp"));
         assertNull(((Struct) transformed.value()).get("optional"));
+
+        Schema transformedSchema = ((Struct) transformed.value()).schema();
+        assertEquals(Schema.INT16_SCHEMA.type(), transformedSchema.field("int8").schema().type());
+        assertEquals(Schema.OPTIONAL_INT32_SCHEMA.type(), transformedSchema.field("int16").schema().type());
+        assertEquals(Schema.INT64_SCHEMA.type(), transformedSchema.field("int32").schema().type());
+        assertEquals(Schema.BOOLEAN_SCHEMA.type(), transformedSchema.field("int64").schema().type());
+        assertEquals(Schema.FLOAT64_SCHEMA.type(), transformedSchema.field("float32").schema().type());
+        assertEquals(Schema.BOOLEAN_SCHEMA.type(), transformedSchema.field("float64").schema().type());
+        assertEquals(Schema.INT8_SCHEMA.type(), transformedSchema.field("boolean").schema().type());
+        assertEquals(Schema.INT32_SCHEMA.type(), transformedSchema.field("string").schema().type());
+        assertEquals(Schema.STRING_SCHEMA.type(), transformedSchema.field("bigdecimal").schema().type());
+        assertEquals(Schema.STRING_SCHEMA.type(), transformedSchema.field("date").schema().type());
+        assertEquals(Schema.OPTIONAL_INT32_SCHEMA.type(), transformedSchema.field("optional").schema().type());
+        // The following fields are not changed
+        assertEquals(Timestamp.SCHEMA.type(), transformedSchema.field("timestamp").schema().type());
     }
 
     @SuppressWarnings("unchecked")