Compare commits


216 Commits

Author SHA1 Message Date
Adhiraj 451d0da3fb
fix: add missing jdk http sender for otlp metrics exporter (#3124)
fix: add missing jdk http sender for otlp metrics exporter

Signed-off-by: Adhiraj <68840640+adhraj12@users.noreply.github.com>
2026-01-04 22:25:41 +08:00
ANSH THUKRAL cc62ad524a
docs: clarify recommended onboarding path in contributing guide (#3120)
Signed-off-by: ANSH THUKRAL <anshthukral2504@gmail.com>
2025-12-29 11:21:33 +08:00
Xu Han@AutoMQ 4be8c789d3
feat(eventloop): add ref count to eventloop (#3126)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-25 18:52:29 +08:00
Xu Han@AutoMQ 2418658424
fix(zerozone): notify the listeners in appendAsFollower (#3122)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-24 14:40:40 +08:00
Xu Han@AutoMQ e5de8a921a
chore(zerozone): set the request timeout to 30s (#3119)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-23 21:01:43 +08:00
Tino Britty e2ba16c9d7
perf: Optimize ClientUtils.clusterClientBaseConfig: 3x faster with zero functional changes (#3112)
Signed-off-by: Tino Britty  <153193545+brittytino@users.noreply.github.com>
2025-12-23 10:36:15 +08:00
AKASH KAPOOR 7c2bad5ba1
docs: clarify prerequisites for quick start (#3106)
* Update README.md

docs: add prerequisites for local quick start


Signed-off-by: AKASH KAPOOR <131011402+Akashkapoor11@users.noreply.github.com>

* docs: move prerequisites under Get started section

Signed-off-by: AKASH KAPOOR <131011402+Akashkapoor11@users.noreply.github.com>

* docs: move prerequisites under Get started section

Signed-off-by: AKASH KAPOOR <131011402+Akashkapoor11@users.noreply.github.com>

---------

Signed-off-by: AKASH KAPOOR <131011402+Akashkapoor11@users.noreply.github.com>
2025-12-22 15:30:34 +08:00
Xu Han@AutoMQ 7535e76c44
refactor(StreamRecordBatch): replace constructor with static factory method for improved clarity (#3109)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-15 21:11:16 +08:00
Kumar Mohit 60b1ec614f
fix(s3stream): fix array bounds check in ByteBufAlloc (#3102) 2025-12-12 14:31:42 +08:00
Xu Han@AutoMQ d768bfa3e9
feat(zerozone): optimize wal replaying via network transfer (#3101)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-12 14:14:22 +08:00
Xu Han@AutoMQ c470d491bf
fix(wal): revert the recovery modification in #3090 (#3097)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-10 22:58:15 +08:00
Gezi-lzq 863abcb653
fix(proto): enhance map field type resolution (#3094) 2025-12-09 17:08:24 +08:00
Gezi-lzq 48446d941c
test(TableCoordinatorTest): enhance commit status machine tests (#3089)
* test(TableCoordinatorTest): enhance commit status machine tests and improve snapshot expiration handling

* fix(TableCoordinator): correct spelling of 'Committed' in status transition methods

* test(TableCoordinatorTest): refactor snapshot expiration tests and improve verification logic

* chore: update table topic config

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Gezi-lzq <lzqtxwd@gmail.com>

---------

Signed-off-by: Gezi-lzq <lzqtxwd@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-12-09 14:28:56 +08:00
Xu Han@AutoMQ 0965b4fd93
feat(wal): expose wal uri & refactor BootstrapWal (#3091)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-09 14:28:22 +08:00
Suraj Darade 4ceb66d4ea
feat(core): configurable iceberg snapshot expiration (#3077)
* defined constants to hold the new configuration

* defined config

* added fields and initialization in the constructor

* made the iceberg configurable, allowing user control over snapshot

* remove oops extra char moment

* missing spaces and trailing spaces fixes

* indentation for method call chaining aligned with indentation guidelines

* unit tests: configurable iceberg snapshot expiration for tablecoordinator

* remove: unused imports

* test file: applied spotless

* fixed: failing unit tests for tablecoordinator

* spotless applied on test module
2025-12-08 20:40:52 +08:00
Xu Han@AutoMQ dd031cb941
fix(s3wal): improve next() logic to handle pre-fetched records (#3088)
- [Bug] the nextRecord is not cleaned up when directly calling #next without calling #hasNext
- The bug won't happen in production because each #next is called after #hasNext
2025-12-08 17:08:39 +08:00
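The two notes above describe a classic pre-fetching iterator pitfall. A minimal sketch of the corrected contract, with hypothetical names rather than the actual S3WAL recovery iterator:

```java
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.function.Supplier;

// Minimal sketch of a pre-fetching record iterator (hypothetical; not the S3WAL code).
// next() works whether or not hasNext() was called first, and it always clears the
// pre-fetched slot, which is the invariant the commit above restores.
public final class PrefetchIterator<T> implements Iterator<T> {
    private final Supplier<T> source; // assumed to return null when exhausted
    private T nextRecord;             // record pre-fetched by hasNext()

    public PrefetchIterator(Supplier<T> source) {
        this.source = source;
    }

    @Override
    public boolean hasNext() {
        if (nextRecord == null) {
            nextRecord = source.get();
        }
        return nextRecord != null;
    }

    @Override
    public T next() {
        // Fall back to the source if nothing was pre-fetched, then clear the slot.
        T record = nextRecord != null ? nextRecord : source.get();
        nextRecord = null;
        if (record == null) {
            throw new NoSuchElementException();
        }
        return record;
    }
}
```

The key point is that next() clears nextRecord unconditionally, so skipping hasNext() cannot leave a stale pre-fetched record behind.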
Xu Han@AutoMQ 2b73b8ed62
fix(logcache): fix logcacheblock leak (#3086)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-08 11:23:28 +08:00
Xu Han@AutoMQ da9ae1ac92
fix(zerozone): snapshot miss the segment rolling (#3082)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-05 16:26:44 +08:00
Xu Han@AutoMQ a28603cf83
perf(zerozone): smooth the commit (#3080)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-12-05 10:39:44 +08:00
yx9o e05e3a406e
feat(core): Replace fixed parameters with environment variables (#3075) 2025-12-03 11:02:46 +08:00
KRISHNA DAS MEENA 5d6881be6e
docs: update Slack community link in README (#3072)
* Enhance Code of Conduct with new community guidelines (AI transparency, privacy, integrity)

* Update Slack link in README.md

Signed-off-by: KRISHNA DAS MEENA <mek127142@gmail.com>

* Revise Code of Conduct for clarity and inclusivity

Updated the Code of Conduct to enhance clarity and inclusivity, adding new standards and examples of acceptable and unacceptable behavior.

Signed-off-by: KRISHNA DAS MEENA <mek127142@gmail.com>

* Update CODE_OF_CONDUCT.md

Signed-off-by: KRISHNA DAS MEENA <mek127142@gmail.com>

---------

Signed-off-by: KRISHNA DAS MEENA <mek127142@gmail.com>
2025-12-01 10:35:39 +08:00
nicolchen 059c81700d
fix(s3wal): update s3 wal upload interval default value (#3065) 2025-11-27 22:24:03 +08:00
Xu Han@AutoMQ 7a26af947a
fix(zerozone2): bump the commitEpoch when there is no inflight requests (#3067)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-27 19:25:08 +08:00
Xu Han@AutoMQ 3678dc2647
fix(s3stream): may trim more data than expected (#3064)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-27 15:22:51 +08:00
yx9o 8f73f638c4
feat(s3stream): S3 API timeout can be configured via the path parameter (#3061) 2025-11-27 10:16:37 +08:00
Xu Han@AutoMQ d67d4b0207
revert(s3wal): handle hollow after trim (#3060)
Revert "fix(s3wal): handle hollow after trim (#3058)"

This reverts commit efd674a7f4.
2025-11-26 21:07:20 +08:00
Xu Han@AutoMQ efd674a7f4
fix(s3wal): handle hollow after trim (#3058)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-26 20:59:57 +08:00
Xu Han@AutoMQ ea591fd05b
fix(metastream): compaction may drop certain keys (#3056)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-26 19:36:57 +08:00
woshigaopp 142f9b9231
fix(core): fix e2e (#3050) (#3053)
fix enterprise remote write e2e
2025-11-26 16:48:16 +08:00
Xu Han@AutoMQ 255a6f3583
revert(wal): revert wal change in #3049 (#3052)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-26 16:42:56 +08:00
Xu Han@AutoMQ 2e1f572a56
fix(failover): refresh failover task when the epoch bump (#3049)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-26 15:37:33 +08:00
Xu Han@AutoMQ 9dc224ece7
feat(failover): don't failover the recovering node (#3045)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-25 22:48:35 +08:00
woshigaopp df917b9916
fix(e2e): fix e2e (#3042) (#3043)
fix e2e
2025-11-25 21:34:24 +08:00
John Luo 1663377213
docs: Add 8k banner to readme (#3041)
* Update README.md use 8k banner

Signed-off-by: John Luo <56945247+johnluoyx@users.noreply.github.com>

* remove extra image

Signed-off-by: John Luo <56945247+johnluoyx@users.noreply.github.com>

---------

Signed-off-by: John Luo <56945247+johnluoyx@users.noreply.github.com>
2025-11-25 15:44:05 +08:00
1sonofqiu 6c1a32da1e
fix(metrics): inject offset metric register for txn commit (#3038) 2025-11-24 18:40:58 +08:00
Xu Han@AutoMQ 3c45edd7a9
feat(s3stream): auto adjust walUploadThreshold (#3036)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-24 10:51:52 +08:00
Gezi-lzq 0db45b8d86
fix(RecordAssembler): optimize field creation using Accessor (#3031) 2025-11-20 11:26:59 +08:00
Xu Han@AutoMQ dc93d94f68
fix(benchmark): fix compression so it works even when not batching (#3029)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-19 20:02:05 +08:00
Xu Han@AutoMQ 434ec662ca
fix(network): fix potential ByteBuf LEAK in fetch (#3027)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-19 19:37:34 +08:00
Xu Han@AutoMQ daef2e2771
fix(logcache): guard cache free with writeLock (#3021)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-19 11:23:32 +08:00
Gezi-lzq 147264b264
feat(protobuf): enhance Protobuf data handling with LogicalMap support and enhance test coverage (#3020) 2025-11-19 11:02:19 +08:00
Gezi-lzq a9f6fafb6f
fix(schema): streamline schema change handling and enhance test coverage (#3019) 2025-11-19 10:52:47 +08:00
Xu Han@AutoMQ 5ab129555b
perf(s3stream): async heavy log cache operation (#3016)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-17 19:13:57 +08:00
Gezi-lzq e9feeec9a7
fix(binder): enhance RecordBinder and TypeAdapter to support STRUCT type conversion (#3005)
* fix(binder): enhance RecordBinder and TypeAdapter to support STRUCT type conversion

* test(binder): add tests for nested struct binding and schema instance reuse

* fix(adapter): support conversion from Number to TIMESTAMPTZ in AbstractTypeAdapter

* fix(adapter): update list and map conversion methods to include StructConverter
2025-11-17 16:36:56 +08:00
woshigaopp ddbb286a20
fix(core): auto add ops metrics (#3013) (#3014) 2025-11-15 22:43:18 +08:00
woshigaopp 91459b2a17
feat(connect): support connect openTelemetry and log for 1.6 (#2961) (#3012)
Extract metrics and logging features as standalone modules.
2025-11-14 18:19:00 +08:00
Xu Han@AutoMQ fb43893d53
chore(metrics): move queue time record to AsyncNetworkBandwidthLimiter (#3010)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-14 17:30:19 +08:00
Xu Han@AutoMQ 4226b5038f
fix(wal): fix leak caused by #3000 (#3007)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-14 10:54:43 +08:00
Xu Han@AutoMQ 107ae202bf
feat(zerozone2): add overload protection (#3002)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-13 11:41:58 +08:00
Xu Han@AutoMQ 9e3828df12
fix(wal): filter inner trim record for range get (#3000)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-11 19:19:33 +08:00
Xu Han@AutoMQ fdb40ead4b
perf(s3stream): optimize limiter lock range (#2998)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-11 16:29:13 +08:00
Xu Han@AutoMQ 69b01b940b
perf(s3stream): optimize logcache merge (#2996)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-10 21:58:35 +08:00
Xu Han@AutoMQ dbe443b153
perf(s3stream): parallel append & optimize logcache init (#2994)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-10 20:18:04 +08:00
Xu Han@AutoMQ 17d17dae07
perf(zerozone2): smooth wal upload (#2992)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-10 16:32:48 +08:00
JasirVoriya 95946420a7
perf(s3stream): compute compaction delay using min timestamp instead of sorting (#2984) 2025-11-04 13:12:50 +08:00
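A sketch of the optimization named in #2984, with a hypothetical object shape rather than the real compaction types: the delay only depends on the oldest data timestamp, so a single min-scan replaces sorting.

```java
import java.util.List;

// Sketch of the idea in #2984 (hypothetical object shape): the compaction delay only
// needs the oldest object's data timestamp, so an O(n) min-scan replaces sorting.
final class CompactionDelay {
    record S3ObjectInfo(long objectId, long dataTimeInMs) { }

    static long delayMs(List<S3ObjectInfo> objects, long nowMs) {
        long minDataTime = objects.stream()
            .mapToLong(S3ObjectInfo::dataTimeInMs)
            .min()
            .orElse(nowMs);
        return Math.max(0, nowMs - minDataTime);
    }
}
```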
Xu Han@AutoMQ 6c752a140e
fix(zerozone2): fix the upgrade from v1 to v2 (#2986)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-04 02:23:25 +08:00
Xu Han@AutoMQ 63e6eb8f92
feat(logcache): limit max block count (#2982)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-11-03 12:18:04 +08:00
Gezi-lzq 1c620eb6e0
feat(timestamp): enhance RecordBinder to support TIMESTAMP and TIME types in union handling (#2981)
* feat(timestamp): enhance RecordBinder to support TIMESTAMP and TIME types in union handling

* test(avro): add missing import for AvroRecordBinderTest
2025-11-03 11:03:19 +08:00
Tino Britty 6ed1940948
fix: correctly use bytebuffer slice in decodecontexts method (#2925)
* fix: correctly use bytebuffer slice in decodecontexts method

Signed-off-by: Tino Britty  <153193545+brittytino@users.noreply.github.com>

* [checkstyle] [ERROR] RESOLVED

---------

Signed-off-by: Tino Britty  <153193545+brittytino@users.noreply.github.com>
2025-10-28 14:15:51 +08:00
John Luo e3c45d92a9
feat(README): add new customers logos to README (#2975) 2025-10-27 23:37:07 +08:00
Xu Han@AutoMQ 4f936d86b3
chore(zerozone2): detect close failure (#2973)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-24 01:47:22 +08:00
Gezi-lzq 06e90e2e4c
feat(table-coordinator): handle exceptions during snapshot expiration (#2969) 2025-10-21 20:22:39 +08:00
Gezi-lzq dc295c8d13
feat(core): add iceberg-nessie dependency for enhanced functionality (#2967) 2025-10-21 10:32:19 +08:00
Xu Han@AutoMQ 99f64721f3
feat(zerozone2): fast acks=0 (#2963)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-17 15:22:40 +08:00
Xu Han@AutoMQ d11ef0718d
feat(zerozone2): async start RouterChannel and ConfirmWAL (#2959)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-16 20:49:19 +08:00
Xu Han@AutoMQ 489888de62
chore(zerozone2): schedule logging proxy mapping (#2957)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-16 18:52:35 +08:00
Xu Han@AutoMQ f05987aad6
feat(failover): remove the controller limitation (#2955)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-16 15:48:19 +08:00
Xu Han@AutoMQ f5147bb747
feat(zerozone2): response retryable error when route out get UNKNOWN_SERVER_ERROR (#2953)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-16 11:10:23 +08:00
Xu Han@AutoMQ bea70076af
fix(zerozone2): fix committed epoch not bumped when there is a gracefully shut down node (#2951)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-15 15:58:49 +08:00
Xu Han@AutoMQ 8361a58626
perf(s3stream): optimize the doRangeRead GC issue caused by #2764 (#2948)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-14 19:58:35 +08:00
Xu Han@AutoMQ 38db6e57fb
fix(zerozone2): fail the recover when decode fail (#2946)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-14 17:33:37 +08:00
Xu Han@AutoMQ a00ab9e6f8
chore(zerozone2): isolate the record encode allocator (#2944)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-13 20:11:00 +08:00
Xu Han@AutoMQ f5afb1c23b
fix(zerozone2): fix duplicated release (#2942)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-13 12:53:11 +08:00
Xu Han@AutoMQ be01227492
fix(zerozone2): release after append & better snapshot replay (#2940)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-12 23:00:16 +08:00
Xu Han@AutoMQ fac54967d0
fix(zerozone2): fix bug caused by #2929 (#2936)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-11 14:09:23 +08:00
Gezi-lzq 2d8b68cbf4
feat(security): enhance ClientUtils to include listener-specific security settings (#2933)
* feat(security): enhance ClientUtils to include listener-specific security settings

* chore: spotless apply
2025-10-11 10:29:06 +08:00
Xu Han@AutoMQ c3dde6eb26
feat(zerozone2): return NOT_LEADER_OR_FOLLOWER to stale produce (#2931)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-10 18:53:23 +08:00
Robin Han dd690bec35 fix(zerozone2): reset replayer when reset subscriber
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-10 17:47:56 +08:00
yx9o 237463e8f4
fix(s3stream): correct mismatch between configuration class defaults and profile property defaults (#2924) 2025-10-09 11:49:41 +08:00
Nick Guo 111d641d9f
refactor(tool): adjust consume logic in `automq-perf-test.sh` tool (#2900) 2025-10-09 11:48:56 +08:00
Xu Han@AutoMQ 31ee997eda
fix(zerozone2): support lazy stream (#2922)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-10-02 21:02:05 +08:00
Xu Han@AutoMQ 4268503ab1
fix(zerozone2): fix the snapshot to avoid consuming abort txn record (#2920)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-30 18:17:56 +08:00
Gezi-lzq 18340f2b95
feat(avro): simplify union handling in AvroValueAdapter and add support for nested union types (#2916) 2025-09-29 16:58:43 +08:00
Romain Manni-Bucau f41f5f014e
fix: ensure S3 endpoint is not required for table topic since AWS client can guess it most of the time (#2915)
[fix] ensure S3 endpoint is not required for table topic since AWS client can guess it most of the time
2025-09-29 16:21:38 +08:00
Xu Han@AutoMQ 2e5a7760b4
fix(log): elastic log supports truncateFullyAndStartAt (#2913)
fix(log): elastic log supports truncate

Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-29 14:27:02 +08:00
Xu Han@AutoMQ 3375bd3474
fix(zerozone2): fix epoch missing update when there isn't new record (#2911)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-29 11:53:06 +08:00
Xu Han@AutoMQ c66be0f80a
chore(zerozone2): clean up s3stream metrics (#2909)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-28 17:26:31 +08:00
Xu Han@AutoMQ 0768c166de
fix(zerozone2): fix txn record overwrite by mistake (#2905)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-28 16:01:16 +08:00
Xu Han@AutoMQ e965b1d567
fix(zerozone2): guard snapshot by lock (#2907)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-28 15:55:01 +08:00
Xu Han@AutoMQ 51cd912bec
fix(issue2902): change Verification.waitingRequests to unbounded queue to fix the deadlock (#2903)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-27 18:11:07 +08:00
Gezi-lzq 0482afb2ed
feat(benchmark): enhance BenchmarkResult to track duration in nanoseconds and add field count metrics (#2898)
* feat(benchmark): enhance BenchmarkResult to track duration in nanoseconds and add field count metrics

* feat(adapter): improve map conversion handling and enhance field counting logic

* feat(transform): refactor enriched schema cache to use SchemaKey for improved uniqueness handling

* feat(metric): reduce granularity for binary buffers to optimize performance

* feat(metric): reduce granularity for binary buffers to optimize performance

* feat(binder): optimize map field count calculation in RecordBinder

* feat(transform): simplify fingerprint computation in DebeziumUnwrapTransform
2025-09-26 19:59:19 +08:00
Xu Han@AutoMQ 8b988cb134
fix(s3stream): fix potential merge read fail (#2896)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-26 15:26:08 +08:00
Romain Manni-Bucau c235b2374e
test(iceberg): ensure iceberg rest catalog supports custom headers passing th… (#2892)
* [test] ensure iceberg rest catalog supports custom headers passing through headers to iceberg library

* [test] assert custom header

* spotless was not active under java 21 so was missed
2025-09-26 15:25:41 +08:00
yx9o b17304bac1
fix(core): fix typo (#2893) 2025-09-25 20:36:24 +08:00
Xu Han@AutoMQ 54b1389396
fix(zerozone2): gracefully await subscriber close (#2894)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-25 20:35:04 +08:00
Gezi-lzq d3a3847fd0
feat(tests): add Protobuf ConverterTest for comprehensive Protobuf conversion validation (#2885)
* feat(tests): add Protobuf ConverterTest for comprehensive Protobuf conversion validation

* fix(tests): reorder imports in ProtobufRegistryConverterTest for clarity
2025-09-24 20:18:33 +08:00
Xu Han@AutoMQ 9cee5e693e
fix(wal): try drain all waiting upload bulks (#2888)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-24 19:14:34 +08:00
Xu Han@AutoMQ fcd1f60a04
fix(wal): limit bulk upload delay (#2886)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-24 18:23:56 +08:00
Gezi-lzq e67f9e4f43
feat(metadata): add retain and release methods for improved image management (#2882) 2025-09-24 15:09:32 +08:00
Gezi-lzq ae63696aa2
feat(metrics): integrate new metrics for table delay, fields per second, and event loop busy ratio (#2876)
* feat(metrics): integrate new metrics for table topic delay and fields per second

* feat(metrics): add event loop busy ratio metric for table worker

* feat(network): enhance network permit handling and improve blocking consumption
2025-09-23 10:23:10 +08:00
Gezi-lzq 79dbfe5b53
fix(partition): handle null partition spec in commit response (#2877) 2025-09-22 20:42:33 +08:00
Gezi-lzq 6616d103a8
feat(converter): support union types for list and map conversions in AvroValueAdapter (#2872)
* feat(converter): support union types for list and map conversions in AvroValueAdapter

* fix(factory): update value converter creation to use schema ID in RecordProcessorFactory
2025-09-22 16:33:52 +08:00
yx9o d81722b80c
fix(s3stream): correct the comment about the maximum range of merged read tasks (#2865) 2025-09-22 16:03:12 +08:00
Xu Han@AutoMQ fb154a5df5
feat(wal): simple direct channel wrapper (#2875)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-22 14:43:53 +08:00
Gezi-lzq 5efe233674
feat(perf): add Avro and Protobuf performance test cases and related configurations (#2873) 2025-09-22 14:30:42 +08:00
Xu Han@AutoMQ e856df5a9a
chore(s3stream): remove useless wal code (#2870)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-19 18:41:20 +08:00
Xu Han@AutoMQ dd2ec4e3c4
fix(metrics): set the interval for the lazy-created histogram (#2868)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-19 12:02:02 +08:00
Xu Han@AutoMQ 525e0b046e
fix(zerozone2): close snapshotReadPartitionsManager (#2866)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-19 09:57:38 +08:00
Xu Han@AutoMQ 113a1b0dba
fix(zerozone2): fix the expired offsetForLeaderEpoch (#2863)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-17 19:05:32 +08:00
Gezi-lzq 3421ad26ca
feat: enhance schema handling and optimize record processing (#2855)
* fix(converter): improve schema identity generation and optimize RecordAssembler

* feat: enhance schema handling and optimize record processing

* feat: improve schema field handling and normalize record values

* feat: enhance schema generation and improve handling of optional fields

* fix: reorder import statements for better organization
2025-09-17 17:31:50 +08:00
allen 14c34647c8
feat(metrics): enhance metrics for benchmark (#2860) 2025-09-17 17:26:35 +08:00
Xu Han@AutoMQ b98535b960
fix(zerozone2): await subscriber close (#2858)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-17 15:54:41 +08:00
allen 90b243a1cd
feat(metrics): enhance metrics for benchmark (#2856) 2025-09-17 11:39:28 +08:00
Xu Han@AutoMQ 555af3801c
fix(zerozone2): fix reset bug cause skip (#2853)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-16 21:28:02 +08:00
Xu Han@AutoMQ 10abc0402f
fix(zerozone2): adapt to txn (#2850)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-16 17:40:30 +08:00
allen 02a7df261d
fix(metrics): fix percentile metrics (#2849) 2025-09-16 16:50:05 +08:00
Xu Han@AutoMQ b7b989a828
fix(log): fix potential fetch NPE when the partition is closing (#2848)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-16 15:05:17 +08:00
Xu Han@AutoMQ 5cca02d1cb
fix(s3stream): fix buf leak when recover fail (#2841)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-15 21:09:06 +08:00
Xu Han@AutoMQ 5f639cc597
fix(zerozone2): partition open conflict (#2838)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-15 14:20:24 +08:00
Gezi-lzq d269d1a882
feat: add tabletopic e2e case (#2827)
* feat(perf): implement random Avro value generation for performance testing

* feat(e2e): implement end-to-end tests for Table Topic feature with Avro messages

* feat(tests): add end-to-end tests for Table Topic feature with schema evolution and broker restart scenarios

* fix(docker): streamline Docker installation in Dockerfile and update run command in ducker-ak

* fix(config): update S3 bucket name for Iceberg catalog in configuration
2025-09-15 11:15:22 +08:00
Gezi-lzq 1bcc9c7643
fix(config): remove exception for invalid list string in TableTopicConfigValidator (#2836) 2025-09-15 11:04:46 +08:00
Gezi-lzq be113fdac1
fix(config): add support for latest schema subject and message name configuration in TopicConfig and WorkerConfig (#2834) 2025-09-13 11:11:39 +08:00
Gezi-lzq 87137ff875
fix(converter): update subject and message name retrieval in ConverterFactory and WorkerConfig (#2831)
fix(config): correct spelling of 'subject' in WorkerConfig
2025-09-11 14:31:46 +08:00
1sonofqiu d46ff1ecf1
fix(docker): update kafka docker image build process (#2829)
* fix(docker): update build process
2025-09-10 17:46:03 +08:00
1sonofqiu f6166bdca2
feat(docker): kafka docker image adapt (#2828)
* feat(docker): add AutoMQ Kafka Docker release workflow and update Dockerfile
2025-09-10 17:07:31 +08:00
allen 7ea0cb6237
feat(nfs-wal): initial implementation of nfs wal (#2825) 2025-09-08 17:45:06 +08:00
Robin Han 853177d56f feat(reassign): optimize partition reassignment to avoid client fast retry
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-08 15:43:03 +08:00
Gezi-lzq 5bd9aa3e14
chore(config): update default commit interval to 1 minute for table topic (#2819) 2025-09-08 11:20:07 +08:00
Gezi-lzq 5930eea0e5
feat(table): integrate RecordProcessor architecture and enhance error tolerance (#2814)
* feat(table): integrate RecordProcessor architecture and enhance error tolerance

* feat(process): handle system-level errors in schema registry interactions

* chore(process): fix code style

* feat(tests): update assertions for schema evolution and error handling in RecordProcessor tests

* feat(process): simplify error handling for RestClientException in DefaultRecordProcessor

* feat(errors): enhance ErrorsTolerance enum and streamline error tolerance configuration

* feat(metrics): improve field count calculations for List and Map types

* chore(format): improve code style in RecordBinder.java

* feat(writer): modify offset range computation
2025-09-05 18:56:56 +08:00
Xu Han@AutoMQ 91d3d404fc
fix(workflow): set the gradle version (#2816)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-05 17:44:40 +08:00
Xu Han@AutoMQ 8b673fdd15
chore(workflows): add publish maven package workflow (#2812)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-05 15:33:28 +08:00
Xu Han@AutoMQ 3b55de7874
perf(zerozone2): optimize the wal batch upload strategy (#2809)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-05 10:52:09 +08:00
Gezi-lzq 01e537162d
feat(process): introduce record processor factory and enrich conversion/transform pipeline (#2796)
* feat(process): introduce flexible record processor factory and enrich conversion/transform pipeline

- Add RecordProcessorFactory to support dynamic creation of processors based on schema and transform configs
- Refactor RegistryConverterFactory for improved schema format handling and converter instantiation
- Implement SchemaFormat, TableTopicConvertType, and TableTopicTransformType enums for config-driven processing
- Enhance Converter interface and conversion records to include key, value, and timestamp fields
- Refactor AvroRegistryConverter and ProtobufRegistryConverter to return unified RecordData objects
- Add ProtoToAvroConverter for robust Protobuf-to-Avro conversion
- Update transform chain: add KafkaMetadataTransform for metadata enrichment, refactor DebeziumUnwrapTransform
- Update DefaultRecordProcessor and TransformContext to support partition-aware processing
- Improve error handling and code clarity across conversion and transform modules
2025-09-05 10:29:13 +08:00
Nick Guo 982bdbb3a3
feat(strimzi): add docker build workflow for strimzi (#2802)
* feat(strimzi): add docker build workflow for strimzi
2025-09-04 10:24:03 +08:00
Xu Han@AutoMQ 85638eaaa8
fix(zerozone2): fix potential WAL reset overflow BUG (#2805)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-03 16:04:10 +08:00
Xu Han@AutoMQ 70ebf4e5ac
feat(java): set min java version to 17 (#2803)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-03 10:49:51 +08:00
Xu Han@AutoMQ 826415b1d8
chore(e2e): save the e2e runner resource (#2801)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-01 20:10:23 +08:00
Xu Han@AutoMQ 2e1a2df45f
chore(e2e): accelerate e2e (#2799)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-09-01 20:10:13 +08:00
John Luo 223e823f81
feat(README): Update README.md slogan picture (#2798)
Update README.md

Signed-off-by: John Luo <56945247+lyx2000@users.noreply.github.com>
2025-09-01 16:20:53 +08:00
John Luo d8e53034c7
feat(README): Update README.md use new slogan (#2797) 2025-09-01 13:22:42 +08:00
Gezi-lzq fabd7087e9
feat(binder): add batch field count statistics to RecordBinder and AvroRecordView (#2795) 2025-08-29 18:58:10 +08:00
John Luo 1a32a0d574
feat(README): Add promotional images to the Readme (#2791)
* Add promotional images to the Readme

Signed-off-by: John Luo <56945247+lyx2000@users.noreply.github.com>

* Update size

Signed-off-by: John Luo <56945247+lyx2000@users.noreply.github.com>

* Update README.md

Signed-off-by: John Luo <56945247+lyx2000@users.noreply.github.com>

---------

Signed-off-by: John Luo <56945247+lyx2000@users.noreply.github.com>
2025-08-29 17:14:56 +08:00
Xu Han@AutoMQ eb2bac45bd
chore(zerozone2): rename channelOffset targetNodeId to attributes (#2793)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-29 17:09:18 +08:00
Gezi-lzq 330bdb1843
refactor(process): Improve and clarify data processing error handling (#2792)
refactor(process): unify error handling with InvalidDataException and update DataError types
2025-08-29 16:57:15 +08:00
Gezi-lzq 862213d3ed
feat(process): introduce unified record processing pipeline with converter and transform interfaces (#2786) 2025-08-29 14:46:56 +08:00
Xu Han@AutoMQ 59c64c386f
feat(zerozone2): force route to local (#2789)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-28 21:45:32 +08:00
Xu Han@AutoMQ 71da316589
refactor(zerozone2): make wal provider more extensible (#2787)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-27 16:42:56 +08:00
Gezi-lzq ee27700945
feat(binder): implement AvroValueAdapter and RecordBinder for Avro to Iceberg conversion (#2744)
* feat(binder): implement AvroValueAdapter and RecordBinder for Avro to Iceberg conversion

* chore(binder): preallocate list and map sizes
2025-08-27 11:29:06 +08:00
Xu Han@AutoMQ 36e427a473
chore(metrics): unify object wal metrics (#2784)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-26 20:24:33 +08:00
Xu Han@AutoMQ cee57fd94c
chore(config): dynamically change config based on heap size 2025-08-26 18:29:15 +08:00
Robin Han 5640433c70 fix(zerozone2): fix failover bug caused by #2764
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-25 11:00:08 +08:00
yx9o e3d19c2d11 fix(core): fix lock object error when implementing singleton pattern 2025-08-25 10:00:45 +08:00
Gezi-lzq 574ed89baf
fix(metrics): optimize metrics name mapping in S3MetricsExporter (#2775) 2025-08-22 14:45:37 +08:00
Xu Han@AutoMQ a83ab0e869
feat(zerozone2): complete delay fetch (#2773)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-21 15:03:04 +08:00
Xu Han@AutoMQ fd8ea99d97
feat(zerozone2): add metrics (#2772)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-21 14:47:53 +08:00
Xu Han@AutoMQ a81af21c2d
fix(zerozone2): fix the replay blocking when ObjectNotExist (#2771)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-21 14:30:58 +08:00
Xu Han@AutoMQ d355af4aba
fix(s3stream): move network inbound consumption from s3stream to kafkaapi (#2770)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-21 14:01:41 +08:00
Xu Han@AutoMQ 665182a1f8
chore(s3stream): increase part size from 16MB to 32MB (#2769)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-21 14:01:34 +08:00
Xiao Yang 8887c4104c
refactor(core): support dynamic splicing factory class name #2757 2025-08-21 11:17:47 +08:00
Robin Han 4cb1fb515e chore(zerozone2): add S3StorageTest back
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-18 20:09:55 +08:00
Xu Han@AutoMQ 864c857d15
feat(zerozone2): support zero zone v2 (#2766)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-18 16:25:39 +08:00
Xu Han@AutoMQ 5a85abcf1c
feat(zerozone2): add router channel epoch manager (#2765)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-18 15:40:15 +08:00
Xu Han@AutoMQ fc5e335a49
feat(zerozone2): S3WAL supports sequential append and distributed read (#2764)
- S3WAL supports sequential append & callback
- Simplify the S3Storage logic with S3WAL sequential append
- S3WAL supports distributed read from other nodes
- S3Storage supports linked record

Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-18 15:13:35 +08:00
Xu Han@AutoMQ 0798e2e8bf
feat(zerozone2): extend ZoneRouter & PartitionSnapshot protocol (#2763)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-18 11:33:34 +08:00
Xu Han@AutoMQ b4fb4ea904
chore(README): update to latest release 1.5.5 (#2762)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-15 20:39:35 +08:00
Xu Han@AutoMQ 7b325c38ec
fix(failover): fix failover get wrong node config (#2759)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-15 18:40:48 +08:00
woshigaopp cb235081dd
fix(e2e): fix benchmark e2e (#2754)
remove unnecessary import to fix e2e.
2025-08-14 15:25:31 +08:00
Xu Han@AutoMQ 5520dde387
fix(s3stream): fix the network out over-consumed (#2752)
Consider the following scenario:

1. A Fetch request contains partitions P1 and P2. The data of P1 is in LogCache, while the data of P2 is not.
2. A fast read is attempted first: P1 returns data and consumes Network Out, while P2 returns a FastReadException.
3. Because of the FastReadException, the entire Fetch falls back to a slow read, and both P1 and P2 return data and consume Network Out again.

At this point, the Network Out already consumed in step 2 is consumed a second time.
Solution: move the S3Stream network out consumption to ElasticReplicaManager, avoiding over-consumption of network out traffic when a fetch mixes tail-read and catch-up-read partitions.

Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-13 10:25:01 +08:00
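A toy illustration of the double-charge, using a stand-in limiter (the real AsyncNetworkBandwidthLimiter and ElasticReplicaManager wiring is not shown):

```java
import java.util.concurrent.atomic.AtomicLong;

// Toy limiter showing the double-charge (hypothetical; the real code uses
// AsyncNetworkBandwidthLimiter and charges in ElasticReplicaManager).
final class NetworkOutLimiterSketch {
    private final AtomicLong consumedBytes = new AtomicLong();

    void consume(long bytes) {
        consumedBytes.addAndGet(bytes);
    }

    long consumed() {
        return consumedBytes.get();
    }

    public static void main(String[] args) {
        long p1 = 1 << 20, p2 = 1 << 20; // 1 MiB per partition

        NetworkOutLimiterSketch perAttempt = new NetworkOutLimiterSketch();
        perAttempt.consume(p1);      // step 2: fast read, only P1 succeeds
        perAttempt.consume(p1 + p2); // step 3: slow read fallback, both succeed
        // perAttempt.consumed() is 3 MiB even though the response is 2 MiB

        NetworkOutLimiterSketch perFetch = new NetworkOutLimiterSketch();
        perFetch.consume(p1 + p2);   // charge once, on the bytes actually returned
        // perFetch.consumed() is 2 MiB
    }
}
```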
Xu Han@AutoMQ c5672b374c
fix(s3stream): add pending requests await timeout for S3Stream#close (#2750)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-08-12 15:48:52 +08:00
Saumya Pandey be42048437
fix: Added support for AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY (#2747)
Added support for AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY to the
list of supported environment variables. Issue #2746
2025-08-11 10:21:27 +08:00
woshigaopp 31b828f0f4
feat: modify for enterprise e2e (#2732)
* feat: modify for enterprise e2e

* feat: add AutoMQ inject start/end

* feat: undo modify runclass.sh
2025-07-30 23:13:50 +08:00
Gezi-lzq 59a92fa6a8
fix(streamReader): implement scheduled cleanup for expired stream readers (#2719)
* fix(streamReader): implement scheduled cleanup for expired stream readers

* fix(streamReader): implement scheduled cleanup for expired stream readers

* fix(streamReader): add missing import statements in StreamReaders and StreamReadersTest

* fix(StreamReadersTest): improve test setup and cleanup logic for stream readers

* test(StreamReadersTest): update expired stream reader cleanup test for manual trigger and faster execution

* style(StreamReadersTest): remove extra blank line in import statements

* test(StreamReadersTest): use reflection to simulate expired stream readers for faster cleanup testing

Signed-off-by: Gezi-lzq <lzqtxwd@gmail.com>

* refactor(StreamReader, StreamReaders): inject Time for testable time control and remove reflection from tests

- Add Time dependency to StreamReader and StreamReaders for time-related operations
- Update constructors to accept Time, defaulting to Time.SYSTEM
- Replace System.currentTimeMillis() with time.milliseconds() throughout
- Refactor StreamReadersTest to use MockTime for simulating time passage
- Remove reflection-based time manipulation in tests for cleaner and safer testing

---------

Signed-off-by: Gezi-lzq <lzqtxwd@gmail.com>
2025-07-30 16:55:35 +08:00
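The last bullet above is a common testability pattern. A minimal sketch assuming Kafka's org.apache.kafka.common.utils.Time is available in this codebase (the real StreamReader fields differ):

```java
import org.apache.kafka.common.utils.Time;

// Sketch of the Time-injection pattern from the commit above (fields are illustrative,
// not the actual StreamReader): production passes Time.SYSTEM, tests pass a MockTime.
final class ExpiringStreamReader {
    private final Time time;
    private final long expirationMs;
    private volatile long lastAccessMs;

    ExpiringStreamReader(Time time, long expirationMs) {
        this.time = time;
        this.expirationMs = expirationMs;
        this.lastAccessMs = time.milliseconds();
    }

    void touch() {
        lastAccessMs = time.milliseconds();
    }

    boolean expired() {
        return time.milliseconds() - lastAccessMs >= expirationMs;
    }
}
```

In tests, a MockTime can then advance the clock to trigger expiration without sleeping or reflection.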
Gezi-lzq 06eef9ae78
perf(log): increase FETCH_BATCH_SIZE to 512KB in StreamSegmentInputStream (#2722) 2025-07-30 11:59:01 +08:00
Gezi-lzq 201c6eb832
fix(log): Prevent potential offset overflow in ElasticLogSegment (#2720)
* fix(log): Prevent potential offset overflow in ElasticLogSegment

This commit addresses an issue where a log segment could accommodate more than Integer.MAX_VALUE records, leading to a potential integer overflow when calculating relative offsets.

The root cause was that the check `offset - baseOffset <= Integer.MAX_VALUE` allowed a relative offset to be exactly `Integer.MAX_VALUE`. Since offsets are 0-based, this allows for `Integer.MAX_VALUE + 1` records, which cannot be represented by a standard Integer.

This fix implements the following changes:
1.  In `ElasticLogSegment`, the offset validation is changed from `<=` to `< Integer.MAX_VALUE` to ensure the relative offset strictly fits within an Integer's bounds.
2.  In `LogCleaner`, a new segment grouping method `groupSegmentsBySizeV2` is introduced for `ElasticUnifiedLog`. This method uses the same stricter offset check to prevent incorrectly grouping segments that would exceed the offset limit.
3.  The corresponding unit tests in `LogCleanerTest` have been updated to reflect these new boundaries and validate the fix.

Fixes: #2718

* fix(logCleaner): unify segment grouping logic

* fix(logCleaner): extract offset range check for segment grouping to prevent overflow in ElasticLogSegment

* style(logCleaner): fix indentation in segment grouping condition for readability

* style(logCleaner): fix line break in offset range check for readability

* chore: add AutoMQ inject

* style(logCleaner): remove unnecessary blank line after segment grouping

* fix(stream): validate record batch count to prevent negative values in append
2025-07-30 11:34:52 +08:00
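A small sketch of the boundary condition the fix above tightens (illustrative only; the real checks live in ElasticLogSegment and LogCleaner):

```java
// Sketch of the boundary the fix tightens. With 0-based relative offsets, allowing a
// relative offset equal to Integer.MAX_VALUE means MAX_VALUE + 1 records fit in one
// segment, which an int cannot represent, so the check must be strictly '<'.
final class RelativeOffsets {
    static boolean fitsInSegment(long offset, long baseOffset) {
        long relative = offset - baseOffset;
        return relative >= 0 && relative < Integer.MAX_VALUE; // '<', not '<='
    }

    static int relativeOffset(long offset, long baseOffset) {
        long relative = offset - baseOffset;
        if (relative < 0 || relative >= Integer.MAX_VALUE) {
            throw new IllegalArgumentException("offset out of range for segment: " + offset);
        }
        return (int) relative;
    }
}
```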
lifepuzzlefun 014d2d2861
perf(metadata): remove empty RangeMetadata (#2709) 2025-07-28 17:20:40 +08:00
Vivek Chavan 75bdea05e5
fix: resolve Base64 decoding error in certificate parsing (#2615) (#2693)
- Fix IllegalArgumentException: Illegal base64 character 20 in S3StreamKafkaMetricsManager
- Replace single newline removal with comprehensive whitespace cleanup using replaceAll("\s", "")
- Add graceful error handling for both Base64 and certificate parsing failures
- Add comprehensive unit tests covering various whitespace scenarios and edge cases
- Improve logging with specific error messages for failed certificate parsing

Fixes #2615
2025-07-28 14:10:45 +08:00
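A minimal sketch of the lenient decoding described above (a hypothetical helper, not the actual S3StreamKafkaMetricsManager code):

```java
import java.util.Base64;

// Strip every whitespace character before Base64-decoding (the source of the
// "Illegal base64 character 20" error), and fail gracefully instead of throwing.
final class CertDecoding {
    static byte[] decodeBase64Lenient(String raw) {
        String cleaned = raw.replaceAll("\\s", ""); // removes spaces, tabs and newlines
        try {
            return Base64.getDecoder().decode(cleaned);
        } catch (IllegalArgumentException e) {
            // Graceful handling: report and return empty rather than failing the caller.
            System.err.println("Failed to decode certificate content: " + e.getMessage());
            return new byte[0];
        }
    }
}
```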
Gezi-lzq 80b9377da3
fix(logCleaner): optimize write buffer management and clear buffer before use (#2704) 2025-07-28 14:08:46 +08:00
Gezi-lzq 32c51bde40
feat(log): enhance reading logic to handle offset gaps and add unit tests (#2699) 2025-07-28 09:05:17 +08:00
Xu Han@AutoMQ a34c536edb
chore(github): change code owners (#2695)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-07-25 15:40:43 +08:00
lifepuzzlefun 7b9d8a490e
perf(s3stream): avoid S3StreamSetObject objectId long primitive unboxing (#2687) 2025-07-25 10:45:34 +08:00
Napat Rungruangbangchan 6143ec682c
fix: update comment to reflect cluster setup in docker-compose-cluste… (#2691)
fix: update comment to reflect cluster setup in docker-compose-cluster.yaml
2025-07-22 15:13:36 +08:00
John Luo c1db592e8d
fix(readme): adjust logo (#2689) 2025-07-21 15:05:48 +08:00
John Luo ae16846da9
fix(README): readme-logo (#2688) 2025-07-21 08:45:07 +08:00
Gezi-lzq 5a8aaaab8a
feat(catalog): avoid static global credentials provider (#2684)
* feat(catalog): Avoid static global credentials provider

Refactors `CredentialProviderHolder` to prevent "Connection Pool Shutdown"
errors by creating a new provider instance for each catalog.

Previously, a single static `AwsCredentialsProvider` was shared globally.
If this provider was closed, it would affect all subsequent operations.
By creating a new provider on each `create()` call from Iceberg, this
change removes the global singleton and isolates provider instances.

Fixes #2680

* Update core/src/main/java/kafka/automq/table/CredentialProviderHolder.java

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Gezi-lzq <lzqtxwd@gmail.com>

* fix(credentials): update DefaultCredentialsProvider instantiation to use builder pattern

---------

Signed-off-by: Gezi-lzq <lzqtxwd@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-07-17 19:19:08 +08:00
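A sketch of the per-call provider creation from #2684, assuming the AWS SDK v2 DefaultCredentialsProvider (the real CredentialProviderHolder does more than this):

```java
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;

// A fresh provider per create() call means a closed provider can no longer break
// every later catalog operation with "Connection Pool Shutdown".
final class CredentialProviderHolderSketch {
    public static AwsCredentialsProvider create() {
        return DefaultCredentialsProvider.builder().build();
    }
}
```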
Yu Ning 6847223535
feat(block-wal): use version V1 and support sequential return (#2682)
* feat(s3stream/block-wal): complete appends sequentially (#2665)
  * feat(s3stream/block-wal): complete appends sequentially
  * fix: use a lock to ensure there is at most one callback thread
* feat(s3stream/wal): write a padding record when no space at the end of device (#2673)
  * refactor(RecordHeader): remove useless methods
  * feat(SlidingWindowService): write a padding block when not enough space
  * feat(recovery): handle padding records
  * fix: fix incorrect assertion
* feat(s3stream/wal): defaults to using version V1 and forward compatible (#2676)
  * feat: introduce the Block WAL V1
  * feat: impl `RecoverIteratorV1` which only recovers continuous records
  * feat: wal forward compatibility
  * test: fix tests
  * test: test recover from WAL V1
  * test: test upgrade

Signed-off-by: Ning Yu <ningyu@automq.com>
2025-07-17 14:15:06 +08:00
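A rough sketch of the padding rule from #2673, with invented sizes and a callback standing in for the real block write (the actual SlidingWindowService layout differs):

```java
import java.util.function.BiConsumer;

// If the tail of the device cannot hold the next record, fill it with a padding
// record and continue from the start of the device (logical offsets keep growing
// and are mapped onto the ring by modulo).
final class BlockWalPadding {
    /** Returns the logical offset the record should be written at. */
    static long nextWriteOffset(long writeOffset, long deviceCapacity, int recordSize,
                                BiConsumer<Long, Integer> writePadding) {
        long remaining = deviceCapacity - (writeOffset % deviceCapacity);
        if (remaining < recordSize) {
            // Not enough contiguous space: emit a padding record covering the tail.
            writePadding.accept(writeOffset, (int) remaining);
            return writeOffset + remaining; // maps to physical offset 0 on the ring
        }
        return writeOffset;
    }
}
```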
Gezi-lzq cf724bc1ec
fix(worker): update CommitResponse to use partition type from writerFactory (#2677)
* fix(worker): update CommitResponse to use partition type from writerFactory

* fix(worker): mock partitionSpec in TopicPartitionsWorkerTest for unpartitioned partitions

* fix(worker): reorganize imports in TopicPartitionsWorkerTest for clarity
2025-07-17 11:10:58 +08:00
Xiaorui Wang 4eeb7d63af
Update README.md
Signed-off-by: Xiaorui Wang <vintagewang@apache.org>
2025-07-14 17:52:31 +08:00
lifepuzzlefun 233cc097b0
perf(misc): optimize FairLimiter implementation (#2670)
* fix(s3stream): avoid StreamMetadataManager add callback when retry

* perf(misc): optimize FairLimiter implementation
2025-07-14 14:10:59 +08:00
Xiao Yang 0c1a196419
fix(s3stream): improve ClientWrapper resource release to prevent resource leakage (#2657) 2025-07-10 11:10:41 +08:00
1sonofqiu af45ce48ad
chore(container): update welcome message and container patch (#2658)
* chore(container): rollback bitnami

* chore(container): welcome page message

* Revert "chore(container): welcome page message"

This reverts commit 998f704348.

* chore(container): update welcome message and container patch
2025-07-10 11:05:31 +08:00
Gezi-lzq f2470b7547
fix: support list more than 1000 objects by prefix (#2660)
This commit fixes an issue where the doList method in AwsObjectStorage.java
did not handle paginated results from the S3 listObjectsV2 API. The
method now recursively fetches all pages of objects, ensuring that it can
retrieve more than the default 1000 objects.
2025-07-09 10:09:50 +08:00
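A sketch of the pagination loop described above, using the synchronous AWS SDK v2 client for brevity (the AutoMQ code path is async):

```java
import java.util.ArrayList;
import java.util.List;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.S3Object;

// listObjectsV2 returns at most 1000 keys per call, so follow the continuation token
// until the response is no longer truncated to list every object under a prefix.
final class ListAllByPrefix {
    static List<S3Object> listAll(S3Client s3, String bucket, String prefix) {
        List<S3Object> all = new ArrayList<>();
        String token = null;
        boolean truncated;
        do {
            ListObjectsV2Request request = ListObjectsV2Request.builder()
                .bucket(bucket)
                .prefix(prefix)
                .continuationToken(token)
                .build();
            ListObjectsV2Response response = s3.listObjectsV2(request);
            all.addAll(response.contents());
            token = response.nextContinuationToken();
            truncated = Boolean.TRUE.equals(response.isTruncated());
        } while (truncated);
        return all;
    }
}
```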
lifepuzzlefun b1c4501403
fix(s3stream): avoid StreamMetadataManager add callback when retry (#2659) 2025-07-08 17:32:28 +08:00
Xiaorui Wang b190a61b69
Update README.md
Signed-off-by: Xiaorui Wang <vintagewang@apache.org>
2025-07-04 21:43:57 +08:00
Xiaorui Wang 8f1ca0a9b7
Update README.md
Signed-off-by: Xiaorui Wang <vintagewang@apache.org>
2025-07-04 21:43:23 +08:00
Xu Han@AutoMQ bab8169a33
fix(image): guard streams image access with lock to prevent data loss (#2653)
fix(image): guard streams image access with lock to prevent compaction skip data

Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-07-04 18:20:07 +08:00
Gezi-lzq 7c9cdd0acc
fix(deps): resolve logging conflicts from Hive Metastore (#2648)
feat(dependencies): add jcl-over-slf4j library and exclude conflicting logging implementations
2025-07-03 16:38:37 +08:00
Gezi-lzq 69b9ca51fe
fix(docs): update docker-compose command and deployment links in README (#2647) 2025-07-02 16:00:31 +08:00
Yu Ning 109ab63639
chore(tools/perf): log client logs to a separate file (#2644)
Signed-off-by: Ning Yu <ningyu@automq.com>
2025-07-02 14:48:59 +08:00
yx9o 76d017ae6d
fix(s3stream): correct mismatched comments (#2643) 2025-06-26 09:58:59 +08:00
Gezi-lzq 8867ef7313
fix(docker): update minio policy commands to use 'anonymous' instead of 'policy' (#2640)
fix(docker): update minio policy commands to use 'anonymous' instead of 'public'
2025-06-25 18:04:57 +08:00
Xu Han@AutoMQ 524d72e6a8
feat(tabletopic): default include hive catalog dependencies (#2638)
Signed-off-by: Robin Han <hanxvdovehx@gmail.com>
2025-06-25 16:02:57 +08:00
lyx 2ce079741c
feat(README): add customer wall logos (#2634)
* feat(README): add customer wall logos

* fix(README): remove extra logo

* fix(README): Adjust the expression

* feat(README): adjust logo width
2025-06-20 22:13:01 +08:00
woshigaopp bddc2fbcea
fix(e2e): fix stream update test (#2633) 2025-06-19 19:32:35 +08:00
Shichao Nie 162931f23a
feat(auto_balancer): only use pending append/fetch latency to identif… (#2631)
feat(auto_balancer): only use pending append/fetch latency to identify slow broker

Signed-off-by: Shichao Nie <niesc@automq.com>
2025-06-18 11:47:12 +08:00
woshigaopp abbe61d1e1
fix(e2e): add retry for flaky tests (#2625)
* fix(e2e): fix flaky tests

* add deflake
2025-06-18 10:49:40 +08:00
Gezi-lzq 5b7c13f96f
fix(docker): update AutoMQ image version to 1.6.0 in multiple docker-compose files (#2629) 2025-06-17 15:49:43 +08:00
Gezi-lzq 00132c7cb7
fix(docker): update MinIO command from config host to alias set (#2627)
* fix(docker): update MinIO command from config host to alias set

* fix(docker): update MinIO and mc images to specific release versions
2025-06-17 11:57:29 +08:00
Gezi-lzq fab34ed0f8
fix(metadata): ensure correct GroupCoordinator updates for topic deletions (#2626) 2025-06-17 09:45:18 +08:00
Xiaorui Wang d18ad89a1d
Update README.md
Signed-off-by: Xiaorui Wang <vintagewang@apache.org>
2025-06-13 13:18:15 +08:00
allen 6df82cd1c2
docs: update README.md (#2623)
fix: update README.md
2025-06-13 11:24:44 +08:00
Xiaorui Wang 0cbb03f0b5
Update README.md
Signed-off-by: Xiaorui Wang <vintagewang@apache.org>
2025-06-12 20:30:55 +08:00
Xiaorui Wang 696a353080
Update README.md
Signed-off-by: Xiaorui Wang <vintagewang@apache.org>
2025-06-12 19:49:45 +08:00
Emmanuel Ferdman d889507bb8
fix: clean up duplicated parameters (#2621)
* fix: clean up duplicated parameters

Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>

* fix: clean up duplicated parameters

Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>

---------

Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
2025-06-10 17:01:05 +08:00
KamiWan b674ea6e14
chore: Update README.md (#2622)
Update README.md

Now Kubernetes is supported

Signed-off-by: KamiWan <kaiming.wan@automq.com>
2025-06-10 15:13:36 +08:00
Yu Ning c16a8bff95
fix(quota): update broker quota configs on "--broker-defaults" (#2617)
Signed-off-by: Ning Yu <ningyu@automq.com>
2025-06-09 18:04:11 +08:00
woshigaopp 1b173b2df9
fix(e2e): fix e2e test (#2614)
* fix(e2e): fix e2e test performance and log compaction

* fix(e2e): fix e2e test reassign and round_trip

* fix(e2e): fix e2e test GroupModeTransactionsTest

* fix(e2e): fix e2e test reassignTest

* fix(e2e): fix e2e test kafka start failed because not support file wal

* fix(e2e): fix e2e test kafka start failed because not support file wal

* fix(e2e): fix e2e test kafka start failed because not support file wal

* fix(e2e): fix e2e test kafka start failed because wait logic

* fix(e2e): fix e2e test kafka start failed because wait too short

* fix(e2e): format code

* fix(e2e): format code

* fix(e2e): format code
2025-06-06 16:04:21 +08:00
466 changed files with 36679 additions and 10990 deletions

.github/CODEOWNERS

@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
* @superhx @SCNieh @Chillax-0v0 @Gezi-lzq
* @superhx @Gezi-lzq @1sonofqiu @woshigaopp


@@ -0,0 +1,70 @@
name: AutoMQ Kafka Docker Release
on:
workflow_dispatch:
inputs:
tag:
description: 'AutoMQ Version Tag'
required: false
type: string
workflow_run:
workflows: ["GitHub Release"]
types:
- completed
env:
KAFKA_VERSION: "3.9.0"
jobs:
automq-kafka-release:
name: AutoMQ Kafka Docker Image Release
strategy:
matrix:
platform: [ "ubuntu-24.04" ]
jdk: [ "17" ]
runs-on: ${{ matrix.platform }}
permissions:
contents: write
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Get release tag
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.tag }}" ]]; then
TAG="${{ github.event.inputs.tag }}"
# use the latest tag if not specified
elif [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
TAG=$(git ls-remote --tags https://github.com/AutoMQ/automq.git | grep -v '\^{}' | tail -1 | sed 's/.*refs\/tags\///')
else
TAG="${{ github.event.workflow_run.head_branch }}"
fi
AUTOMQ_URL="https://github.com/AutoMQ/automq/releases/download/${TAG}/automq-${TAG}_kafka-${KAFKA_VERSION}.tgz"
{
echo "AUTOMQ_VERSION=${TAG}-kafka"
echo "AUTOMQ_URL=${AUTOMQ_URL}"
} >> $GITHUB_ENV
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_READ_WRITE_TOKEN }}
- name: Build and Push Docker Image
run: |
python3 -m venv .venv
source .venv/bin/activate
.venv/bin/pip install setuptools
cd docker
python3 docker_release.py \
${{ secrets.DOCKERHUB_USERNAME }}/automq:${AUTOMQ_VERSION} \
--kafka-url ${AUTOMQ_URL}


@@ -0,0 +1,84 @@
name: Docker Strimzi Release
on:
workflow_dispatch:
inputs:
tag:
description: 'AutoMQ Version Tag'
required: false
type: string
workflow_run:
workflows: ["GitHub Release"]
types:
- completed
env:
KAFKA_VERSION: "3.9.0"
STRIMZI_REPO: "https://github.com/AutoMQ/strimzi-kafka-operator.git"
STRIMZI_BRANCH: "main"
jobs:
strimzi-release:
name: Strimzi Image Release
if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
strategy:
matrix:
platform: [ "ubuntu-24.04" ]
jdk: ["17"]
runs-on: ${{ matrix.platform }}
permissions:
contents: write
steps:
- name: Checkout Code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Get release tag
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.tag }}" ]]; then
TAG="${{ github.event.inputs.tag }}"
# use the latest tag if not specified
elif [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
TAG=$(git ls-remote --tags https://github.com/AutoMQ/automq.git | grep -v '\^{}' | tail -1 | sed 's/.*refs\/tags\///')
else
TAG="${{ github.event.workflow_run.head_branch }}"
fi
AUTOMQ_URL="https://github.com/AutoMQ/automq/releases/download/${TAG}/automq-${TAG}_kafka-${KAFKA_VERSION}.tgz"
{
echo "AUTOMQ_VERSION=${TAG}"
echo "AUTOMQ_URL=${AUTOMQ_URL}"
} >> $GITHUB_ENV
- name: Set up JDK ${{ matrix.jdk }}
uses: actions/setup-java@v3
with:
java-version: ${{ matrix.jdk }}
distribution: "zulu"
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_READ_WRITE_TOKEN }}
- name: Build AutoMQ Strimzi Image
run: |
git clone --depth 1 --branch "${{ env.STRIMZI_BRANCH }}" "${{ env.STRIMZI_REPO }}" strimzi
cd strimzi
chmod +x ./tools/automq/build-automq-image.sh
./tools/automq/build-automq-image.sh \
"${{ env.AUTOMQ_VERSION }}" \
"${{ env.AUTOMQ_URL }}" \
"${{ env.KAFKA_VERSION }}" \
"${{ secrets.DOCKERHUB_USERNAME }}" \
"automq"


@@ -57,12 +57,14 @@ jobs:
run: ./tests/docker/run_tests.sh
env:
ESK_TEST_YML: ${{ inputs.test-yaml }}
_DUCKTAPE_OPTIONS: "--deflake 4"
shell: bash
- name: Run E2E tests with path
if: ${{ inputs.test-path != '' }}
run: ./tests/docker/run_tests.sh
env:
TC_PATHS: ${{ inputs.test-path }}
_DUCKTAPE_OPTIONS: "--deflake 4"
shell: bash
- name: Extract results
id: extract-results


@@ -1,61 +0,0 @@
name: Nightly Extra E2E tests
on:
workflow_dispatch:
schedule:
- cron: '0 16 * * *'
jobs:
benchmarks_e2e:
name: "Run benchmarks E2E Tests"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "benchmarks"
test-path: "tests/kafkatest/benchmarks"
runner: "e2e"
connect_e2e_1:
name: "Run connect E2E Tests 1"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "connect1"
test-yaml: "tests/suites/connect_test_suite1.yml"
runner: "e2e"
connect_e2e_2:
name: "Run connect E2E Tests 2"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "connect2"
test-yaml: "tests/suites/connect_test_suite2.yml"
runner: "e2e"
connect_e2e_3:
name: "Run connect E2E Tests 3"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "connect3"
test-yaml: "tests/suites/connect_test_suite3.yml"
runner: "e2e"
streams_e2e:
name: "Run streams E2E Tests"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "streams"
test-path: "tests/kafkatest/tests/streams"
runner: "e2e"
e2e_summary:
name: "E2E Tests Summary"
runs-on: "e2e"
if: ${{ always() && github.repository_owner == 'AutoMQ' }}
needs: [ benchmarks_e2e, connect_e2e_1, connect_e2e_2, connect_e2e_3, streams_e2e ]
steps:
- name: Report results
run: python3 tests/report_e2e_results.py
env:
CURRENT_REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
WEB_HOOK_URL: ${{ secrets.E2E_REPORT_WEB_HOOK_URL }}
DATA_MAP: "{\"benchmarks_e2e\": ${{ toJSON(needs.benchmarks_e2e.outputs) }}, \"connect_e2e_1\": ${{ toJSON(needs.connect_e2e_1.outputs) }}, \"connect_e2e_2\": ${{ toJSON(needs.connect_e2e_2.outputs) }}, \"connect_e2e_3\": ${{ toJSON(needs.connect_e2e_3.outputs) }}, \"streams_e2e\": ${{ toJSON(needs.streams_e2e.outputs) }}}"
REPORT_TITLE_PREFIX: "Extra"


@@ -1,8 +1,8 @@
name: Nightly Main E2E tests
name: Nightly E2E tests
on:
workflow_dispatch:
schedule:
- cron: '0 16 * * *'
- cron: '0 16 1,7,14,21,28 * *'
jobs:
main_e2e_1:
@@ -45,11 +45,51 @@ jobs:
suite-id: "main5"
test-path: "tests/kafkatest/automq"
runner: "e2e"
benchmarks_e2e:
name: "Run benchmarks E2E Tests"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "benchmarks"
test-path: "tests/kafkatest/benchmarks"
runner: "e2e"
connect_e2e_1:
name: "Run connect E2E Tests 1"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "connect1"
test-yaml: "tests/suites/connect_test_suite1.yml"
runner: "e2e"
connect_e2e_2:
name: "Run connect E2E Tests 2"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "connect2"
test-yaml: "tests/suites/connect_test_suite2.yml"
runner: "e2e"
connect_e2e_3:
name: "Run connect E2E Tests 3"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "connect3"
test-yaml: "tests/suites/connect_test_suite3.yml"
runner: "e2e"
streams_e2e:
name: "Run streams E2E Tests"
uses: ./.github/workflows/e2e-run.yml
if: ${{ github.repository_owner == 'AutoMQ' }}
with:
suite-id: "streams"
test-path: "tests/kafkatest/tests/streams"
runner: "e2e"
e2e_summary:
runs-on: "e2e"
name: "E2E Tests Summary"
if: ${{ always() && github.repository_owner == 'AutoMQ' }}
needs: [ main_e2e_1, main_e2e_2, main_e2e_3, main_e2e_4, main_e2e_5 ]
needs: [ main_e2e_1, main_e2e_2, main_e2e_3, main_e2e_4, main_e2e_5, benchmarks_e2e, connect_e2e_1, connect_e2e_2, connect_e2e_3, streams_e2e ]
steps:
- name: Report results
run: python3 tests/report_e2e_results.py
@@ -57,5 +97,5 @@ jobs:
CURRENT_REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
WEB_HOOK_URL: ${{ secrets.E2E_REPORT_WEB_HOOK_URL }}
DATA_MAP: "{\"main_e2e_1\": ${{ toJSON(needs.main_e2e_1.outputs) }}, \"main_e2e_2\": ${{ toJSON(needs.main_e2e_2.outputs) }}, \"main_e2e_3\": ${{ toJSON(needs.main_e2e_3.outputs) }}, \"main_e2e_4\": ${{ toJSON(needs.main_e2e_4.outputs) }}, \"main_e2e_5\": ${{ toJSON(needs.main_e2e_5.outputs) }}}"
DATA_MAP: "{\"main_e2e_1\": ${{ toJSON(needs.main_e2e_1.outputs) }}, \"main_e2e_2\": ${{ toJSON(needs.main_e2e_2.outputs) }}, \"main_e2e_3\": ${{ toJSON(needs.main_e2e_3.outputs) }}, \"main_e2e_4\": ${{ toJSON(needs.main_e2e_4.outputs) }}, \"main_e2e_5\": ${{ toJSON(needs.main_e2e_5.outputs) }}, \"benchmarks_e2e\": ${{ toJSON(needs.benchmarks_e2e.outputs) }}, \"connect_e2e_1\": ${{ toJSON(needs.connect_e2e_1.outputs) }}, \"connect_e2e_2\": ${{ toJSON(needs.connect_e2e_2.outputs) }}, \"connect_e2e_3\": ${{ toJSON(needs.connect_e2e_3.outputs) }}, \"streams_e2e\": ${{ toJSON(needs.streams_e2e.outputs) }}}"
REPORT_TITLE_PREFIX: "Main"


@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Publish Maven Package
on:
workflow_dispatch:
inputs:
version:
description: 'Version to publish'
required: true
push:
tags:
- '[0-9]+.[0-9]+.[0-9]+'
- '[0-9]+.[0-9]+.[0-9]+-rc[0-9]+'
env:
VERSION: ${{ github.event.inputs.version || github.ref_name }}
jobs:
publish:
name: "Publish to Github Packages"
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-22.04 ]
jdk: [ 17 ]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Gradle wrapper validation
uses: gradle/actions/wrapper-validation@v3
- name: Set up JDK ${{ matrix.jdk }}
uses: actions/setup-java@v3
with:
java-version: ${{ matrix.jdk }}
distribution: "zulu"
- name: Setup Gradle
uses: gradle/actions/setup-gradle@v4
with:
gradle-version: '8.10'
- name: Publish
run: |
gradle publish -PmavenUrl='https://maven.pkg.github.com/AutoMQ/automq' \
-PmavenUsername=${{ env.GITHUB_ACTOR }} -PmavenPassword=${{ secrets.GITHUB_TOKEN }} \
-PskipSigning=true \
-Pgroup=com.automq.automq -Pversion=${{ env.VERSION }}

View File

@ -9,6 +9,14 @@ or [Slack](https://join.slack.com/t/automq/shared_invite/zt-29h17vye9-thf31ebIVL
Before getting started, please review AutoMQ's Code of Conduct. Everyone interacting in Slack or WeChat
must follow the [Code of Conduct](CODE_OF_CONDUCT.md).
## Suggested Onboarding Path for New Contributors
If you are new to AutoMQ, it is recommended to first deploy and run AutoMQ using Docker as described in the README.
This helps you quickly understand AutoMQ's core concepts and behavior without the complexity of a local environment.
After gaining familiarity, contributors who want to work on code can follow the steps in this guide to build and run AutoMQ locally.
## Code Contributions
### Finding or Reporting Issues

121
README.md
View File

@ -1,50 +1,105 @@
# AutoMQ: A stateless Kafka® on S3, offering 10x cost savings and scaling in seconds.
# A Diskless Kafka® on S3, Offering 10x Cost Savings and Scaling in Seconds.
<div align="center">
<p align="center">
📑&nbsp <a
href="https://docs.automq.com/docs/automq-opensource/HSiEwHVfdiO7rWk34vKcVvcvn2Z?utm_source=github"
href="https://www.automq.com/docs/automq/what-is-automq/overview?utm_source=github_automq"
target="_blank"
><b>Documentation</b></a>&nbsp&nbsp&nbsp
🔥&nbsp <a
href="https://www.automq.com/docs/automq-cloud/getting-started/install-byoc-environment/aws/install-env-from-marketplace"
href="https://www.automq.com/docs/automq-cloud/getting-started/install-byoc-environment/aws/install-env-from-marketplace?utm_source=github_automq"
target="_blank"
><b>Free trial of AutoMQ on AWS</b></a>&nbsp&nbsp&nbsp
</p>
[![Linkedin Badge](https://img.shields.io/badge/-LinkedIn-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/company/automq)](https://www.linkedin.com/company/automq)
[![Twitter URL](https://img.shields.io/twitter/follow/AutoMQ)](https://twitter.com/intent/follow?screen_name=AutoMQ_Lab)
[![](https://badgen.net/badge/Slack/Join%20AutoMQ/0abd59?icon=slack)](https://join.slack.com/t/automq/shared_invite/zt-29h17vye9-thf31ebIVL9oXuRdACnOIA)
[![](https://img.shields.io/badge/AutoMQ%20vs.%20Kafka(Cost)-yellow)](https://www.automq.com/blog/automq-vs-apache-kafka-a-real-aws-cloud-bill-comparison)
[![](https://img.shields.io/badge/AutoMQ%20vs.%20Kafka(Performance)-orange)](https://docs.automq.com/docs/automq-opensource/IJLQwnVROiS5cUkXfF0cuHnWnNd)
[![](https://badgen.net/badge/Slack/Join%20AutoMQ/0abd59?icon=slack)](https://go.automq.com/slack)
[![](https://img.shields.io/badge/AutoMQ%20vs.%20Kafka(Cost)-yellow)](https://www.automq.com/blog/automq-vs-apache-kafka-a-real-aws-cloud-bill-comparison?utm_source=github_automq)
[![](https://img.shields.io/badge/AutoMQ%20vs.%20Kafka(Performance)-orange)](https://www.automq.com/docs/automq/benchmarks/automq-vs-apache-kafka-benchmarks-and-cost?utm_source=github_automq)
[![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20AutoMQ%20Guru-006BFF)](https://gurubase.io/g/automq)
[![DeepWiki](https://img.shields.io/badge/DeepWiki-AutoMQ%2Fautomq-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==)](https://deepwiki.com/AutoMQ/automq)
<a href="https://trendshift.io/repositories/9782" target="_blank"><img src="https://trendshift.io/api/badge/repositories/9782" alt="AutoMQ%2Fautomq | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</div>
## 👥 Big Companies Worldwide are Using AutoMQ
> Here are some of our major customers worldwide using AutoMQ.
<div align="center">
<img width="97%" alt="automq-solgan" src="https://github.com/user-attachments/assets/bdf6c5f5-7fe1-4004-8e15-54f1aa6bc32f" />
<a href="https://www.youtube.com/watch?v=IB8sh639Rsg" target="_blank">
<img alt="Grab" src="https://github.com/user-attachments/assets/01668da4-3916-4f49-97af-18f91b25f8c1" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="Avia" src="https://github.com/user-attachments/assets/d2845e1c-caf4-444a-93f0-97b13c9c8490" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="Tencent" src="https://github.com/user-attachments/assets/2bdd205f-38c1-4110-9af1-d4c782db3395" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="Honda" src="https://github.com/user-attachments/assets/ee65af29-8ee3-404b-bf81-a004fe0c327c" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="Trip" src="https://github.com/user-attachments/assets/0cb4ae63-6dc1-43dc-9416-625a08dca2e5" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="LG" src="https://github.com/user-attachments/assets/ed9e0f87-abc6-4552-977c-f342ecb105a0" width="19%" />
</a>
<a href="https://www.automq.com/blog/jdcom-automq-cubefs-trillion-scale-kafka-messaging" target="_blank">
<img alt="JD" src="https://github.com/user-attachments/assets/a7a86d2c-66fa-4575-b181-6cf56a31f880" width="19%" />
</a>
<a href="https://www.automq.com/blog/automq-help-geely-auto-solve-the-pain-points-of-kafka-elasticity-in-the-v2x-scenario" target="_blank">
<img alt="Geely" src="https://github.com/user-attachments/assets/d61f7c51-0d80-4290-a428-a941441c7ec9" width="19%" />
</a>
<a href="https://www.automq.com/blog/dewu-builds-trillion-level-monitoring-system-based-on-automq" target="_blank">
<img alt="Poizon" src="https://github.com/user-attachments/assets/45f4c642-0495-4bcc-9224-d2c5c2b2f0d5" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="Bitkub" src="https://github.com/user-attachments/assets/3b95cd26-973d-4405-9d2c-289c5807bb39" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="PalmPay" src="https://github.com/user-attachments/assets/b22f70f5-7553-4283-ac20-f034868b0121" width="19%" />
</a>
<a href="https://www.automq.com/blog/automq-vs-kafka-evaluation-and-comparison-by-little-red-book" target="_blank">
<img alt="RedNote" src="https://github.com/user-attachments/assets/4a62f1f3-e171-4d58-9d7e-ebabad6f8e23" width="19%" />
</a>
<a href="https://www.automq.com/blog/xpeng-motors-reduces-costs-by-50-by-replacing-kafka-with-automq" target="_blank">
<img alt="XPENG" src="https://github.com/user-attachments/assets/8b32c484-a4bf-4793-80d0-f454da254337" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="OPPO" src="https://github.com/user-attachments/assets/2b6d3cf0-ae54-4073-bc06-c6623e31c6d0" width="19%" />
</a>
<a href="https://www.automq.com/customer" target="_blank">
<img alt="BambuLab" src="https://github.com/user-attachments/assets/d09ded1b-3696-49ac-b38f-d02f9598b3bb" width="19%" />
</a>
</div>
<img width="1151" alt="image" src="https://github.com/user-attachments/assets/a2668e5e-eebf-479a-b85a-9611de1b60c8" />
- [Grab: Driving Efficiency with AutoMQ in DataStreaming Platform](https://www.youtube.com/watch?v=IB8sh639Rsg)
- [JD.com x AutoMQ x CubeFS: A Cost-Effective Journey](https://www.automq.com/blog/jdcom-automq-cubefs-trillion-scale-kafka-messaging)
- [Palmpay Uses AutoMQ to Replace Kafka, Optimizing Costs by 50%+](https://www.automq.com/blog/palmpay-uses-automq-to-replace-kafka)
- [AutoMQ helps Geely Auto (Fortune Global 500) solve the pain points of Kafka elasticity in the V2X scenario](https://www.automq.com/blog/automq-help-geely-auto-solve-the-pain-points-of-kafka-elasticity-in-the-v2x-scenario)
- [How Asia's Quora, Zhihu, uses AutoMQ to reduce Kafka cost and maintenance complexity](https://www.automq.com/blog/how-asias-quora-zhihu-use-automq-to-reduce-kafka-cost-and-maintenance-complexity)
- [XPENG Motors Reduces Costs by 50%+ by Replacing Kafka with AutoMQ](https://www.automq.com/blog/xpeng-motors-reduces-costs-by-50-by-replacing-kafka-with-automq)
- [Asia's GOAT, Poizon uses AutoMQ Kafka to build an observability platform for massive data (30 GB/s)](https://www.automq.com/blog/asiax27s-goat-poizon-uses-automq-kafka-to-build-a-new-generation-observability-platform-for-massive-data)
- [AutoMQ Helps CaoCao Mobility Address Kafka Scalability During Holidays](https://www.automq.com/blog/automq-helps-caocao-mobility-address-kafka-scalability-issues-during-mid-autumn-and-national-day)
- [JD.com x AutoMQ x CubeFS: A Cost-Effective Journey](https://www.automq.com/blog/jdcom-automq-cubefs-trillion-scale-kafka-messaging?utm_source=github_automq)
- [Palmpay Uses AutoMQ to Replace Kafka, Optimizing Costs by 50%+](https://www.automq.com/blog/palmpay-uses-automq-to-replace-kafka?utm_source=github_automq)
- [AutoMQ helps Geely Auto (Fortune Global 500) solve the pain points of Kafka elasticity in the V2X scenario](https://www.automq.com/blog/automq-help-geely-auto-solve-the-pain-points-of-kafka-elasticity-in-the-v2x-scenario?utm_source=github_automq)
- [How Asia's Quora, Zhihu, uses AutoMQ to reduce Kafka cost and maintenance complexity](https://www.automq.com/blog/how-asias-quora-zhihu-use-automq-to-reduce-kafka-cost-and-maintenance-complexity?utm_source=github_automq)
- [XPENG Motors Reduces Costs by 50%+ by Replacing Kafka with AutoMQ](https://www.automq.com/blog/xpeng-motors-reduces-costs-by-50-by-replacing-kafka-with-automq?utm_source=github_automq)
- [Asia's GOAT, Poizon uses AutoMQ Kafka to build an observability platform for massive data (30 GB/s)](https://www.automq.com/blog/asiax27s-goat-poizon-uses-automq-kafka-to-build-a-new-generation-observability-platform-for-massive-data?utm_source=github_automq)
- [AutoMQ Helps CaoCao Mobility Address Kafka Scalability During Holidays](https://www.automq.com/blog/automq-helps-caocao-mobility-address-kafka-scalability-issues-during-mid-autumn-and-national-day?utm_source=github_automq)
### Prerequisites
Before running AutoMQ locally, please ensure:
- Docker version 20.x or later
- Docker Compose v2
- At least 4 GB RAM allocated to Docker
- Ports 9092 and 9000 are available on your system
## ⛄ Get started with AutoMQ
> [!Tip]
> Deploying a production-ready AutoMQ cluster is challenging. This Quick Start is only for evaluating AutoMQ features and is not suitable for production use. For production deployment best practices, please [contact](https://www.automq.com/contact) our community for support.
The `docker/docker-compose.yaml` file provides a simple single-node setup for quick evaluation and development:
```shell
docker compose -f docker/docker-compose.yaml up -d
curl -O https://raw.githubusercontent.com/AutoMQ/automq/refs/tags/1.5.5/docker/docker-compose.yaml && docker compose -f docker-compose.yaml up -d
```
This setup features a single AutoMQ node serving as both controller and broker, alongside MinIO for S3 storage. All services operate within a Docker bridge network called `automq_net`, allowing you to start a Kafka producer in this network to test AutoMQ:
```shell
@ -54,19 +109,19 @@ docker run --network automq_net automqinc/automq:latest /bin/bash -c \
```
After testing, you can destroy the setup with:
```shell
docker compose -f docker/docker-compose.yaml down
docker compose -f docker-compose.yaml down
```
The `docker/docker-compose-cluster.yaml` file offers a more complex setup with three AutoMQ nodes, ideal for testing AutoMQ's cluster features, and can be run in the same way.
There are more deployment options available:
- [Deploy on Linux with 5 Nodes](https://www.automq.com/docs/automq/getting-started/cluster-deployment-on-linux)
- [Deploy on Kubernetes (Enterprise only now, open source soon)](https://www.automq.com/docs/automq/getting-started/cluster-deployment-on-kubernetes)
- [Run on Ceph / MinIO / CubeFS / HDFS](https://www.automq.com/docs/automq/deployment/overview)
- [Try AutoMQ on AWS Marketplace (Two Weeks Free Trial)](https://docs.automq.com/automq-cloud/getting-started/install-byoc-environment/aws/install-env-from-marketplace)
- [Deploy Multi-Nodes Test Cluster on Docker](https://www.automq.com/docs/automq/getting-started/deploy-multi-nodes-test-cluster-on-docker?utm_source=github_automq)
- [Deploy on Linux with 5 Nodes](https://www.automq.com/docs/automq/deployment/deploy-multi-nodes-cluster-on-linux?utm_source=github_automq)
- [Deploy on Kubernetes](https://www.automq.com/docs/automq/deployment/deploy-multi-nodes-cluster-on-kubernetes?utm_source=github_automq)
- [Try AutoMQ on AWS Marketplace (Two Weeks Free Trial)](https://docs.automq.com/automq-cloud/getting-started/install-byoc-environment/aws/install-env-from-marketplace?utm_source=github_automq)
- [Try AutoMQ on Alibaba Cloud Marketplace (Two Weeks Free Trial)](https://market.aliyun.com/products/55530001/cmgj00065841.html)
## 🗞️ Newest Feature - Table Topic
Table Topic is a new feature in AutoMQ that combines stream and table functionalities to unify streaming and data analysis. Currently, it supports Apache Iceberg and integrates with catalog services such as AWS Glue, HMS, and the Rest catalog. Additionally, it natively supports S3 tables, a new AWS product announced at the 2024 re:Invent. [Learn more](https://www.automq.com/blog/automq-table-topic-seamless-integration-with-s3-tables-and-iceberg).
Table Topic is a new feature in AutoMQ that combines stream and table functionalities to unify streaming and data analysis. Currently, it supports Apache Iceberg and integrates with catalog services such as AWS Glue, HMS, and the Rest catalog. Additionally, it natively supports S3 tables, a new AWS product announced at the 2024 re:Invent. [Learn more](https://www.automq.com/blog/automq-table-topic-seamless-integration-with-s3-tables-and-iceberg?utm_source=github_automq).
![image](https://github.com/user-attachments/assets/6b2a514a-cc3e-442e-84f6-d953206865e0)
@ -74,7 +129,7 @@ Table Topic is a new feature in AutoMQ that combines stream and table functional
AutoMQ is a stateless Kafka alternative that runs on S3 or any S3-compatible storage, such as MinIO. It is designed to address two major issues of Apache Kafka. First, Kafka clusters are difficult to scale out or in due to the stateful nature of its brokers. Data movement is required, and even reassigning partitions between brokers is a complex process. Second, hosting Kafka in the cloud can be prohibitively expensive. You face high costs for EBS storage, cross-AZ traffic, and significant over-provisioning due to Kafka's limited scalability.
Here are some key highlights of AutoMQ that make it an ideal choice to replace your Apache Kafka cluster, whether in the cloud or on-premise, as long as you have S3-compatible storage:
- **Cost effective**: The first true cloud-native streaming storage system, designed for optimal cost and efficiency on the cloud. Refer to [this report](https://www.automq.com/docs/automq/benchmarks/cost-effective-automq-vs-apache-kafka) to see how we cut Apache Kafka billing by 90% on the cloud.
- **Cost effective**: The first true cloud-native streaming storage system, designed for optimal cost and efficiency on the cloud. Refer to [this report](https://www.automq.com/docs/automq/benchmarks/cost-effective-automq-vs-apache-kafka?utm_source=github_automq) to see how we cut Apache Kafka billing by 90% on the cloud.
- **High Reliability**: Leverage object storage service to achieve zero RPO, RTO in seconds and 99.999999999% durability.
- **Zero Cross-AZ Traffic**: By using cloud object storage as the priority storage solution, AutoMQ eliminates cross-AZ traffic costs on AWS and GCP. In traditional Kafka setups, over 80% of costs arise from cross-AZ traffic, including producer, consumer, and replication sides.
- **Serverless**:
@ -83,9 +138,9 @@ Here are some key highlights of AutoMQ that make it an ideal choice to replace y
- Infinite scalable: Utilize cloud object storage as the primary storage solution, eliminating concerns about storage capacity.
- **Manage-less**: The built-in auto-balancer component automatically schedules partitions and network traffic between brokers, eliminating manual partition reassignment.
- **High performance**:
- High throughput: Leverage pre-fetching, batch processing, and parallel technologies to maximize the capabilities of cloud object storage. Refer to the [AutoMQ Performance White Paper](https://www.automq.com/docs/automq/benchmarks/benchmark-automq-vs-apache-kafka) to see how we achieve this.
- Low Latency: AutoMQ defaults to running on S3 directly, resulting in hundreds of milliseconds of latency. The enterprise version offers single-digit millisecond latency. [Contact us](https://www.automq.com/contact) for more details.
- **Built-in Metrics Export**: Natively export Prometheus and OpenTelemetry metrics, supporting both push and pull. Ditch inefficient JMX and monitor your cluster with modern tools. Refer to [full metrics list](https://www.automq.com/docs/automq/observability/metrics) provided by AutoMQ.
- High throughput: Leverage pre-fetching, batch processing, and parallel technologies to maximize the capabilities of cloud object storage. Refer to the [AutoMQ Performance White Paper](https://www.automq.com/docs/automq/benchmarks/automq-vs-apache-kafka-benchmarks-and-cost?utm_source=github_automq) to see how we achieve this.
- Low Latency: AutoMQ defaults to running on S3 directly, resulting in hundreds of milliseconds of latency. The enterprise version offers single-digit millisecond latency. [Contact us](https://www.automq.com/contact?utm_source=github_automq) for more details.
- **Built-in Metrics Export**: Natively export Prometheus and OpenTelemetry metrics, supporting both push and pull. Ditch inefficient JMX and monitor your cluster with modern tools. Refer to [full metrics list](https://www.automq.com/docs/automq/observability/metrics?utm_source=github_automq) provided by AutoMQ.
- **100% Kafka Compatible**: Fully compatible with Apache Kafka, offering all features with greater cost-effectiveness and operational efficiency.
## ✨Architecture
@ -99,7 +154,7 @@ Regarding the architecture of AutoMQ, it is fundamentally different from Kafka.
- Auto Balancer: a component that automatically balances traffic and partitions between brokers, eliminating the need for manual reassignment. Unlike Kafka, this built-in feature removes the need for cruise control.
- Rack-aware Router: Kafka has long faced cross-AZ traffic fees on AWS and GCP. Our shared storage architecture addresses this by using a rack-aware router to provide clients in different AZs with specific partition metadata, avoiding cross-AZ fees while exchanging data through object storage.
For more on AutoMQ's architecture, visit [AutoMQ Architecture](https://docs.automq.com/automq/architecture/overview) or explore the source code directly.
For more on AutoMQ's architecture, visit [AutoMQ Architecture](https://www.automq.com/docs/automq/architecture/overview?utm_source=github_automq) or explore the source code directly.
## 🌟 Stay Ahead
Star AutoMQ on GitHub for instant updates on new releases.
@ -108,7 +163,7 @@ Star AutoMQ on GitHub for instant updates on new releases.
## 💬 Community
You can join the following groups or channels to discuss or ask questions about AutoMQ:
- Ask questions or report a bug by [GitHub Issues](https://github.com/AutoMQ/automq/issues)
- Discuss about AutoMQ or Kafka by [Slack](https://join.slack.com/t/automq/shared_invite/zt-29h17vye9-thf31ebIVL9oXuRdACnOIA) or [Wechat Group](docs/images/automq-wechat.png)
- Discuss about AutoMQ or Kafka by [Slack](https://go.automq.com/slack) or [Wechat Group](docs/images/automq-wechat.png)
## 👥 How to contribute
@ -117,9 +172,9 @@ To contribute to AutoMQ please see [Code of Conduct](CODE_OF_CONDUCT.md) and [Co
We have a list of [good first issues](https://github.com/AutoMQ/automq/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) that help you to get started, gain experience, and get familiar with our contribution process.
## 👍 AutoMQ Enterprise Edition
The enterprise edition of AutoMQ offers a robust, user-friendly control plane for seamless cluster management, with enhanced availability and observability over the open-source version. Additionally, we offer [Kafka Linking](https://www.automq.com/solutions/kafka-linking) for zero-downtime migration from any Kafka-compatible cluster to AutoMQ.
The enterprise edition of AutoMQ offers a robust, user-friendly control plane for seamless cluster management, with enhanced availability and observability over the open-source version. Additionally, we offer [Kafka Linking](https://www.automq.com/solutions/kafka-linking?utm_source=github_automq) for zero-downtime migration from any Kafka-compatible cluster to AutoMQ.
[Contact us](https://www.automq.com/contact) for more information about the AutoMQ enterprise edition, and we'll gladly assist with your free trial.
[Contact us](https://www.automq.com/contact?utm_source=github_automq) for more information about the AutoMQ enterprise edition, and we'll gladly assist with your free trial.
## 📜 License
AutoMQ is under the Apache 2.0 license. See the [LICENSE](https://github.com/AutoMQ/automq/blob/main/LICENSE) file for details.

View File

@ -0,0 +1,125 @@
# AutoMQ Log Uploader Module
This module provides an asynchronous S3 log upload capability based on Log4j 1.x. Other submodules only need to depend on this module and add a small amount of configuration to synchronize logs to object storage. Core components:
- `com.automq.log.S3RollingFileAppender`: Extends `RollingFileAppender`, pushes log events to the uploader while writing to local files.
- `com.automq.log.uploader.LogUploader`: Asynchronously buffers, compresses, and uploads logs; supports configuration switches and periodic cleanup.
- `com.automq.log.uploader.S3LogConfig`: Interface that abstracts the configuration required for uploading. Implementations must provide cluster ID, node ID, object storage instance, and leadership status.
## Quick Integration
1. Add dependency in your module's `build.gradle`:
```groovy
implementation project(':automq-log-uploader')
```
2. Implement or provide an `S3LogConfig` instance and configure the appender:
```java
// Set up the S3LogConfig through your application
S3LogConfig config = // your S3LogConfig implementation
S3RollingFileAppender.setup(config);
```
3. Reference the Appender in `log4j.properties`:
```properties
log4j.appender.s3_uploader=com.automq.log.S3RollingFileAppender
log4j.appender.s3_uploader.File=logs/server.log
log4j.appender.s3_uploader.MaxFileSize=100MB
log4j.appender.s3_uploader.MaxBackupIndex=10
log4j.appender.s3_uploader.layout=org.apache.log4j.PatternLayout
log4j.appender.s3_uploader.layout.ConversionPattern=[%d] %p %m (%c)%n
```
## S3LogConfig Interface
The `S3LogConfig` interface provides the configuration needed for log uploading:
```java
public interface S3LogConfig {
boolean isEnabled(); // Whether S3 upload is enabled
String clusterId(); // Cluster identifier
int nodeId(); // Node identifier
ObjectStorage objectStorage(); // S3 object storage instance
boolean isLeader(); // Whether this node should upload logs
}
```
The upload schedule can be overridden by environment variables (see the sketch after this list):
- `AUTOMQ_OBSERVABILITY_UPLOAD_INTERVAL`: Maximum upload interval (milliseconds).
- `AUTOMQ_OBSERVABILITY_CLEANUP_INTERVAL`: Retention period (milliseconds); objects older than this are cleaned up.
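A minimal sketch of how these overrides are resolved, mirroring the `LogUploader` constants in this change (60 s upload interval and a 2-minute retention window by default):
```java
// Fall back to the built-in defaults when the environment variables are unset.
int uploadIntervalMs = System.getenv("AUTOMQ_OBSERVABILITY_UPLOAD_INTERVAL") != null
    ? Integer.parseInt(System.getenv("AUTOMQ_OBSERVABILITY_UPLOAD_INTERVAL"))
    : 60 * 1000;
int cleanupIntervalMs = System.getenv("AUTOMQ_OBSERVABILITY_CLEANUP_INTERVAL") != null
    ? Integer.parseInt(System.getenv("AUTOMQ_OBSERVABILITY_CLEANUP_INTERVAL"))
    : 2 * 60 * 1000;
```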
## Implementation Notes
### Leader Selection
The log uploader relies on the `S3LogConfig.isLeader()` method to determine whether the current node should upload logs and perform cleanup tasks. This avoids multiple nodes in a cluster simultaneously executing these operations.
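A hedged sketch of one way to implement this: in this change `isLeader()` replaces the earlier `isActiveController()` check, so delegating to the broker's controller status is a natural fit. The `isActiveController()` helper below is hypothetical.
```java
// Hypothetical S3LogConfig.isLeader(): only the node currently acting as the
// cluster's controller uploads logs and runs cleanup.
@Override
public boolean isLeader() {
    return isActiveController(); // assumed helper exposing the broker's controller status
}
```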
### Object Storage Path
Logs are uploaded to object storage following this path pattern (a sketch of the key construction follows the placeholder list below):
```
automq/logs/{clusterId}/{nodeId}/{hour}/{uuid}
```
Where:
- `clusterId` and `nodeId` come from the S3LogConfig
- `hour` is the timestamp hour for log organization
- `uuid` is a unique identifier for each log batch
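A minimal sketch of how such a key is assembled, mirroring the `getObjectKey()` logic in `LogUploader` included in this change; `config` is assumed to be the `S3LogConfig` passed to `setup`:
```java
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.UUID;

// Bucket logs by the current UTC hour and add a random UUID per uploaded batch.
String hour = LocalDateTime.now(ZoneOffset.UTC).format(DateTimeFormatter.ofPattern("yyyyMMddHH"));
String objectKey = String.format("automq/logs/%s/%s/%s/%s",
    config.clusterId(), config.nodeId(), hour, UUID.randomUUID());
```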
## Usage Example
Complete example of using the log uploader:
```java
import com.automq.log.S3RollingFileAppender;
import com.automq.log.uploader.S3LogConfig;
import com.automq.stream.s3.operator.ObjectStorage;
// Implement S3LogConfig
public class MyS3LogConfig implements S3LogConfig {
@Override
public boolean isEnabled() {
return true; // Enable S3 upload
}
@Override
public String clusterId() {
return "my-cluster";
}
@Override
public int nodeId() {
return 1;
}
@Override
public ObjectStorage objectStorage() {
// Return your ObjectStorage instance
return myObjectStorage;
}
@Override
public boolean isLeader() {
// Return true if this node should upload logs
return isCurrentNodeLeader();
}
}
// Setup and use
S3LogConfig config = new MyS3LogConfig();
S3RollingFileAppender.setup(config);
// Configure Log4j to use the appender
// The appender will now automatically upload logs to S3
```
## Lifecycle Management
Remember to properly shutdown the log uploader when your application terminates:
```java
// During application shutdown
S3RollingFileAppender.shutdown();
```

View File

@ -0,0 +1,105 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.log;
import com.automq.log.uploader.LogRecorder;
import com.automq.log.uploader.LogUploader;
import com.automq.log.uploader.S3LogConfig;
import org.apache.log4j.RollingFileAppender;
import org.apache.log4j.spi.LoggingEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class S3RollingFileAppender extends RollingFileAppender {
private static final Logger LOGGER = LoggerFactory.getLogger(S3RollingFileAppender.class);
private static final Object INIT_LOCK = new Object();
private static volatile LogUploader logUploaderInstance;
private static volatile S3LogConfig s3LogConfig;
public S3RollingFileAppender() {
super();
}
public static void setup(S3LogConfig config) {
s3LogConfig = config;
synchronized (INIT_LOCK) {
if (logUploaderInstance != null) {
return;
}
try {
if (s3LogConfig == null) {
LOGGER.error("No s3LogConfig available; S3 log upload remains disabled.");
throw new RuntimeException("S3 log configuration is missing.");
}
if (!s3LogConfig.isEnabled() || s3LogConfig.objectStorage() == null) {
LOGGER.warn("S3 log upload is disabled by configuration.");
return;
}
LogUploader uploader = new LogUploader();
uploader.start(s3LogConfig);
logUploaderInstance = uploader;
LOGGER.info("S3RollingFileAppender initialized successfully using s3LogConfig {}.", s3LogConfig.getClass().getName());
} catch (Exception e) {
LOGGER.error("Failed to initialize S3RollingFileAppender", e);
throw e;
}
}
}
public static void shutdown() {
if (logUploaderInstance != null) {
synchronized (INIT_LOCK) {
if (logUploaderInstance != null) {
try {
logUploaderInstance.close();
logUploaderInstance = null;
LOGGER.info("S3RollingFileAppender log uploader closed successfully.");
} catch (Exception e) {
LOGGER.error("Failed to close S3RollingFileAppender log uploader", e);
}
}
}
}
}
@Override
protected void subAppend(LoggingEvent event) {
super.subAppend(event);
if (!closed && logUploaderInstance != null) {
LogRecorder.LogEvent logEvent = new LogRecorder.LogEvent(
event.getTimeStamp(),
event.getLevel().toString(),
event.getLoggerName(),
event.getRenderedMessage(),
event.getThrowableStrRep());
try {
logEvent.validate();
logUploaderInstance.append(logEvent);
} catch (IllegalArgumentException e) {
errorHandler.error("Failed to validate and append log event", e, 0);
}
}
}
}

View File

@ -17,7 +17,7 @@
* limitations under the License.
*/
package com.automq.shell.log;
package com.automq.log.uploader;
import org.apache.commons.lang3.StringUtils;

View File

@ -17,10 +17,9 @@
* limitations under the License.
*/
package com.automq.shell.log;
package com.automq.log.uploader;
import com.automq.shell.AutoMQApplication;
import com.automq.shell.util.Utils;
import com.automq.log.uploader.util.Utils;
import com.automq.stream.s3.operator.ObjectStorage;
import com.automq.stream.s3.operator.ObjectStorage.ObjectInfo;
import com.automq.stream.s3.operator.ObjectStorage.ObjectPath;
@ -55,12 +54,14 @@ public class LogUploader implements LogRecorder {
public static final int DEFAULT_MAX_QUEUE_SIZE = 64 * 1024;
public static final int DEFAULT_BUFFER_SIZE = 16 * 1024 * 1024;
public static final int UPLOAD_INTERVAL = System.getenv("AUTOMQ_OBSERVABILITY_UPLOAD_INTERVAL") != null ? Integer.parseInt(System.getenv("AUTOMQ_OBSERVABILITY_UPLOAD_INTERVAL")) : 60 * 1000;
public static final int CLEANUP_INTERVAL = System.getenv("AUTOMQ_OBSERVABILITY_CLEANUP_INTERVAL") != null ? Integer.parseInt(System.getenv("AUTOMQ_OBSERVABILITY_CLEANUP_INTERVAL")) : 2 * 60 * 1000;
public static final int UPLOAD_INTERVAL = System.getenv("AUTOMQ_OBSERVABILITY_UPLOAD_INTERVAL") != null
? Integer.parseInt(System.getenv("AUTOMQ_OBSERVABILITY_UPLOAD_INTERVAL"))
: 60 * 1000;
public static final int CLEANUP_INTERVAL = System.getenv("AUTOMQ_OBSERVABILITY_CLEANUP_INTERVAL") != null
? Integer.parseInt(System.getenv("AUTOMQ_OBSERVABILITY_CLEANUP_INTERVAL"))
: 2 * 60 * 1000;
public static final int MAX_JITTER_INTERVAL = 60 * 1000;
private static final LogUploader INSTANCE = new LogUploader();
private final BlockingQueue<LogEvent> queue = new LinkedBlockingQueue<>(DEFAULT_MAX_QUEUE_SIZE);
private final ByteBuf uploadBuffer = Unpooled.directBuffer(DEFAULT_BUFFER_SIZE);
private final Random random = new Random();
@ -71,16 +72,42 @@ public class LogUploader implements LogRecorder {
private volatile S3LogConfig config;
private volatile CompletableFuture<Void> startFuture;
private ObjectStorage objectStorage;
private Thread uploadThread;
private Thread cleanupThread;
private LogUploader() {
public LogUploader() {
}
public static LogUploader getInstance() {
return INSTANCE;
public synchronized void start(S3LogConfig config) {
if (this.config != null) {
LOGGER.warn("LogUploader is already started.");
return;
}
this.config = config;
if (!config.isEnabled() || config.objectStorage() == null) {
LOGGER.warn("LogUploader is disabled due to configuration.");
closed = true;
return;
}
try {
this.objectStorage = config.objectStorage();
this.uploadThread = new Thread(new UploadTask());
this.uploadThread.setName("log-uploader-upload-thread");
this.uploadThread.setDaemon(true);
this.uploadThread.start();
this.cleanupThread = new Thread(new CleanupTask());
this.cleanupThread.setName("log-uploader-cleanup-thread");
this.cleanupThread.setDaemon(true);
this.cleanupThread.start();
LOGGER.info("LogUploader started successfully.");
} catch (Exception e) {
LOGGER.error("Failed to start LogUploader", e);
closed = true;
}
}
public void close() throws InterruptedException {
@ -97,63 +124,15 @@ public class LogUploader implements LogRecorder {
@Override
public boolean append(LogEvent event) {
if (!closed && couldUpload()) {
if (!closed) {
return queue.offer(event);
}
return false;
}
private boolean couldUpload() {
initConfiguration();
boolean enabled = config != null && config.isEnabled() && config.objectStorage() != null;
if (enabled) {
initUploadComponent();
}
return enabled && startFuture != null && startFuture.isDone();
}
private void initConfiguration() {
if (config == null) {
synchronized (this) {
if (config == null) {
config = AutoMQApplication.getBean(S3LogConfig.class);
}
}
}
}
private void initUploadComponent() {
if (startFuture == null) {
synchronized (this) {
if (startFuture == null) {
startFuture = CompletableFuture.runAsync(() -> {
try {
objectStorage = config.objectStorage();
uploadThread = new Thread(new UploadTask());
uploadThread.setName("log-uploader-upload-thread");
uploadThread.setDaemon(true);
uploadThread.start();
cleanupThread = new Thread(new CleanupTask());
cleanupThread.setName("log-uploader-cleanup-thread");
cleanupThread.setDaemon(true);
cleanupThread.start();
startFuture.complete(null);
} catch (Exception e) {
LOGGER.error("Initialize log uploader failed", e);
}
}, command -> new Thread(command).start());
}
}
}
}
private class UploadTask implements Runnable {
public String formatTimestampInMillis(long timestamp) {
private String formatTimestampInMillis(long timestamp) {
return ZonedDateTime.ofInstant(Instant.ofEpochMilli(timestamp), ZoneId.systemDefault())
.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS Z"));
}
@ -165,7 +144,6 @@ public class LogUploader implements LogRecorder {
long now = System.currentTimeMillis();
LogEvent event = queue.poll(1, TimeUnit.SECONDS);
if (event != null) {
// DateTime Level [Logger] Message \n stackTrace
StringBuilder logLine = new StringBuilder()
.append(formatTimestampInMillis(event.timestampMillis()))
.append(" ")
@ -204,25 +182,22 @@ public class LogUploader implements LogRecorder {
private void upload(long now) {
if (uploadBuffer.readableBytes() > 0) {
if (couldUpload()) {
try {
while (!Thread.currentThread().isInterrupted()) {
if (objectStorage == null) {
break;
}
try {
String objectKey = getObjectKey();
objectStorage.write(WriteOptions.DEFAULT, objectKey, Utils.compress(uploadBuffer.slice().asReadOnly())).get();
break;
} catch (Exception e) {
e.printStackTrace(System.err);
Thread.sleep(1000);
}
try {
while (!Thread.currentThread().isInterrupted()) {
if (objectStorage == null) {
break;
}
try {
String objectKey = getObjectKey();
objectStorage.write(WriteOptions.DEFAULT, objectKey, Utils.compress(uploadBuffer.slice().asReadOnly())).get();
break;
} catch (Exception e) {
LOGGER.warn("Failed to upload logs, will retry", e);
Thread.sleep(1000);
}
} catch (InterruptedException e) {
//ignore
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
uploadBuffer.clear();
lastUploadTimestamp = now;
@ -237,12 +212,11 @@ public class LogUploader implements LogRecorder {
public void run() {
while (!Thread.currentThread().isInterrupted()) {
try {
if (closed || !config.isActiveController()) {
if (closed || !config.isLeader()) {
Thread.sleep(Duration.ofMinutes(1).toMillis());
continue;
}
long expiredTime = System.currentTimeMillis() - CLEANUP_INTERVAL;
List<ObjectInfo> objects = objectStorage.list(String.format("automq/logs/%s", config.clusterId())).join();
if (!objects.isEmpty()) {
@ -252,7 +226,6 @@ public class LogUploader implements LogRecorder {
.collect(Collectors.toList());
if (!keyList.isEmpty()) {
// Some of s3 implements allow only 1000 keys per request.
CompletableFuture<?>[] deleteFutures = Lists.partition(keyList, 1000)
.stream()
.map(objectStorage::delete)
@ -260,7 +233,6 @@ public class LogUploader implements LogRecorder {
CompletableFuture.allOf(deleteFutures).join();
}
}
Thread.sleep(Duration.ofMinutes(1).toMillis());
} catch (InterruptedException e) {
break;
@ -275,5 +247,4 @@ public class LogUploader implements LogRecorder {
String hour = LocalDateTime.now(ZoneOffset.UTC).format(DateTimeFormatter.ofPattern("yyyyMMddHH"));
return String.format("automq/logs/%s/%s/%s/%s", config.clusterId(), config.nodeId(), hour, UUID.randomUUID());
}
}

View File

@ -17,19 +17,18 @@
* limitations under the License.
*/
package com.automq.shell.log;
package com.automq.log.uploader;
import com.automq.stream.s3.operator.ObjectStorage;
public interface S3LogConfig {
boolean isEnabled();
boolean isActiveController();
String clusterId();
int nodeId();
ObjectStorage objectStorage();
boolean isLeader();
}

View File

@ -0,0 +1,69 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.log.uploader.util;
import com.automq.stream.s3.ByteBufAlloc;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import io.netty.buffer.ByteBuf;
public class Utils {
private Utils() {
}
public static ByteBuf compress(ByteBuf input) throws IOException {
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
try (GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
byte[] buffer = new byte[input.readableBytes()];
input.readBytes(buffer);
gzipOutputStream.write(buffer);
}
ByteBuf compressed = ByteBufAlloc.byteBuffer(byteArrayOutputStream.size());
compressed.writeBytes(byteArrayOutputStream.toByteArray());
return compressed;
}
public static ByteBuf decompress(ByteBuf input) throws IOException {
byte[] compressedData = new byte[input.readableBytes()];
input.readBytes(compressedData);
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(compressedData);
try (GZIPInputStream gzipInputStream = new GZIPInputStream(byteArrayInputStream);
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
byte[] buffer = new byte[1024];
int bytesRead;
while ((bytesRead = gzipInputStream.read(buffer)) != -1) {
byteArrayOutputStream.write(buffer, 0, bytesRead);
}
byte[] uncompressedData = byteArrayOutputStream.toByteArray();
ByteBuf output = ByteBufAlloc.byteBuffer(uncompressedData.length);
output.writeBytes(uncompressedData);
return output;
}
}
}

459
automq-metrics/README.md Normal file
View File

@ -0,0 +1,459 @@
# AutoMQ automq-metrics Module
## Module Structure
```
com.automq.opentelemetry/
├── AutoMQTelemetryManager.java # Main management class for initialization and lifecycle
├── TelemetryConstants.java # Constants definition
├── common/
│ ├── OTLPCompressionType.java # OTLP compression types
│ └── OTLPProtocol.java # OTLP protocol types
├── exporter/
│ ├── MetricsExporter.java # Exporter interface
│ ├── MetricsExportConfig.java # Export configuration
│ ├── MetricsExporterProvider.java # Exporter factory provider
│ ├── MetricsExporterType.java # Exporter type enumeration
│ ├── MetricsExporterURI.java # URI parser for exporters
│ ├── OTLPMetricsExporter.java # OTLP exporter implementation
│ ├── PrometheusMetricsExporter.java # Prometheus exporter implementation
│ └── s3/ # S3 metrics exporter implementation
│ ├── CompressionUtils.java # Utility for data compression
│ ├── PrometheusUtils.java # Utilities for Prometheus format
│ ├── S3MetricsExporter.java # S3 metrics exporter implementation
│ └── S3MetricsExporterAdapter.java # Adapter to handle S3 metrics export
└── yammer/
├── DeltaHistogram.java # Delta histogram implementation
├── OTelMetricUtils.java # OpenTelemetry metrics utilities
├── YammerMetricsProcessor.java # Yammer metrics processor
└── YammerMetricsReporter.java # Yammer metrics reporter
```
The AutoMQ OpenTelemetry module is a telemetry data collection and export component built on the OpenTelemetry SDK and designed specifically for AutoMQ Kafka. This module provides unified telemetry data management capabilities, supporting the collection of JVM metrics, JMX metrics, and Yammer metrics, and can export data to Prometheus, OTLP-compatible backend systems, or S3-compatible storage.
## Core Features
### 1. Metrics Collection
- **JVM Metrics**: Automatically collect JVM runtime metrics including CPU, memory pools, garbage collection, threads, etc.
- **JMX Metrics**: Define and collect JMX Bean metrics through configuration files
- **Yammer Metrics**: Bridge existing Kafka Yammer metrics system to OpenTelemetry
### 2. Multiple Exporter Support
- **Prometheus**: Expose metrics in Prometheus format through HTTP server
- **OTLP**: Support both gRPC and HTTP/Protobuf protocols for exporting to OTLP backends
- **S3**: Export metrics to S3-compatible object storage systems
### 3. Flexible Configuration
- Support parameter settings through Properties configuration files
- Configurable export intervals, compression methods, timeout values, etc.
- Support metric cardinality limits to control memory usage
## Module Structure
```
com.automq.opentelemetry/
├── AutoMQTelemetryManager.java # Main management class for initialization and lifecycle
├── TelemetryConfig.java # Configuration management class
├── TelemetryConstants.java # Constants definition
├── common/
│ └── MetricsUtils.java # Metrics utility class
├── exporter/
│ ├── MetricsExporter.java # Exporter interface
│ ├── MetricsExporterURI.java # URI parser
│ ├── OTLPMetricsExporter.java # OTLP exporter implementation
│ ├── PrometheusMetricsExporter.java # Prometheus exporter implementation
│ └── s3/ # S3 metrics exporter implementation
│ ├── CompressionUtils.java # Utility for data compression
│ ├── PrometheusUtils.java # Utilities for Prometheus format
│ ├── S3MetricsConfig.java # Configuration interface
│ ├── S3MetricsExporter.java # S3 metrics exporter implementation
│ ├── S3MetricsExporterAdapter.java # Adapter to handle S3 metrics export
│ ├── LeaderNodeSelector.java # Interface for node selection logic
│ └── LeaderNodeSelectors.java # Factory for node selector implementations
└── yammer/
├── DeltaHistogram.java # Delta histogram implementation
├── OTelMetricUtils.java # OpenTelemetry metrics utilities
├── YammerMetricsProcessor.java # Yammer metrics processor
└── YammerMetricsReporter.java # Yammer metrics reporter
```
## Quick Start
### 1. Basic Usage
```java
import com.automq.opentelemetry.AutoMQTelemetryManager;
import com.automq.opentelemetry.exporter.MetricsExportConfig;
// Implement MetricsExportConfig
public class MyMetricsExportConfig implements MetricsExportConfig {
@Override
public String clusterId() { return "my-cluster"; }
@Override
public boolean isLeader() { return true; }
@Override
public int nodeId() { return 1; }
@Override
public ObjectStorage objectStorage() {
// Return your object storage instance for S3 exports
return myObjectStorage;
}
@Override
public List<Pair<String, String>> baseLabels() {
return Arrays.asList(
Pair.of("environment", "production"),
Pair.of("region", "us-east-1")
);
}
@Override
public int intervalMs() { return 60000; } // 60 seconds
}
// Create export configuration
MetricsExportConfig config = new MyMetricsExportConfig();
// Initialize telemetry manager singleton
AutoMQTelemetryManager manager = AutoMQTelemetryManager.initializeInstance(
"prometheus://localhost:9090", // exporter URI
"automq-kafka", // service name
"broker-1", // instance ID
config // export config
);
// Start Yammer metrics reporting (optional)
MetricsRegistry yammerRegistry = // Get Kafka's Yammer registry
manager.startYammerMetricsReporter(yammerRegistry);
// Application running...
// Shutdown telemetry system
AutoMQTelemetryManager.shutdownInstance();
```
### 2. Get Meter Instance
```java
// Get the singleton instance
AutoMQTelemetryManager manager = AutoMQTelemetryManager.getInstance();
// Get Meter for custom metrics
Meter meter = manager.getMeter();
// Create custom metrics
LongCounter requestCounter = meter
.counterBuilder("http_requests_total")
.setDescription("Total number of HTTP requests")
.build();
requestCounter.add(1, Attributes.of(AttributeKey.stringKey("method"), "GET"));
```
## Configuration
### Basic Configuration
Configuration is provided through the `MetricsExportConfig` interface and constructor parameters:
| Parameter | Description | Example |
|-----------|-------------|---------|
| `exporterUri` | Metrics exporter URI | `prometheus://localhost:9090` |
| `serviceName` | Service name for telemetry | `automq-kafka` |
| `instanceId` | Unique service instance ID | `broker-1` |
| `config` | MetricsExportConfig implementation | See example above |
### Exporter Configuration
All configuration is done through the `MetricsExportConfig` interface and constructor parameters. Export intervals, compression settings, and other options are controlled through:
1. **Exporter URI**: Determines the export destination and protocol
2. **MetricsExportConfig**: Provides cluster information, intervals, and base labels
3. **Constructor parameters**: Service name and instance ID
#### Prometheus Exporter
```java
// Use prometheus:// URI scheme
AutoMQTelemetryManager manager = AutoMQTelemetryManager.initializeInstance(
"prometheus://localhost:9090",
"automq-kafka",
"broker-1",
config
);
```
#### OTLP Exporter
```java
// Use otlp:// URI scheme with optional query parameters
AutoMQTelemetryManager manager = AutoMQTelemetryManager.initializeInstance(
"otlp://localhost:4317?protocol=grpc&compression=gzip&timeout=30000",
"automq-kafka",
"broker-1",
config
);
```
#### S3 Metrics Exporter
```java
// Use s3:// URI scheme
AutoMQTelemetryManager manager = AutoMQTelemetryManager.initializeInstance(
"s3://access-key:secret-key@my-bucket.s3.amazonaws.com",
"automq-kafka",
"broker-1",
config // config.clusterId(), nodeId(), isLeader() used for S3 export
);
```
Example usage with S3 exporter:
```java
// Implementation for S3 export configuration
public class S3MetricsExportConfig implements MetricsExportConfig {
private final ObjectStorage objectStorage;
public S3MetricsExportConfig(ObjectStorage objectStorage) {
this.objectStorage = objectStorage;
}
@Override
public String clusterId() { return "my-kafka-cluster"; }
@Override
public boolean isLeader() {
// Only one node in the cluster should return true
return isCurrentNodeLeader();
}
@Override
public int nodeId() { return 1; }
@Override
public ObjectStorage objectStorage() { return objectStorage; }
@Override
public List<Pair<String, String>> baseLabels() {
return Arrays.asList(Pair.of("environment", "production"));
}
@Override
public int intervalMs() { return 60000; }
}
// Initialize telemetry manager with S3 export
ObjectStorage objectStorage = // Create your object storage instance
MetricsExportConfig config = new S3MetricsExportConfig(objectStorage);
AutoMQTelemetryManager manager = AutoMQTelemetryManager.initializeInstance(
"s3://access-key:secret-key@my-bucket.s3.amazonaws.com",
"automq-kafka",
"broker-1",
config
);
// Application running...
// Shutdown telemetry system
AutoMQTelemetryManager.shutdownInstance();
```
### JMX Metrics Configuration
Define JMX metrics collection rules through YAML configuration files:
```java
AutoMQTelemetryManager manager = AutoMQTelemetryManager.initializeInstance(
exporterUri, serviceName, instanceId, config
);
// Set JMX config paths after initialization
manager.setJmxConfigPaths("/jmx-config.yaml,/kafka-jmx.yaml");
```
#### Configuration File Requirements
1. **Directory Requirements**:
- Configuration files must be placed in the project's classpath (e.g., `src/main/resources` directory)
- Support subdirectory structure, e.g., `/config/jmx-metrics.yaml`
2. **Path Format**:
- Paths must start with `/` to indicate starting from classpath root
- Multiple configuration files separated by commas
3. **File Format**:
- Use YAML format (`.yaml` or `.yml` extension)
- Filenames can be customized, meaningful names are recommended
#### Recommended Directory Structure
```
src/main/resources/
├── jmx-kafka-broker.yaml # Kafka Broker metrics configuration
├── jmx-kafka-consumer.yaml # Kafka Consumer metrics configuration
├── jmx-kafka-producer.yaml # Kafka Producer metrics configuration
└── config/
├── custom-jmx.yaml # Custom JMX metrics configuration
└── third-party-jmx.yaml # Third-party component JMX configuration
```
JMX configuration file example (`jmx-config.yaml`):
```yaml
rules:
- bean: kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec
metricAttribute:
name: kafka_server_broker_topic_messages_in_per_sec
description: Messages in per second
unit: "1/s"
attributes:
- name: topic
value: topic
```
## Supported Metric Types
### 1. JVM Metrics
- Memory usage (heap memory, non-heap memory, memory pools)
- CPU usage
- Garbage collection statistics
- Thread states
### 2. Kafka Metrics
Through Yammer metrics bridging, supports the following types of Kafka metrics:
- `BytesInPerSec` - Bytes input per second
- `BytesOutPerSec` - Bytes output per second
- `Size` - Log size (for identifying idle partitions)
### 3. Custom Metrics
Custom metrics can be created through the OpenTelemetry API (see the sketch after this list):
- Counter
- Gauge
- Histogram
- UpDownCounter
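A short sketch, assuming the manager was initialized as in the Quick Start above; the histogram and gauge builders are standard OpenTelemetry Java APIs, and `currentQueueDepth()` is a hypothetical supplier:
```java
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.Meter;

Meter meter = AutoMQTelemetryManager.getInstance().getMeter();

// Histogram: record request latency samples in milliseconds.
DoubleHistogram latency = meter.histogramBuilder("request_latency_ms")
    .setDescription("Request latency")
    .setUnit("ms")
    .build();
latency.record(12.5, Attributes.of(AttributeKey.stringKey("method"), "GET"));

// Asynchronous gauge: sampled through a callback at each export cycle.
meter.gaugeBuilder("upload_queue_depth")
    .setDescription("Pending items in the upload queue")
    .buildWithCallback(measurement -> measurement.record(currentQueueDepth()));
```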
## Best Practices
### 1. Production Environment Configuration
```java
public class ProductionMetricsConfig implements MetricsExportConfig {
@Override
public String clusterId() { return "production-cluster"; }
@Override
public boolean isLeader() {
// Implement your leader election logic
return isCurrentNodeController();
}
@Override
public int nodeId() { return getCurrentNodeId(); }
@Override
public ObjectStorage objectStorage() {
return productionObjectStorage;
}
@Override
public List<Pair<String, String>> baseLabels() {
return Arrays.asList(
Pair.of("environment", "production"),
Pair.of("region", System.getenv("AWS_REGION")),
Pair.of("version", getApplicationVersion())
);
}
@Override
public int intervalMs() { return 60000; } // 1 minute
}
// Initialize for production
AutoMQTelemetryManager manager = AutoMQTelemetryManager.initializeInstance(
"prometheus://0.0.0.0:9090", // Or S3 URI for object storage export
"automq-kafka",
System.getenv("HOSTNAME"),
new ProductionMetricsConfig()
);
```
### 2. Development Environment Configuration
```java
public class DevelopmentMetricsConfig implements MetricsExportConfig {
@Override
public String clusterId() { return "dev-cluster"; }
@Override
public boolean isLeader() { return true; } // Single node in dev
@Override
public int nodeId() { return 1; }
@Override
public ObjectStorage objectStorage() { return null; } // Not needed for OTLP
@Override
public List<Pair<String, String>> baseLabels() {
return Arrays.asList(Pair.of("environment", "development"));
}
@Override
public int intervalMs() { return 10000; } // 10 seconds for faster feedback
}
// Initialize for development
AutoMQTelemetryManager manager = AutoMQTelemetryManager.initializeInstance(
"otlp://localhost:4317",
"automq-kafka-dev",
"local-dev",
new DevelopmentMetricsConfig()
);
```
### 3. Resource Management
- Set appropriate metric cardinality limits to avoid memory leaks
- Call the `shutdown()` method when the application closes to release resources (see the sketch after this list)
- Monitor exporter health status
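One minimal way to guarantee the shutdown call, assuming the singleton initialization from the Quick Start; adapt this to your application's own lifecycle hooks:
```java
// Release telemetry resources when the JVM exits.
Runtime.getRuntime().addShutdownHook(
    new Thread(AutoMQTelemetryManager::shutdownInstance, "automq-telemetry-shutdown"));
```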
## Troubleshooting
### Common Issues
1. **Metrics not exported**
- Check if exporter URI passed to `initializeInstance()` is correct
- Verify target endpoint is reachable
- Check error messages in logs
- Ensure `MetricsExportConfig.intervalMs()` returns reasonable value
2. **JMX metrics missing**
- Confirm JMX configuration file path set via `setJmxConfigPaths()` is correct
- Check YAML configuration file format
- Verify JMX Bean exists
- Ensure files are in classpath
3. **High memory usage**
- Implement cardinality limits in your `MetricsExportConfig`
- Check for high cardinality labels in `baseLabels()`
- Consider increasing export interval via `intervalMs()`
### Logging Configuration
Enable debug logging for more information using your logging framework configuration (e.g., logback.xml, log4j2.xml):
```xml
<!-- For Logback -->
<logger name="com.automq.opentelemetry" level="DEBUG" />
<logger name="io.opentelemetry" level="INFO" />
```
## Dependencies
- Java 8+
- OpenTelemetry SDK 1.30+
- Apache Commons Lang3
- SLF4J logging framework
## License
This module is open source under the Apache License 2.0.

View File

@ -0,0 +1,330 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.opentelemetry;
import com.automq.opentelemetry.exporter.MetricsExportConfig;
import com.automq.opentelemetry.exporter.MetricsExporter;
import com.automq.opentelemetry.exporter.MetricsExporterURI;
import com.automq.opentelemetry.yammer.YammerMetricsReporter;
import com.yammer.metrics.core.MetricsRegistry;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.bridge.SLF4JBridgeHandler;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import io.opentelemetry.api.OpenTelemetry;
import io.opentelemetry.api.baggage.propagation.W3CBaggagePropagator;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.api.metrics.Meter;
import io.opentelemetry.api.trace.propagation.W3CTraceContextPropagator;
import io.opentelemetry.context.propagation.ContextPropagators;
import io.opentelemetry.context.propagation.TextMapPropagator;
import io.opentelemetry.instrumentation.jmx.engine.JmxMetricInsight;
import io.opentelemetry.instrumentation.jmx.engine.MetricConfiguration;
import io.opentelemetry.instrumentation.jmx.yaml.RuleParser;
import io.opentelemetry.instrumentation.runtimemetrics.java8.Cpu;
import io.opentelemetry.instrumentation.runtimemetrics.java8.GarbageCollector;
import io.opentelemetry.instrumentation.runtimemetrics.java8.MemoryPools;
import io.opentelemetry.instrumentation.runtimemetrics.java8.Threads;
import io.opentelemetry.sdk.OpenTelemetrySdk;
import io.opentelemetry.sdk.metrics.SdkMeterProvider;
import io.opentelemetry.sdk.metrics.SdkMeterProviderBuilder;
import io.opentelemetry.sdk.metrics.export.MetricReader;
import io.opentelemetry.sdk.metrics.internal.SdkMeterProviderUtil;
import io.opentelemetry.sdk.resources.Resource;
/**
* The main manager for AutoMQ telemetry.
* This class is responsible for initializing, configuring, and managing the lifecycle of all
* telemetry components, including the OpenTelemetry SDK, metric exporters, and various metric sources.
*/
public class AutoMQTelemetryManager {
private static final Logger LOGGER = LoggerFactory.getLogger(AutoMQTelemetryManager.class);
// Singleton instance support
private static volatile AutoMQTelemetryManager instance;
private static final Object LOCK = new Object();
private final String exporterUri;
private final String serviceName;
private final String instanceId;
private final MetricsExportConfig metricsExportConfig;
private final List<MetricReader> metricReaders = new ArrayList<>();
private final List<AutoCloseable> autoCloseableList;
private OpenTelemetrySdk openTelemetrySdk;
private YammerMetricsReporter yammerReporter;
private int metricCardinalityLimit = TelemetryConstants.DEFAULT_METRIC_CARDINALITY_LIMIT;
private String jmxConfigPath;
/**
* Constructs a new Telemetry Manager with the given configuration.
*
* @param exporterUri The metrics exporter URI.
* @param serviceName The service name to be used in telemetry data.
* @param instanceId The unique instance ID for this service instance.
* @param metricsExportConfig The metrics configuration.
*/
public AutoMQTelemetryManager(String exporterUri, String serviceName, String instanceId, MetricsExportConfig metricsExportConfig) {
this.exporterUri = exporterUri;
this.serviceName = serviceName;
this.instanceId = instanceId;
this.metricsExportConfig = metricsExportConfig;
this.autoCloseableList = new ArrayList<>();
// Redirect JUL from OpenTelemetry SDK to SLF4J for unified logging
SLF4JBridgeHandler.removeHandlersForRootLogger();
SLF4JBridgeHandler.install();
}
/**
* Gets the singleton instance of AutoMQTelemetryManager.
* Returns null if no instance has been initialized.
*
* @return the singleton instance, or null if not initialized
*/
public static AutoMQTelemetryManager getInstance() {
return instance;
}
/**
* Initializes the singleton instance with the given configuration.
* This method should be called before any other components try to access the instance.
*
* @param exporterUri The metrics exporter URI.
* @param serviceName The service name to be used in telemetry data.
* @param instanceId The unique instance ID for this service instance.
* @param metricsExportConfig The metrics configuration.
* @return the initialized singleton instance
*/
public static AutoMQTelemetryManager initializeInstance(String exporterUri, String serviceName, String instanceId, MetricsExportConfig metricsExportConfig) {
if (instance == null) {
synchronized (LOCK) {
if (instance == null) {
AutoMQTelemetryManager newInstance = new AutoMQTelemetryManager(exporterUri, serviceName, instanceId, metricsExportConfig);
newInstance.init();
instance = newInstance;
LOGGER.info("AutoMQTelemetryManager singleton instance initialized");
}
}
}
return instance;
}
/**
* Shuts down the singleton instance and releases all resources.
*/
public static void shutdownInstance() {
if (instance != null) {
synchronized (LOCK) {
if (instance != null) {
instance.shutdown();
instance = null;
LOGGER.info("AutoMQTelemetryManager singleton instance shutdown");
}
}
}
}
/**
* Initializes the telemetry system. This method sets up the OpenTelemetry SDK,
* configures exporters, and registers JVM and JMX metrics.
*/
public void init() {
SdkMeterProvider meterProvider = buildMeterProvider();
this.openTelemetrySdk = OpenTelemetrySdk.builder()
.setMeterProvider(meterProvider)
.setPropagators(ContextPropagators.create(TextMapPropagator.composite(
W3CTraceContextPropagator.getInstance(), W3CBaggagePropagator.getInstance())))
.buildAndRegisterGlobal();
// Register JVM and JMX metrics
registerJvmMetrics(openTelemetrySdk);
registerJmxMetrics(openTelemetrySdk);
LOGGER.info("AutoMQ Telemetry Manager initialized successfully.");
}
private SdkMeterProvider buildMeterProvider() {
String hostName;
try {
hostName = InetAddress.getLocalHost().getHostName();
} catch (UnknownHostException e) {
hostName = "unknown-host";
}
AttributesBuilder attrsBuilder = Attributes.builder()
.put(TelemetryConstants.SERVICE_NAME_KEY, serviceName)
.put(TelemetryConstants.SERVICE_INSTANCE_ID_KEY, instanceId)
.put(TelemetryConstants.HOST_NAME_KEY, hostName)
// Add attributes for Prometheus compatibility
.put(TelemetryConstants.PROMETHEUS_JOB_KEY, serviceName)
.put(TelemetryConstants.PROMETHEUS_INSTANCE_KEY, instanceId);
for (Pair<String, String> label : metricsExportConfig.baseLabels()) {
attrsBuilder.put(label.getKey(), label.getValue());
}
Resource resource = Resource.getDefault().merge(Resource.create(attrsBuilder.build()));
SdkMeterProviderBuilder meterProviderBuilder = SdkMeterProvider.builder().setResource(resource);
// Configure exporters from URI
MetricsExporterURI exporterURI = buildMetricsExporterURI(exporterUri, metricsExportConfig);
for (MetricsExporter exporter : exporterURI.getMetricsExporters()) {
MetricReader reader = exporter.asMetricReader();
metricReaders.add(reader);
SdkMeterProviderUtil.registerMetricReaderWithCardinalitySelector(meterProviderBuilder, reader,
instrumentType -> metricCardinalityLimit);
}
return meterProviderBuilder.build();
}
protected MetricsExporterURI buildMetricsExporterURI(String exporterUri, MetricsExportConfig metricsExportConfig) {
return MetricsExporterURI.parse(exporterUri, metricsExportConfig);
}
private void registerJvmMetrics(OpenTelemetry openTelemetry) {
autoCloseableList.addAll(MemoryPools.registerObservers(openTelemetry));
autoCloseableList.addAll(Cpu.registerObservers(openTelemetry));
autoCloseableList.addAll(GarbageCollector.registerObservers(openTelemetry));
autoCloseableList.addAll(Threads.registerObservers(openTelemetry));
LOGGER.info("JVM metrics registered.");
}
@SuppressWarnings({"NP_LOAD_OF_KNOWN_NULL_VALUE", "RCN_REDUNDANT_NULLCHECK_OF_NULL_VALUE"})
private void registerJmxMetrics(OpenTelemetry openTelemetry) {
List<String> jmxConfigPaths = getJmxConfigPaths();
if (jmxConfigPaths.isEmpty()) {
LOGGER.info("No JMX metric config paths provided, skipping JMX metrics registration.");
return;
}
JmxMetricInsight jmxMetricInsight = JmxMetricInsight.createService(openTelemetry, metricsExportConfig.intervalMs());
MetricConfiguration metricConfig = new MetricConfiguration();
for (String path : jmxConfigPaths) {
try (InputStream ins = this.getClass().getResourceAsStream(path)) {
if (ins == null) {
LOGGER.error("JMX config file not found in classpath: {}", path);
continue;
}
RuleParser parser = RuleParser.get();
parser.addMetricDefsTo(metricConfig, ins, path);
} catch (Exception e) {
LOGGER.error("Failed to parse JMX config file: {}", path, e);
}
}
jmxMetricInsight.start(metricConfig);
// JmxMetricInsight does not implement Closeable, so it is not tracked in autoCloseableList and stays active for the JVM lifetime.
LOGGER.info("JMX metrics registered with config paths: {}", jmxConfigPaths);
}
public List<String> getJmxConfigPaths() {
if (StringUtils.isEmpty(jmxConfigPath)) {
return Collections.emptyList();
}
return Stream.of(jmxConfigPath.split(","))
.map(String::trim)
.filter(s -> !s.isEmpty())
.collect(Collectors.toList());
}
/**
* Starts reporting metrics from a given Yammer MetricsRegistry.
*
* @param registry The Yammer registry to bridge metrics from.
*/
public void startYammerMetricsReporter(MetricsRegistry registry) {
if (this.openTelemetrySdk == null) {
throw new IllegalStateException("TelemetryManager is not initialized. Call init() first.");
}
if (registry == null) {
LOGGER.warn("Yammer MetricsRegistry is null, skipping reporter start.");
return;
}
this.yammerReporter = new YammerMetricsReporter(registry);
this.yammerReporter.start(getMeter());
}
public void shutdown() {
autoCloseableList.forEach(autoCloseable -> {
try {
autoCloseable.close();
} catch (Exception e) {
LOGGER.error("Failed to close auto closeable", e);
}
});
metricReaders.forEach(metricReader -> {
metricReader.forceFlush();
try {
metricReader.close();
} catch (IOException e) {
LOGGER.error("Failed to close metric reader", e);
}
});
if (openTelemetrySdk != null) {
openTelemetrySdk.close();
}
}
/**
* Gets the YammerMetricsReporter instance.
*
* @return The YammerMetricsReporter instance.
*/
public YammerMetricsReporter getYammerReporter() {
return this.yammerReporter;
}
public void setMetricCardinalityLimit(int limit) {
this.metricCardinalityLimit = limit;
}
public void setJmxConfigPaths(String jmxConfigPaths) {
this.jmxConfigPath = jmxConfigPaths;
}
/**
* Gets the default meter from the initialized OpenTelemetry SDK.
*
* @return The meter instance.
*/
public Meter getMeter() {
if (this.openTelemetrySdk == null) {
throw new IllegalStateException("TelemetryManager is not initialized. Call init() first.");
}
return this.openTelemetrySdk.getMeter(TelemetryConstants.TELEMETRY_SCOPE_NAME);
}
}

View File

@ -0,0 +1,54 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.opentelemetry;
import io.opentelemetry.api.common.AttributeKey;
/**
* Constants for telemetry, including configuration keys, attribute keys, and default values.
*/
public class TelemetryConstants {
//################################################################
// Service and Resource Attributes
//################################################################
public static final String SERVICE_NAME_KEY = "service.name";
public static final String SERVICE_INSTANCE_ID_KEY = "service.instance.id";
public static final String HOST_NAME_KEY = "host.name";
public static final String TELEMETRY_SCOPE_NAME = "automq_for_kafka";
/**
* The cardinality limit for any single metric.
*/
public static final String METRIC_CARDINALITY_LIMIT_KEY = "automq.telemetry.metric.cardinality.limit";
public static final int DEFAULT_METRIC_CARDINALITY_LIMIT = 20000;
//################################################################
// Prometheus specific Attributes, for compatibility
//################################################################
public static final String PROMETHEUS_JOB_KEY = "job";
public static final String PROMETHEUS_INSTANCE_KEY = "instance";
//################################################################
// Custom Kafka-related Attribute Keys
//################################################################
public static final AttributeKey<Long> START_OFFSET_KEY = AttributeKey.longKey("startOffset");
public static final AttributeKey<Long> END_OFFSET_KEY = AttributeKey.longKey("endOffset");
}

View File

@ -17,7 +17,7 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.exporter;
package com.automq.opentelemetry.common;
public enum OTLPCompressionType {
GZIP("gzip"),

View File

@ -17,7 +17,7 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.exporter;
package com.automq.opentelemetry.common;
public enum OTLPProtocol {
GRPC("grpc"),

View File

@ -0,0 +1,68 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.opentelemetry.exporter;
import com.automq.stream.s3.operator.ObjectStorage;
import org.apache.commons.lang3.tuple.Pair;
import java.util.List;
/**
* Configuration interface for metrics exporter.
*/
public interface MetricsExportConfig {
/**
* Get the cluster ID.
* @return The cluster ID.
*/
String clusterId();
/**
* Check if the current node is a primary node for metrics upload.
* @return True if the current node should upload metrics, false otherwise.
*/
boolean isLeader();
/**
* Get the node ID.
* @return The node ID.
*/
int nodeId();
/**
* Get the object storage instance.
* @return The object storage instance.
*/
ObjectStorage objectStorage();
/**
* Get the base labels to include in all metrics.
* @return The base labels.
*/
List<Pair<String, String>> baseLabels();
/**
* Get the interval in milliseconds for metrics export.
* @return The interval in milliseconds.
*/
int intervalMs();
}

View File

@ -17,10 +17,13 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.exporter;
package com.automq.opentelemetry.exporter;
import io.opentelemetry.sdk.metrics.export.MetricReader;
/**
* An interface for metrics exporters, which can be converted to an OpenTelemetry MetricReader.
*/
public interface MetricsExporter {
MetricReader asMetricReader();
}

View File

@ -0,0 +1,47 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.opentelemetry.exporter;
import java.net.URI;
import java.util.List;
import java.util.Map;
/**
* Service Provider Interface that allows extending the available metrics exporters
* without modifying the core AutoMQ OpenTelemetry module.
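*
* <p>A minimal sketch of a custom provider (the class names below are illustrative, not part of this
* module); implementations are discovered via {@link java.util.ServiceLoader}, so they must also be
* listed in {@code META-INF/services/com.automq.opentelemetry.exporter.MetricsExporterProvider}:
* <pre>{@code
* public class RemoteWriteExporterProvider implements MetricsExporterProvider {
*     public boolean supports(String scheme) {
*         return "rw".equals(scheme);
*     }
*
*     public MetricsExporter create(MetricsExportConfig config, URI uri,
*         Map<String, List<String>> queryParameters) {
*         // Build a hypothetical remote-write exporter from the URI and config.
*         return new RemoteWriteMetricsExporter(config, uri);
*     }
* }
* }</pre>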
*/
public interface MetricsExporterProvider {
/**
* @param scheme exporter scheme (e.g. "rw")
* @return true if this provider can create an exporter for the supplied scheme
*/
boolean supports(String scheme);
/**
* Creates a metrics exporter for the provided URI.
*
* @param config metrics configuration
* @param uri original exporter URI
* @param queryParameters parsed query parameters from the URI
* @return a MetricsExporter instance, or {@code null} if unable to create one
*/
MetricsExporter create(MetricsExportConfig config, URI uri, Map<String, List<String>> queryParameters);
}

View File

@ -17,12 +17,13 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.exporter;
package com.automq.opentelemetry.exporter;
public enum MetricsExporterType {
OTLP("otlp"),
PROMETHEUS("prometheus"),
OPS("ops");
OPS("ops"),
OTHER("other");
private final String type;
@ -40,6 +41,6 @@ public enum MetricsExporterType {
return exporterType;
}
}
throw new IllegalArgumentException("Invalid metrics exporter type: " + type);
return OTHER;
}
}

View File

@ -0,0 +1,220 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.opentelemetry.exporter;
import com.automq.opentelemetry.common.OTLPCompressionType;
import com.automq.opentelemetry.common.OTLPProtocol;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.ServiceLoader;
/**
* Parses the exporter URI and creates the corresponding MetricsExporter instances.
*/
public class MetricsExporterURI {
private static final Logger LOGGER = LoggerFactory.getLogger(MetricsExporterURI.class);
private static final List<MetricsExporterProvider> PROVIDERS;
static {
List<MetricsExporterProvider> providers = new ArrayList<>();
ServiceLoader.load(MetricsExporterProvider.class).forEach(providers::add);
PROVIDERS = Collections.unmodifiableList(providers);
if (!PROVIDERS.isEmpty()) {
LOGGER.info("Loaded {} telemetry exporter providers", PROVIDERS.size());
}
}
private final List<MetricsExporter> metricsExporters;
private MetricsExporterURI(List<MetricsExporter> metricsExporters) {
this.metricsExporters = metricsExporters != null ? metricsExporters : new ArrayList<>();
}
public List<MetricsExporter> getMetricsExporters() {
return metricsExporters;
}
public static MetricsExporterURI parse(String uriStr, MetricsExportConfig config) {
LOGGER.info("Parsing metrics exporter URI: {}", uriStr);
if (StringUtils.isBlank(uriStr)) {
LOGGER.info("Metrics exporter URI is not configured, no metrics will be exported.");
return new MetricsExporterURI(Collections.emptyList());
}
// Support multiple exporters separated by comma
String[] exporterUris = uriStr.split(",");
if (exporterUris.length == 0) {
return new MetricsExporterURI(Collections.emptyList());
}
List<MetricsExporter> exporters = new ArrayList<>();
for (String uri : exporterUris) {
if (StringUtils.isBlank(uri)) {
continue;
}
MetricsExporter exporter = parseExporter(config, uri.trim());
if (exporter != null) {
exporters.add(exporter);
}
}
return new MetricsExporterURI(exporters);
}
public static MetricsExporter parseExporter(MetricsExportConfig config, String uriStr) {
try {
URI uri = new URI(uriStr);
String type = uri.getScheme();
if (StringUtils.isBlank(type)) {
LOGGER.error("Invalid metrics exporter URI: {}, exporter scheme is missing", uriStr);
throw new IllegalArgumentException("Invalid metrics exporter URI: " + uriStr);
}
Map<String, List<String>> queries = parseQueryParameters(uri);
return parseExporter(config, type, queries, uri);
} catch (Exception e) {
LOGGER.warn("Parse metrics exporter URI {} failed", uriStr, e);
throw new IllegalArgumentException("Invalid metrics exporter URI: " + uriStr, e);
}
}
public static MetricsExporter parseExporter(MetricsExportConfig config, String type, Map<String, List<String>> queries, URI uri) {
MetricsExporterType exporterType = MetricsExporterType.fromString(type);
switch (exporterType) {
case PROMETHEUS:
return buildPrometheusExporter(config, queries, uri);
case OTLP:
return buildOtlpExporter(config, queries, uri);
case OPS:
return buildS3MetricsExporter(config, uri);
default:
break;
}
MetricsExporterProvider provider = findProvider(type);
if (provider != null) {
MetricsExporter exporter = provider.create(config, uri, queries);
if (exporter != null) {
return exporter;
}
}
LOGGER.warn("Unsupported metrics exporter type: {}", type);
return null;
}
private static MetricsExporter buildPrometheusExporter(MetricsExportConfig config, Map<String, List<String>> queries, URI uri) {
// Use query parameters if available, otherwise fall back to URI authority or config defaults
String host = getStringFromQuery(queries, "host", uri.getHost());
if (StringUtils.isBlank(host)) {
host = "localhost";
}
int port = uri.getPort();
if (port <= 0) {
String portStr = getStringFromQuery(queries, "port", null);
if (StringUtils.isNotBlank(portStr)) {
try {
port = Integer.parseInt(portStr);
} catch (NumberFormatException e) {
LOGGER.warn("Invalid port in query parameters: {}, using default", portStr);
port = 9090;
}
} else {
port = 9090;
}
}
return new PrometheusMetricsExporter(host, port, config.baseLabels());
}
private static MetricsExporter buildOtlpExporter(MetricsExportConfig config, Map<String, List<String>> queries, URI uri) {
// Get endpoint from query parameters or construct from URI
String endpoint = getStringFromQuery(queries, "endpoint", null);
if (StringUtils.isBlank(endpoint)) {
endpoint = uri.getScheme() + "://" + uri.getAuthority();
}
// Get protocol from query parameters or config
String protocol = getStringFromQuery(queries, "protocol", OTLPProtocol.GRPC.getProtocol());
// Get compression from query parameters or config
String compression = getStringFromQuery(queries, "compression", OTLPCompressionType.NONE.getType());
return new OTLPMetricsExporter(config.intervalMs(), endpoint, protocol, compression);
}
private static MetricsExporter buildS3MetricsExporter(MetricsExportConfig config, URI uri) {
LOGGER.info("Creating S3 metrics exporter from URI: {}", uri);
if (config.objectStorage() == null) {
LOGGER.warn("No object storage configured, skip s3 metrics exporter creation.");
return null;
}
// Create the S3MetricsExporterAdapter with appropriate configuration
return new com.automq.opentelemetry.exporter.s3.S3MetricsExporterAdapter(config);
}
private static Map<String, List<String>> parseQueryParameters(URI uri) {
Map<String, List<String>> queries = new HashMap<>();
String query = uri.getQuery();
if (StringUtils.isNotBlank(query)) {
String[] pairs = query.split("&");
for (String pair : pairs) {
String[] keyValue = pair.split("=", 2);
if (keyValue.length == 2) {
String key = keyValue[0];
String value = keyValue[1];
queries.computeIfAbsent(key, k -> new ArrayList<>()).add(value);
}
}
}
return queries;
}
private static String getStringFromQuery(Map<String, List<String>> queries, String key, String defaultValue) {
List<String> values = queries.get(key);
if (values != null && !values.isEmpty()) {
return values.get(0);
}
return defaultValue;
}
private static MetricsExporterProvider findProvider(String scheme) {
for (MetricsExporterProvider provider : PROVIDERS) {
try {
if (provider.supports(scheme)) {
return provider;
}
} catch (Exception e) {
LOGGER.warn("Telemetry exporter provider {} failed to evaluate support for scheme {}", provider.getClass().getName(), scheme, e);
}
}
return null;
}
}

View File

@ -17,10 +17,12 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.exporter;
package com.automq.opentelemetry.exporter;
import org.apache.kafka.common.utils.Utils;
import com.automq.opentelemetry.common.OTLPCompressionType;
import com.automq.opentelemetry.common.OTLPProtocol;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -36,13 +38,16 @@ import io.opentelemetry.sdk.metrics.export.PeriodicMetricReaderBuilder;
public class OTLPMetricsExporter implements MetricsExporter {
private static final Logger LOGGER = LoggerFactory.getLogger(OTLPMetricsExporter.class);
private final int intervalMs;
private final long intervalMs;
private final String endpoint;
private final OTLPProtocol protocol;
private final OTLPCompressionType compression;
// Default timeout for OTLP exporters
private static final long DEFAULT_EXPORTER_TIMEOUT_MS = 30000;
public OTLPMetricsExporter(int intervalMs, String endpoint, String protocol, String compression) {
if (Utils.isBlank(endpoint) || "null".equals(endpoint)) {
public OTLPMetricsExporter(long intervalMs, String endpoint, String protocol, String compression) {
if (StringUtils.isBlank(endpoint) || "null".equals(endpoint)) {
throw new IllegalArgumentException("OTLP endpoint is required");
}
this.intervalMs = intervalMs;
@ -50,7 +55,7 @@ public class OTLPMetricsExporter implements MetricsExporter {
this.protocol = OTLPProtocol.fromString(protocol);
this.compression = OTLPCompressionType.fromString(compression);
LOGGER.info("OTLPMetricsExporter initialized with endpoint: {}, protocol: {}, compression: {}, intervalMs: {}",
endpoint, protocol, compression, intervalMs);
endpoint, protocol, compression, intervalMs);
}
public String endpoint() {
@ -65,31 +70,29 @@ public class OTLPMetricsExporter implements MetricsExporter {
return compression;
}
public int intervalMs() {
public long intervalMs() {
return intervalMs;
}
@Override
public MetricReader asMetricReader() {
PeriodicMetricReaderBuilder builder;
switch (protocol) {
case GRPC:
PeriodicMetricReaderBuilder builder = switch (protocol) {
case GRPC -> {
OtlpGrpcMetricExporterBuilder otlpExporterBuilder = OtlpGrpcMetricExporter.builder()
.setEndpoint(endpoint)
.setCompression(compression.getType())
.setTimeout(Duration.ofMillis(ExporterConstants.DEFAULT_EXPORTER_TIMEOUT_MS));
builder = PeriodicMetricReader.builder(otlpExporterBuilder.build());
break;
case HTTP:
.setTimeout(Duration.ofMillis(DEFAULT_EXPORTER_TIMEOUT_MS));
yield PeriodicMetricReader.builder(otlpExporterBuilder.build());
}
case HTTP -> {
OtlpHttpMetricExporterBuilder otlpHttpExporterBuilder = OtlpHttpMetricExporter.builder()
.setEndpoint(endpoint)
.setCompression(compression.getType())
.setTimeout(Duration.ofMillis(ExporterConstants.DEFAULT_EXPORTER_TIMEOUT_MS));
builder = PeriodicMetricReader.builder(otlpHttpExporterBuilder.build());
break;
default:
throw new IllegalArgumentException("Unsupported OTLP protocol: " + protocol);
}
.setTimeout(Duration.ofMillis(DEFAULT_EXPORTER_TIMEOUT_MS));
yield PeriodicMetricReader.builder(otlpHttpExporterBuilder.build());
}
default -> throw new IllegalArgumentException("Unsupported OTLP protocol: " + protocol);
};
return builder.setInterval(Duration.ofMillis(intervalMs)).build();
}

View File

@ -17,11 +17,9 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.exporter;
package com.automq.opentelemetry.exporter;
import kafka.log.stream.s3.telemetry.MetricsConstants;
import org.apache.kafka.common.utils.Utils;
import com.automq.opentelemetry.TelemetryConstants;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
@ -41,7 +39,7 @@ public class PrometheusMetricsExporter implements MetricsExporter {
private final Set<String> baseLabelKeys;
public PrometheusMetricsExporter(String host, int port, List<Pair<String, String>> baseLabels) {
if (Utils.isBlank(host)) {
if (host == null || host.isEmpty()) {
throw new IllegalArgumentException("Illegal Prometheus host");
}
if (port <= 0) {
@ -50,15 +48,7 @@ public class PrometheusMetricsExporter implements MetricsExporter {
this.host = host;
this.port = port;
this.baseLabelKeys = baseLabels.stream().map(Pair::getKey).collect(Collectors.toSet());
LOGGER.info("PrometheusMetricsExporter initialized with host: {}, port: {}", host, port);
}
public String host() {
return host;
}
public int port() {
return port;
LOGGER.info("PrometheusMetricsExporter initialized with host: {}, port: {}, labels: {}", host, port, baseLabels);
}
@Override
@ -66,11 +56,13 @@ public class PrometheusMetricsExporter implements MetricsExporter {
return PrometheusHttpServer.builder()
.setHost(host)
.setPort(port)
.setAllowedResourceAttributesFilter(resourceAttributes ->
MetricsConstants.JOB.equals(resourceAttributes)
|| MetricsConstants.INSTANCE.equals(resourceAttributes)
|| MetricsConstants.HOST_NAME.equals(resourceAttributes)
|| baseLabelKeys.contains(resourceAttributes))
// This filter is to align with the original behavior, allowing only specific resource attributes
// to be converted to prometheus labels.
.setAllowedResourceAttributesFilter(resourceAttributeKey ->
TelemetryConstants.PROMETHEUS_JOB_KEY.equals(resourceAttributeKey)
|| TelemetryConstants.PROMETHEUS_INSTANCE_KEY.equals(resourceAttributeKey)
|| TelemetryConstants.HOST_NAME_KEY.equals(resourceAttributeKey)
|| baseLabelKeys.contains(resourceAttributeKey))
.build();
}
}

View File

@ -0,0 +1,86 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.opentelemetry.exporter.s3;
import com.automq.stream.s3.ByteBufAlloc;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import io.netty.buffer.ByteBuf;
/**
* Utility class for data compression and decompression.
*/
public class CompressionUtils {
/**
* Compress a ByteBuf using GZIP.
*
* @param input The input ByteBuf to compress.
* @return A new ByteBuf containing the compressed data.
* @throws IOException If an I/O error occurs during compression.
*/
public static ByteBuf compress(ByteBuf input) throws IOException {
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream);
byte[] buffer = new byte[input.readableBytes()];
input.readBytes(buffer);
gzipOutputStream.write(buffer);
gzipOutputStream.close();
ByteBuf compressed = ByteBufAlloc.byteBuffer(byteArrayOutputStream.size());
compressed.writeBytes(byteArrayOutputStream.toByteArray());
return compressed;
}
/**
* Decompress a GZIP-compressed ByteBuf.
*
* @param input The compressed ByteBuf to decompress.
* @return A new ByteBuf containing the decompressed data.
* @throws IOException If an I/O error occurs during decompression.
*/
public static ByteBuf decompress(ByteBuf input) throws IOException {
byte[] compressedData = new byte[input.readableBytes()];
input.readBytes(compressedData);
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(compressedData);
GZIPInputStream gzipInputStream = new GZIPInputStream(byteArrayInputStream);
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int bytesRead;
while ((bytesRead = gzipInputStream.read(buffer)) != -1) {
byteArrayOutputStream.write(buffer, 0, bytesRead);
}
gzipInputStream.close();
byteArrayOutputStream.close();
byte[] uncompressedData = byteArrayOutputStream.toByteArray();
ByteBuf output = ByteBufAlloc.byteBuffer(uncompressedData.length);
output.writeBytes(uncompressedData);
return output;
}
}

View File

@ -0,0 +1,276 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.opentelemetry.exporter.s3;
import org.apache.commons.lang3.StringUtils;
import java.util.Locale;
/**
* Utility class for Prometheus metric and label naming.
*/
public class PrometheusUtils {
private static final String TOTAL_SUFFIX = "_total";
/**
* Get the Prometheus unit from the OpenTelemetry unit.
*
* @param unit The OpenTelemetry unit.
* @return The Prometheus unit.
*/
public static String getPrometheusUnit(String unit) {
if (unit.contains("{")) {
return "";
}
switch (unit) {
// Time
case "d":
return "days";
case "h":
return "hours";
case "min":
return "minutes";
case "s":
return "seconds";
case "ms":
return "milliseconds";
case "us":
return "microseconds";
case "ns":
return "nanoseconds";
// Bytes
case "By":
return "bytes";
case "KiBy":
return "kibibytes";
case "MiBy":
return "mebibytes";
case "GiBy":
return "gibibytes";
case "TiBy":
return "tibibytes";
case "KBy":
return "kilobytes";
case "MBy":
return "megabytes";
case "GBy":
return "gigabytes";
case "TBy":
return "terabytes";
// SI
case "m":
return "meters";
case "V":
return "volts";
case "A":
return "amperes";
case "J":
return "joules";
case "W":
return "watts";
case "g":
return "grams";
// Misc
case "Cel":
return "celsius";
case "Hz":
return "hertz";
case "1":
return "";
case "%":
return "percent";
// Rate units (per second)
case "1/s":
return "per_second";
case "By/s":
return "bytes_per_second";
case "KiBy/s":
return "kibibytes_per_second";
case "MiBy/s":
return "mebibytes_per_second";
case "GiBy/s":
return "gibibytes_per_second";
case "KBy/s":
return "kilobytes_per_second";
case "MBy/s":
return "megabytes_per_second";
case "GBy/s":
return "gigabytes_per_second";
// Rate units (per minute)
case "1/min":
return "per_minute";
case "By/min":
return "bytes_per_minute";
// Rate units (per hour)
case "1/h":
return "per_hour";
case "By/h":
return "bytes_per_hour";
// Rate units (per day)
case "1/d":
return "per_day";
case "By/d":
return "bytes_per_day";
default:
return unit;
}
}
/**
* Map a metric name to a Prometheus-compatible name.
*
* @param name The original metric name.
* @param unit The metric unit.
* @param isCounter Whether the metric is a counter.
* @param isGauge Whether the metric is a gauge.
* @return The Prometheus-compatible metric name.
*/
public static String mapMetricsName(String name, String unit, boolean isCounter, boolean isGauge) {
// Replace "." into "_"
name = name.replaceAll("\\.", "_");
String prometheusUnit = getPrometheusUnit(unit);
boolean shouldAppendUnit = StringUtils.isNotBlank(prometheusUnit) && !name.contains(prometheusUnit);
// append prometheus unit if not null or empty.
// unit should be appended before type suffix
if (shouldAppendUnit) {
name = name + "_" + prometheusUnit;
}
// trim counter's _total suffix so the unit is placed before it.
if (isCounter && name.endsWith(TOTAL_SUFFIX)) {
name = name.substring(0, name.length() - TOTAL_SUFFIX.length());
}
// replace _total suffix, or add if it wasn't already present.
if (isCounter) {
name = name + TOTAL_SUFFIX;
}
// special case - gauge with intelligent Connect metric handling
if ("1".equals(unit) && isGauge && !name.contains("ratio")) {
if (isConnectMetric(name)) {
// For Connect metrics, use improved logic to avoid misleading _ratio suffix
if (shouldAddRatioSuffixForConnect(name)) {
name = name + "_ratio";
}
} else {
// For other metrics, maintain original behavior
name = name + "_ratio";
}
}
return name;
}
/**
* Map a label name to a Prometheus-compatible name.
*
* @param name The original label name.
* @return The Prometheus-compatible label name.
*/
public static String mapLabelName(String name) {
if (StringUtils.isBlank(name)) {
return "";
}
return name.replaceAll("\\.", "_");
}
/**
* Check if a metric name is related to Kafka Connect.
*
* @param name The metric name to check.
* @return true if it's a Connect metric, false otherwise.
*/
private static boolean isConnectMetric(String name) {
String lowerName = name.toLowerCase(Locale.ROOT);
return lowerName.contains("kafka_connector_") ||
lowerName.contains("kafka_task_") ||
lowerName.contains("kafka_worker_") ||
lowerName.contains("kafka_connect_") ||
lowerName.contains("kafka_source_task_") ||
lowerName.contains("kafka_sink_task_") ||
lowerName.contains("connector_metrics") ||
lowerName.contains("task_metrics") ||
lowerName.contains("worker_metrics") ||
lowerName.contains("source_task_metrics") ||
lowerName.contains("sink_task_metrics");
}
/**
* Intelligently determine if a Connect metric should have a _ratio suffix.
* This method avoids adding misleading _ratio suffixes to count-based metrics.
*
* @param name The metric name to check.
* @return true if _ratio suffix should be added, false otherwise.
*/
private static boolean shouldAddRatioSuffixForConnect(String name) {
String lowerName = name.toLowerCase(Locale.ROOT);
if (hasRatioRelatedWords(lowerName)) {
return false;
}
if (isCountMetric(lowerName)) {
return false;
}
return isRatioMetric(lowerName);
}
private static boolean hasRatioRelatedWords(String lowerName) {
return lowerName.contains("ratio") || lowerName.contains("percent") ||
lowerName.contains("rate") || lowerName.contains("fraction");
}
private static boolean isCountMetric(String lowerName) {
return hasBasicCountKeywords(lowerName) || hasConnectCountKeywords(lowerName) ||
hasStatusCountKeywords(lowerName);
}
private static boolean hasBasicCountKeywords(String lowerName) {
return lowerName.contains("count") || lowerName.contains("num") ||
lowerName.contains("size") || lowerName.contains("total") ||
lowerName.contains("active") || lowerName.contains("current");
}
private static boolean hasConnectCountKeywords(String lowerName) {
return lowerName.contains("partition") || lowerName.contains("task") ||
lowerName.contains("connector") || lowerName.contains("seq_no") ||
lowerName.contains("seq_num") || lowerName.contains("attempts");
}
private static boolean hasStatusCountKeywords(String lowerName) {
return lowerName.contains("success") || lowerName.contains("failure") ||
lowerName.contains("errors") || lowerName.contains("retries") ||
lowerName.contains("skipped") || lowerName.contains("running") ||
lowerName.contains("paused") || lowerName.contains("failed") ||
lowerName.contains("destroyed");
}
private static boolean isRatioMetric(String lowerName) {
return lowerName.contains("utilization") ||
lowerName.contains("usage") ||
lowerName.contains("load") ||
lowerName.contains("efficiency") ||
lowerName.contains("hit_rate") ||
lowerName.contains("miss_rate");
}
}

View File

@ -17,9 +17,9 @@
* limitations under the License.
*/
package com.automq.shell.metrics;
package com.automq.opentelemetry.exporter.s3;
import com.automq.shell.util.Utils;
import com.automq.opentelemetry.exporter.MetricsExportConfig;
import com.automq.stream.s3.operator.ObjectStorage;
import com.automq.stream.s3.operator.ObjectStorage.ObjectInfo;
import com.automq.stream.s3.operator.ObjectStorage.ObjectPath;
@ -60,6 +60,9 @@ import io.opentelemetry.sdk.metrics.data.HistogramPointData;
import io.opentelemetry.sdk.metrics.data.MetricData;
import io.opentelemetry.sdk.metrics.export.MetricExporter;
/**
* An S3 metrics exporter that uploads metrics data to S3 buckets.
*/
public class S3MetricsExporter implements MetricExporter {
private static final Logger LOGGER = LoggerFactory.getLogger(S3MetricsExporter.class);
@ -68,13 +71,13 @@ public class S3MetricsExporter implements MetricExporter {
public static final int MAX_JITTER_INTERVAL = 60 * 1000;
public static final int DEFAULT_BUFFER_SIZE = 16 * 1024 * 1024;
private final S3MetricsConfig config;
private final MetricsExportConfig config;
private final Map<String, String> defaultTagMap = new HashMap<>();
private final ByteBuf uploadBuffer = Unpooled.directBuffer(DEFAULT_BUFFER_SIZE);
private final Random random = new Random();
private static final Random RANDOM = new Random();
private volatile long lastUploadTimestamp = System.currentTimeMillis();
private volatile long nextUploadInterval = UPLOAD_INTERVAL + random.nextInt(MAX_JITTER_INTERVAL);
private volatile long nextUploadInterval = UPLOAD_INTERVAL + RANDOM.nextInt(MAX_JITTER_INTERVAL);
private final ObjectStorage objectStorage;
private final ObjectMapper objectMapper = new ObjectMapper();
@ -83,7 +86,12 @@ public class S3MetricsExporter implements MetricExporter {
private final Thread uploadThread;
private final Thread cleanupThread;
public S3MetricsExporter(S3MetricsConfig config) {
/**
* Creates a new S3MetricsExporter.
*
* @param config The configuration for the S3 metrics exporter.
*/
public S3MetricsExporter(MetricsExportConfig config) {
this.config = config;
this.objectStorage = config.objectStorage();
@ -101,6 +109,9 @@ public class S3MetricsExporter implements MetricExporter {
cleanupThread.setDaemon(true);
}
/**
* Starts the exporter threads.
*/
public void start() {
uploadThread.start();
cleanupThread.start();
@ -139,7 +150,7 @@ public class S3MetricsExporter implements MetricExporter {
public void run() {
while (!Thread.currentThread().isInterrupted()) {
try {
if (closed || !config.isActiveController()) {
if (closed || !config.isLeader()) {
Thread.sleep(Duration.ofMinutes(1).toMillis());
continue;
}
@ -162,16 +173,11 @@ public class S3MetricsExporter implements MetricExporter {
CompletableFuture.allOf(deleteFutures).join();
}
}
if (Threads.sleep(Duration.ofMinutes(1).toMillis())) {
break;
}
Threads.sleep(Duration.ofMinutes(1).toMillis());
} catch (InterruptedException e) {
break;
} catch (Exception e) {
LOGGER.error("Cleanup s3 metrics failed", e);
if (Threads.sleep(Duration.ofMinutes(1).toMillis())) {
break;
}
}
}
}
@ -256,13 +262,13 @@ public class S3MetricsExporter implements MetricExporter {
synchronized (uploadBuffer) {
if (uploadBuffer.readableBytes() > 0) {
try {
objectStorage.write(WriteOptions.DEFAULT, getObjectKey(), Utils.compress(uploadBuffer.slice().asReadOnly())).get();
objectStorage.write(WriteOptions.DEFAULT, getObjectKey(), CompressionUtils.compress(uploadBuffer.slice().asReadOnly())).get();
} catch (Exception e) {
LOGGER.error("Failed to upload metrics to s3", e);
return CompletableResultCode.ofFailure();
} finally {
lastUploadTimestamp = System.currentTimeMillis();
nextUploadInterval = UPLOAD_INTERVAL + random.nextInt(MAX_JITTER_INTERVAL);
nextUploadInterval = UPLOAD_INTERVAL + RANDOM.nextInt(MAX_JITTER_INTERVAL);
uploadBuffer.clear();
}
}

View File

@ -0,0 +1,63 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.opentelemetry.exporter.s3;
import com.automq.opentelemetry.exporter.MetricsExportConfig;
import com.automq.opentelemetry.exporter.MetricsExporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import io.opentelemetry.sdk.metrics.export.MetricReader;
import io.opentelemetry.sdk.metrics.export.PeriodicMetricReader;
/**
* An adapter class that implements the MetricsExporter interface and uses S3MetricsExporter
* for actual metrics exporting functionality.
*/
public class S3MetricsExporterAdapter implements MetricsExporter {
private static final Logger LOGGER = LoggerFactory.getLogger(S3MetricsExporterAdapter.class);
private final MetricsExportConfig metricsExportConfig;
/**
* Creates a new S3MetricsExporterAdapter.
*
* @param metricsExportConfig The configuration for the S3 metrics exporter.
*/
public S3MetricsExporterAdapter(MetricsExportConfig metricsExportConfig) {
this.metricsExportConfig = metricsExportConfig;
LOGGER.info("S3MetricsExporterAdapter initialized with labels :{}", metricsExportConfig.baseLabels());
}
@Override
public MetricReader asMetricReader() {
// Create and start the S3MetricsExporter
S3MetricsExporter s3MetricsExporter = new S3MetricsExporter(metricsExportConfig);
s3MetricsExporter.start();
// Create and return the periodic metric reader
return PeriodicMetricReader.builder(s3MetricsExporter)
.setInterval(Duration.ofMillis(metricsExportConfig.intervalMs()))
.build();
}
}

View File

@ -17,7 +17,7 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.otel;
package com.automq.opentelemetry.yammer;
import com.yammer.metrics.core.Histogram;
import com.yammer.metrics.core.Timer;

View File

@ -17,7 +17,7 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.otel;
package com.automq.opentelemetry.yammer;
import com.yammer.metrics.core.MetricName;

View File

@ -17,9 +17,8 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.otel;
package com.automq.opentelemetry.yammer;
import kafka.autobalancer.metricsreporter.metric.MetricsUtils;
import com.yammer.metrics.core.Counter;
import com.yammer.metrics.core.Gauge;
@ -32,16 +31,54 @@ import com.yammer.metrics.core.Timer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.api.metrics.Meter;
import scala.UninitializedFieldError;
public class OTelMetricsProcessor implements MetricProcessor<Void> {
private static final Logger LOGGER = LoggerFactory.getLogger(OTelMetricsProcessor.class);
/**
* A metrics processor that bridges Yammer metrics to OpenTelemetry metrics.
*
* <p>This processor specifically handles Histogram and Timer metrics from the Yammer metrics
* library and converts them to OpenTelemetry gauge metrics that track delta mean values.
* It implements the Yammer {@link MetricProcessor} interface to process metrics and creates
* corresponding OpenTelemetry metrics with proper attributes derived from the metric scope.
*
* <p>The processor:
* <ul>
* <li>Converts Yammer Histogram and Timer metrics to OpenTelemetry gauges</li>
* <li>Calculates delta mean values using {@link DeltaHistogram}</li>
* <li>Parses metric scopes to extract attributes for OpenTelemetry metrics</li>
* <li>Maintains a registry of processed metrics for lifecycle management</li>
* <li>Supports metric removal when metrics are no longer needed</li>
* </ul>
*
* <p>Supported metric types:
* <ul>
* <li>{@link Histogram} - Converted to delta mean gauge</li>
* <li>{@link Timer} - Converted to delta mean gauge</li>
* </ul>
*
* <p>Unsupported metric types (will throw {@link UnsupportedOperationException}):
* <ul>
* <li>{@link Counter}</li>
* <li>{@link Gauge}</li>
* <li>{@link Metered}</li>
* </ul>
*
* <p>Thread Safety: This class is thread-safe and uses concurrent data structures
* to handle metrics registration and removal from multiple threads.
*
* @see MetricProcessor
* @see DeltaHistogram
* @see OTelMetricUtils
*/
public class YammerMetricsProcessor implements MetricProcessor<Void> {
private static final Logger LOGGER = LoggerFactory.getLogger(YammerMetricsProcessor.class);
private final Map<String, Map<MetricName, MetricWrapper>> metrics = new ConcurrentHashMap<>();
private Meter meter = null;
@ -71,9 +108,9 @@ public class OTelMetricsProcessor implements MetricProcessor<Void> {
private void processDeltaHistogramMetric(MetricName name, DeltaHistogram deltaHistogram) {
if (meter == null) {
throw new UninitializedFieldError("Meter is not initialized");
throw new IllegalStateException("Meter is not initialized");
}
Map<String, String> tags = MetricsUtils.yammerMetricScopeToTags(name.getScope());
Map<String, String> tags = yammerMetricScopeToTags(name.getScope());
AttributesBuilder attrBuilder = Attributes.builder();
if (tags != null) {
String value = tags.remove(OTelMetricUtils.REQUEST_TAG_KEY);
@ -116,6 +153,29 @@ public class OTelMetricsProcessor implements MetricProcessor<Void> {
});
}
/**
* Convert a yammer metrics scope to a tags map.
*
* @param scope Scope of the Yammer metric.
* @return Empty map for {@code null} scope, {@code null} for scope with keys without a matching value (i.e. unacceptable
* scope) (see <a href="https://github.com/linkedin/cruise-control/issues/1296">...</a>), parsed tags otherwise.
*/
public static Map<String, String> yammerMetricScopeToTags(String scope) {
if (scope != null) {
String[] kv = scope.split("\\.");
if (kv.length % 2 != 0) {
return null;
}
Map<String, String> tags = new HashMap<>();
for (int i = 0; i < kv.length; i += 2) {
tags.put(kv[i], kv[i + 1]);
}
return tags;
} else {
return Collections.emptyMap();
}
}
static class MetricWrapper {
private final Attributes attr;
private final DeltaHistogram deltaHistogram;

View File

@ -17,7 +17,7 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry.otel;
package com.automq.opentelemetry.yammer;
import com.yammer.metrics.core.Metric;
import com.yammer.metrics.core.MetricName;
@ -27,18 +27,25 @@ import com.yammer.metrics.core.MetricsRegistryListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException;
import io.opentelemetry.api.metrics.Meter;
// This class is responsible for transforming yammer histogram metrics (mean, max) into OTel metrics
public class OTelHistogramReporter implements MetricsRegistryListener {
private static final Logger LOGGER = LoggerFactory.getLogger(OTelHistogramReporter.class);
/**
* A listener that bridges Yammer Histogram metrics to OpenTelemetry.
* It listens for new metrics added to a MetricsRegistry and creates corresponding
* OTel gauge metrics for mean and max values of histograms.
*/
public class YammerMetricsReporter implements MetricsRegistryListener, Closeable {
private static final Logger LOGGER = LoggerFactory.getLogger(YammerMetricsReporter.class);
private final MetricsRegistry metricsRegistry;
private final OTelMetricsProcessor metricsProcessor;
private final YammerMetricsProcessor metricsProcessor;
private volatile Meter meter;
public OTelHistogramReporter(MetricsRegistry metricsRegistry) {
public YammerMetricsReporter(MetricsRegistry metricsRegistry) {
this.metricsRegistry = metricsRegistry;
this.metricsProcessor = new OTelMetricsProcessor();
this.metricsProcessor = new YammerMetricsProcessor();
}
public void start(Meter meter) {
@ -71,4 +78,16 @@ public class OTelHistogramReporter implements MetricsRegistryListener {
}
}
}
@Override
public void close() throws IOException {
try {
// Remove this reporter as a listener from the metrics registry
metricsRegistry.removeListener(this);
LOGGER.info("YammerMetricsReporter stopped and removed from metrics registry");
} catch (Exception e) {
LOGGER.error("Error while closing YammerMetricsReporter", e);
throw new IOException("Failed to close YammerMetricsReporter", e);
}
}
}

View File

@ -18,7 +18,8 @@ dependencies {
compileOnly libs.awsSdkAuth
implementation libs.reload4j
implementation libs.nettyBuffer
implementation libs.opentelemetrySdk
implementation project(':automq-metrics')
implementation project(':automq-log-uploader')
implementation libs.jacksonDatabind
implementation libs.jacksonYaml
implementation libs.commonLang
@ -65,4 +66,4 @@ jar {
manifest {
attributes 'Main-Class': 'com.automq.shell.AutoMQCLI'
}
}
}

View File

@ -110,9 +110,11 @@ public class Deploy implements Callable<Integer> {
String globalAccessKey = null;
String globalSecretKey = null;
for (Env env : topo.getGlobal().getEnvs()) {
if ("KAFKA_S3_ACCESS_KEY".equals(env.getName())) {
if ("KAFKA_S3_ACCESS_KEY".equals(env.getName()) ||
"AWS_ACCESS_KEY_ID".equals(env.getName())) {
globalAccessKey = env.getValue();
} else if ("KAFKA_S3_SECRET_KEY".equals(env.getName())) {
} else if ("KAFKA_S3_SECRET_KEY".equals(env.getName()) ||
"AWS_SECRET_ACCESS_KEY".equals(env.getName())) {
globalSecretKey = env.getValue();
}
}

View File

@ -1,50 +0,0 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.shell.log;
import org.apache.log4j.RollingFileAppender;
import org.apache.log4j.spi.LoggingEvent;
public class S3RollingFileAppender extends RollingFileAppender {
private final LogUploader logUploader = LogUploader.getInstance();
@Override
protected void subAppend(LoggingEvent event) {
super.subAppend(event);
if (!closed) {
LogRecorder.LogEvent logEvent = new LogRecorder.LogEvent(
event.getTimeStamp(),
event.getLevel().toString(),
event.getLoggerName(),
event.getRenderedMessage(),
event.getThrowableStrRep());
try {
logEvent.validate();
} catch (IllegalArgumentException e) {
// Drop invalid log event
errorHandler.error("Failed to validate log event", e, 0);
return;
}
logUploader.append(logEvent);
}
}
}

View File

@ -1,128 +0,0 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.automq.shell.metrics;
import org.apache.commons.lang3.StringUtils;
public class PrometheusUtils {
private static final String TOTAL_SUFFIX = "_total";
public static String getPrometheusUnit(String unit) {
if (unit.contains("{")) {
return "";
}
switch (unit) {
// Time
case "d":
return "days";
case "h":
return "hours";
case "min":
return "minutes";
case "s":
return "seconds";
case "ms":
return "milliseconds";
case "us":
return "microseconds";
case "ns":
return "nanoseconds";
// Bytes
case "By":
return "bytes";
case "KiBy":
return "kibibytes";
case "MiBy":
return "mebibytes";
case "GiBy":
return "gibibytes";
case "TiBy":
return "tibibytes";
case "KBy":
return "kilobytes";
case "MBy":
return "megabytes";
case "GBy":
return "gigabytes";
case "TBy":
return "terabytes";
// SI
case "m":
return "meters";
case "V":
return "volts";
case "A":
return "amperes";
case "J":
return "joules";
case "W":
return "watts";
case "g":
return "grams";
// Misc
case "Cel":
return "celsius";
case "Hz":
return "hertz";
case "1":
return "";
case "%":
return "percent";
default:
return unit;
}
}
public static String mapMetricsName(String name, String unit, boolean isCounter, boolean isGauge) {
// Replace "." into "_"
name = name.replaceAll("\\.", "_");
String prometheusUnit = getPrometheusUnit(unit);
boolean shouldAppendUnit = StringUtils.isNotBlank(prometheusUnit) && !name.contains(prometheusUnit);
// append prometheus unit if not null or empty.
// unit should be appended before type suffix
if (shouldAppendUnit) {
name = name + "_" + prometheusUnit;
}
// trim counter's _total suffix so the unit is placed before it.
if (isCounter && name.endsWith(TOTAL_SUFFIX)) {
name = name.substring(0, name.length() - TOTAL_SUFFIX.length());
}
// replace _total suffix, or add if it wasn't already present.
if (isCounter) {
name = name + TOTAL_SUFFIX;
}
// special case - gauge
if (unit.equals("1") && isGauge && !name.contains("ratio")) {
name = name + "_ratio";
}
return name;
}
public static String mapLabelName(String name) {
if (StringUtils.isBlank(name)) {
return "";
}
return name.replaceAll("\\.", "_");
}
}

View File

@ -37,7 +37,6 @@ import org.apache.kafka.common.requests.s3.GetKVsRequest;
import org.apache.kafka.common.requests.s3.PutKVsRequest;
import org.apache.kafka.common.utils.Time;
import com.automq.shell.metrics.S3MetricsExporter;
import com.automq.stream.api.KeyValue;
import org.slf4j.Logger;
@ -48,7 +47,7 @@ import java.util.List;
import java.util.Objects;
public class ClientKVClient {
private static final Logger LOGGER = LoggerFactory.getLogger(S3MetricsExporter.class);
private static final Logger LOGGER = LoggerFactory.getLogger(ClientKVClient.class);
private final NetworkClient networkClient;
private final Node bootstrapServer;

View File

@ -42,4 +42,5 @@ case $COMMAND in
;;
esac
export KAFKA_CONNECT_MODE=true
exec $(dirname $0)/kafka-run-class.sh $EXTRA_ARGS org.apache.kafka.connect.cli.ConnectDistributed "$@"

View File

@ -42,4 +42,5 @@ case $COMMAND in
;;
esac
export KAFKA_CONNECT_MODE=true
exec $(dirname $0)/kafka-run-class.sh $EXTRA_ARGS org.apache.kafka.connect.cli.ConnectStandalone "$@"

View File

@ -40,7 +40,23 @@ should_include_file() {
fi
file=$1
if [ -z "$(echo "$file" | grep -E "$regex")" ] ; then
return 0
# If Connect mode is enabled, apply additional filtering
if [ "$KAFKA_CONNECT_MODE" = "true" ]; then
# Skip if file doesn't exist
[ ! -f "$file" ] && return 1
# Exclude heavy dependencies that Connect doesn't need
case "$file" in
*hadoop*) return 1 ;;
*hive*) return 1 ;;
*iceberg*) return 1 ;;
*avro*) return 1 ;;
*parquet*) return 1 ;;
*) return 0 ;;
esac
else
return 0
fi
else
return 1
fi

View File

@ -53,7 +53,7 @@ plugins {
ext {
gradleVersion = versions.gradle
minJavaVersion = 11
minJavaVersion = 17
buildVersionFileName = "kafka-version.properties"
defaultMaxHeapSize = "2g"
@ -150,6 +150,10 @@ allprojects {
}
configurations.all {
// Globally exclude commons-logging and logback to ensure a single logging implementation (reload4j)
exclude group: "commons-logging", module: "commons-logging"
exclude group: "ch.qos.logback", module: "logback-classic"
exclude group: "ch.qos.logback", module: "logback-core"
// zinc is the Scala incremental compiler, it has a configuration for its own dependencies
// that are unrelated to the project dependencies, we should not change them
if (name != "zinc") {
@ -260,7 +264,10 @@ subprojects {
options.compilerArgs << "-Xlint:-rawtypes"
options.compilerArgs << "-Xlint:-serial"
options.compilerArgs << "-Xlint:-try"
options.compilerArgs << "-Werror"
// AutoMQ inject start
// TODO: remove me, when upgrade to 4.x
// options.compilerArgs << "-Werror"
// AutoMQ inject end
// --release is the recommended way to select the target release, but it's only supported in Java 9 so we also
// set --source and --target via `sourceCompatibility` and `targetCompatibility` a couple of lines below
@ -831,6 +838,13 @@ tasks.create(name: "jarConnect", dependsOn: connectPkgs.collect { it + ":jar" })
tasks.create(name: "testConnect", dependsOn: connectPkgs.collect { it + ":test" }) {}
// OpenTelemetry related tasks
tasks.create(name: "jarOpenTelemetry", dependsOn: ":opentelemetry:jar") {}
tasks.create(name: "testOpenTelemetry", dependsOn: ":opentelemetry:test") {}
tasks.create(name: "buildOpenTelemetry", dependsOn: [":opentelemetry:jar", ":opentelemetry:test"]) {}
project(':server') {
base {
archivesName = "kafka-server"
@ -931,6 +945,8 @@ project(':core') {
implementation project(':storage')
implementation project(':server')
implementation project(':automq-shell')
implementation project(':automq-metrics')
implementation project(':automq-log-uploader')
implementation libs.argparse4j
implementation libs.commonsValidator
@ -968,15 +984,9 @@ project(':core') {
implementation libs.guava
implementation libs.slf4jBridge
implementation libs.slf4jReload4j
// The `jcl-over-slf4j` library is used to redirect JCL logging to SLF4J.
implementation libs.jclOverSlf4j
implementation libs.opentelemetryJava8
implementation libs.opentelemetryOshi
implementation libs.opentelemetrySdk
implementation libs.opentelemetrySdkMetrics
implementation libs.opentelemetryExporterLogging
implementation libs.opentelemetryExporterProm
implementation libs.opentelemetryExporterOTLP
implementation libs.opentelemetryJmx
implementation libs.awsSdkAuth
// table topic start
@ -989,6 +999,7 @@ project(':core') {
implementation ("org.apache.iceberg:iceberg-parquet:${versions.iceberg}")
implementation ("org.apache.iceberg:iceberg-common:${versions.iceberg}")
implementation ("org.apache.iceberg:iceberg-aws:${versions.iceberg}")
implementation ("org.apache.iceberg:iceberg-nessie:${versions.iceberg}")
implementation ("software.amazon.awssdk:glue:${versions.awsSdk}")
implementation ("software.amazon.awssdk:s3tables:${versions.awsSdk}")
implementation 'software.amazon.s3tables:s3-tables-catalog-for-iceberg:0.1.0'
@ -1004,6 +1015,37 @@ project(':core') {
exclude group: 'org.apache.kafka', module: 'kafka-clients'
}
// > hive ext start
implementation 'org.apache.iceberg:iceberg-hive-metastore:1.6.1'
implementation('org.apache.hive:hive-metastore:3.1.3') {
// Remove useless dependencies (copy from iceberg-kafka-connect)
exclude group: "org.apache.avro", module: "avro"
exclude group: "org.slf4j", module: "slf4j-log4j12"
exclude group: "org.pentaho" // missing dependency
exclude group: "org.apache.hbase"
exclude group: "org.apache.logging.log4j"
exclude group: "co.cask.tephra"
exclude group: "com.google.code.findbugs", module: "jsr305"
exclude group: "org.eclipse.jetty.aggregate", module: "jetty-all"
exclude group: "org.eclipse.jetty.orbit", module: "javax.servlet"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
exclude group: "com.tdunning", module: "json"
exclude group: "javax.transaction", module: "transaction-api"
exclude group: "com.zaxxer", module: "HikariCP"
exclude group: "org.apache.hadoop", module: "hadoop-yarn-server-common"
exclude group: "org.apache.hadoop", module: "hadoop-yarn-server-applicationhistoryservice"
exclude group: "org.apache.hadoop", module: "hadoop-yarn-server-resourcemanager"
exclude group: "org.apache.hadoop", module: "hadoop-yarn-server-web-proxy"
exclude group: "org.apache.hive", module: "hive-service-rpc"
exclude group: "com.github.joshelser", module: "dropwizard-metrics-hadoop-metrics2-reporter"
}
implementation ('org.apache.hadoop:hadoop-mapreduce-client-core:3.4.1') {
exclude group: 'com.sun.jersey', module: '*'
exclude group: 'com.sun.jersey.contribs', module: '*'
exclude group: 'com.github.pjfanning', module: 'jersey-json'
}
// > hive ext end
// > Protobuf ext start
// Wire Runtime for schema handling
implementation ("com.squareup.wire:wire-schema:${versions.wire}")
@ -1027,6 +1069,7 @@ project(':core') {
testImplementation project(':storage:storage-api').sourceSets.test.output
testImplementation project(':server').sourceSets.test.output
testImplementation libs.bcpkix
testImplementation libs.mockitoJunitJupiter // supports MockitoExtension
testImplementation libs.mockitoCore
testImplementation libs.guava
testImplementation(libs.apacheda) {
@ -1207,6 +1250,10 @@ project(':core') {
from(project(':trogdor').configurations.runtimeClasspath) { into("libs/") }
from(project(':automq-shell').jar) { into("libs/") }
from(project(':automq-shell').configurations.runtimeClasspath) { into("libs/") }
from(project(':automq-metrics').jar) { into("libs/") }
from(project(':automq-metrics').configurations.runtimeClasspath) { into("libs/") }
from(project(':automq-log-uploader').jar) { into("libs/") }
from(project(':automq-log-uploader').configurations.runtimeClasspath) { into("libs/") }
from(project(':shell').jar) { into("libs/") }
from(project(':shell').configurations.runtimeClasspath) { into("libs/") }
from(project(':connect:api').jar) { into("libs/") }
@ -1238,6 +1285,38 @@ project(':core') {
duplicatesStrategy 'exclude'
}
// AutoMQ inject start
tasks.create(name: "releaseE2ETar", dependsOn: [configurations.archives.artifacts, 'copyDependantTestLibs'], type: Tar) {
def prefix = project.findProperty('prefix') ?: ''
archiveBaseName = "${prefix}kafka"
into "${prefix}kafka-${archiveVersion.get()}"
compression = Compression.GZIP
from(project.file("$rootDir/bin")) { into "bin/" }
from(project.file("$rootDir/config")) { into "config/" }
from(project.file("$rootDir/licenses")) { into "licenses/" }
from(project.file("$rootDir/docker/docker-compose.yaml")) { into "docker/" }
from(project.file("$rootDir/docker/telemetry")) { into "docker/telemetry/" }
from(project.file("$rootDir/LICENSE")) { into "" }
from "$rootDir/NOTICE-binary" rename {String filename -> filename.replace("-binary", "")}
from(configurations.runtimeClasspath) { into("libs/") }
from(configurations.archives.artifacts.files) { into("libs/") }
from(project.siteDocsTar) { into("site-docs/") }
// Include main and test jars from all subprojects
rootProject.subprojects.each { subproject ->
if (subproject.tasks.findByName('jar')) {
from(subproject.tasks.named('jar')) { into('libs/') }
}
if (subproject.tasks.findByName('testJar')) {
from(subproject.tasks.named('testJar')) { into('libs/') }
}
from(subproject.configurations.runtimeClasspath) { into('libs/') }
}
duplicatesStrategy 'exclude'
}
// AutoMQ inject end
jar {
dependsOn('copyDependantLibs')
}
@ -1312,6 +1391,7 @@ project(':metadata') {
implementation libs.guava
implementation libs.awsSdkAuth
implementation project(':s3stream')
implementation ("org.apache.avro:avro:${versions.avro}")
implementation libs.jacksonDatabind
implementation libs.jacksonJDK8Datatypes
@ -2184,10 +2264,10 @@ project(':s3stream') {
implementation 'software.amazon.awssdk.crt:aws-crt:0.30.8'
implementation 'com.ibm.async:asyncutil:0.1.0'
testImplementation 'org.slf4j:slf4j-simple:2.0.9'
testImplementation 'org.junit.jupiter:junit-jupiter:5.10.0'
testImplementation 'org.mockito:mockito-core:5.5.0'
testImplementation 'org.mockito:mockito-junit-jupiter:5.5.0'
testImplementation 'org.slf4j:slf4j-simple:1.7.36'
testImplementation libs.junitJupiter
testImplementation libs.mockitoCore
testImplementation libs.mockitoJunitJupiter // supports MockitoExtension
testImplementation 'org.awaitility:awaitility:4.2.1'
}
@ -2254,6 +2334,107 @@ project(':tools:tools-api') {
}
}
project(':automq-metrics') {
archivesBaseName = "automq-metrics"
checkstyle {
configProperties = checkstyleConfigProperties("import-control-server.xml")
}
configurations {
all {
exclude group: 'io.opentelemetry', module: 'opentelemetry-exporter-sender-okhttp'
}
}
dependencies {
// OpenTelemetry core dependencies
api libs.opentelemetryJava8
api libs.opentelemetryOshi
api libs.opentelemetrySdk
api libs.opentelemetrySdkMetrics
api libs.opentelemetryExporterLogging
api libs.opentelemetryExporterProm
api libs.opentelemetryExporterOTLP
api libs.opentelemetryExporterSenderJdk
api libs.opentelemetryJmx
// Logging dependencies
api libs.slf4jApi
api libs.slf4jBridge // SLF4J Bridge
api libs.reload4j
api libs.commonLang
// Yammer metrics (for integration)
api 'com.yammer.metrics:metrics-core:2.2.0'
implementation(project(':s3stream')) {
exclude(group: 'io.opentelemetry', module: '*')
exclude(group: 'io.opentelemetry.instrumentation', module: '*')
exclude(group: 'io.opentelemetry.proto', module: '*')
exclude(group: 'io.netty', module: 'netty-tcnative-boringssl-static')
exclude(group: 'com.github.jnr', module: '*')
exclude(group: 'org.aspectj', module: '*')
exclude(group: 'net.java.dev.jna', module: '*')
exclude(group: 'net.sourceforge.argparse4j', module: '*')
exclude(group: 'com.bucket4j', module: '*')
exclude(group: 'com.yammer.metrics', module: '*')
exclude(group: 'com.github.spotbugs', module: '*')
exclude(group: 'org.apache.kafka.shaded', module: '*')
}
implementation libs.nettyBuffer
implementation libs.jacksonDatabind
implementation libs.guava
implementation project(':clients')
// Test dependencies
testImplementation libs.junitJupiter
testImplementation libs.mockitoCore
testImplementation libs.slf4jReload4j
testRuntimeOnly libs.junitPlatformLanucher
implementation('io.opentelemetry:opentelemetry-sdk:1.40.0')
implementation("io.opentelemetry.semconv:opentelemetry-semconv:1.25.0-alpha")
implementation("io.opentelemetry.instrumentation:opentelemetry-runtime-telemetry-java8:2.6.0-alpha")
implementation('com.google.protobuf:protobuf-java:3.25.5')
implementation('org.xerial.snappy:snappy-java:1.1.10.5')
}
clean.doFirst {
delete "$buildDir/kafka/"
}
javadoc {
enabled = false
}
}
project(':automq-log-uploader') {
archivesBaseName = "automq-log-uploader"
checkstyle {
configProperties = checkstyleConfigProperties("import-control-server.xml")
}
dependencies {
api project(':s3stream')
implementation project(':clients')
implementation libs.reload4j
implementation libs.slf4jApi
implementation libs.slf4jBridge
implementation libs.nettyBuffer
implementation libs.guava
implementation libs.commonLang
}
javadoc {
enabled = false
}
}
project(':tools') {
base {
archivesName = "kafka-tools"
@ -2275,7 +2456,9 @@ project(':tools') {
exclude group: 'org.apache.kafka', module: 'kafka-clients'
}
implementation libs.bucket4j
implementation libs.oshi
implementation (libs.oshi){
exclude group: 'org.slf4j', module: '*'
}
// AutoMQ inject end
implementation project(':storage')
@ -3359,6 +3542,8 @@ project(':connect:runtime') {
api project(':clients')
api project(':connect:json')
api project(':connect:transforms')
api project(':automq-metrics')
api project(':automq-log-uploader')
implementation libs.slf4jApi
implementation libs.reload4j
@ -3367,6 +3552,7 @@ project(':connect:runtime') {
implementation libs.jacksonJaxrsJsonProvider
implementation libs.jerseyContainerServlet
implementation libs.jerseyHk2
implementation libs.jaxrsApi
implementation libs.jaxbApi // Jersey dependency that was available in the JDK before Java 9
implementation libs.activation // Jersey dependency that was available in the JDK before Java 9
implementation libs.jettyServer

View File

@ -378,5 +378,6 @@
<suppress id="dontUseSystemExit"
files="(BenchTool|S3Utils|AutoMQCLI).java"/>
<suppress checks="ClassDataAbstractionCoupling" files="(StreamControlManagerTest|ControllerStreamManager).java"/>
<suppress files="core[\/]src[\/]test[\/]java[\/]kafka[\/]automq[\/]table[\/]process[\/]proto[\/].*\.java$" checks=".*"/>
</suppressions>

View File

@ -264,8 +264,51 @@ public class TopicConfig {
public static final String TABLE_TOPIC_COMMIT_INTERVAL_DOC = "The table topic commit interval(ms)";
public static final String TABLE_TOPIC_NAMESPACE_CONFIG = "automq.table.topic.namespace";
public static final String TABLE_TOPIC_NAMESPACE_DOC = "The table topic table namespace";
public static final String TABLE_TOPIC_SCHEMA_TYPE_CONFIG = "automq.table.topic.schema.type";
public static final String TABLE_TOPIC_SCHEMA_TYPE_DOC = "The table topic schema type, support schemaless, schema";
public static final String TABLE_TOPIC_SCHEMA_TYPE_DOC = "[DEPRECATED] The table topic schema type configuration. " +
"This configuration is deprecated and will be removed in a future release. " +
"Please use the new separate converter and transform configurations instead. " +
"Supported values: 'schemaless' (maps to convert.value.type=raw, transform.value.type=none), " +
"'schema' (maps to convert.value.type=by_schema_id, transform.value.type=flatten).";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_VALUE_TYPE_CONFIG = "automq.table.topic.convert.value.type";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_VALUE_TYPE_DOC = "How to parse Kafka record values. " +
"Supported: 'raw', 'string', 'by_schema_id', 'by_latest_schema'. " +
"Schema Registry URL required for 'by_schema_id' and 'by_latest_schema'.";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_KEY_TYPE_CONFIG = "automq.table.topic.convert.key.type";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_KEY_TYPE_DOC = "How to parse Kafka record keys. " +
"Supported: 'raw', 'string', 'by_schema_id', 'by_latest_schema'. " +
"Schema Registry URL required for 'by_schema_id' and 'by_latest_schema'.";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_VALUE_BY_LATEST_SCHEMA_SUBJECT_CONFIG =
"automq.table.topic.convert.value.by_latest_schema.subject";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_VALUE_BY_LATEST_SCHEMA_SUBJECT_DOC =
"Subject name to resolve the latest value schema from Schema Registry when using convert.value.type=by_latest_schema. " +
"If not set, defaults to '<topic>-value'.";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_VALUE_BY_LATEST_SCHEMA_MESSAGE_FULL_NAME_CONFIG =
"automq.table.topic.convert.value.by_latest_schema.message.full.name";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_VALUE_BY_LATEST_SCHEMA_MESSAGE_FULL_NAME_DOC =
"Fully-qualified message name for the latest value schema (if using Protobuf) when convert.value.type=by_latest_schema." +
"If not set, uses the first message.";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_KEY_BY_LATEST_SCHEMA_SUBJECT_CONFIG =
"automq.table.topic.convert.key.by_latest_schema.subject";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_KEY_BY_LATEST_SCHEMA_SUBJECT_DOC =
"Subject name to resolve the latest key schema from Schema Registry when using convert.key.type=by_latest_schema. " +
"If not set, defaults to '<topic>-key'.";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_KEY_BY_LATEST_SCHEMA_MESSAGE_FULL_NAME_CONFIG =
"automq.table.topic.convert.key.by_latest_schema.message.full.name";
public static final String AUTOMQ_TABLE_TOPIC_CONVERT_KEY_BY_LATEST_SCHEMA_MESSAGE_FULL_NAME_DOC =
"Fully-qualified message name for the latest key schema (if using Protobuf) when convert.key.type=by_latest_schema. " +
"If not set, uses the first message.";
public static final String AUTOMQ_TABLE_TOPIC_TRANSFORM_VALUE_TYPE_CONFIG = "automq.table.topic.transform.value.type";
public static final String AUTOMQ_TABLE_TOPIC_TRANSFORM_VALUE_TYPE_DOC = "Transformation to apply to the record value after conversion. " +
"Supported: 'none', 'flatten' (extract fields from structured records), " +
"'flatten_debezium' (process Debezium CDC events). " +
"Note: 'flatten_debezium' requires schema-based conversion.";
public static final String TABLE_TOPIC_ID_COLUMNS_CONFIG = "automq.table.topic.id.columns";
public static final String TABLE_TOPIC_ID_COLUMNS_DOC = "The primary key, comma-separated list of columns that identify a row in tables."
+ "ex. [region, name]";
@ -276,6 +319,21 @@ public class TopicConfig {
public static final String TABLE_TOPIC_CDC_FIELD_CONFIG = "automq.table.topic.cdc.field";
public static final String TABLE_TOPIC_CDC_FIELD_DOC = "The name of the field containing the CDC operation, I, U, or D";
public static final String AUTOMQ_TABLE_TOPIC_ERRORS_TOLERANCE_CONFIG = "automq.table.topic.errors.tolerance";
public static final String AUTOMQ_TABLE_TOPIC_ERRORS_TOLERANCE_DOC = "Configures the error handling strategy for table topic record processing. Valid values are <code>none</code>, <code>invalid_data</code>, and <code>all</code>.";
public static final String AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_ENABLED_CONFIG = "automq.table.topic.expire.snapshot.enabled";
public static final String AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_ENABLED_DOC = "Enable/disable automatic snapshot expiration.";
public static final boolean AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_ENABLED_DEFAULT = true;
public static final String AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_OLDER_THAN_HOURS_CONFIG = "automq.table.topic.expire.snapshot.older.than.hours";
public static final String AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_OLDER_THAN_HOURS_DOC = "Set retention duration in hours.";
public static final int AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_OLDER_THAN_HOURS_DEFAULT = 1;
public static final String AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_RETAIN_LAST_CONFIG = "automq.table.topic.expire.snapshot.retain.last";
public static final String AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_RETAIN_LAST_DOC = "Minimum snapshots to retain.";
public static final int AUTOMQ_TABLE_TOPIC_EXPIRE_SNAPSHOT_RETAIN_LAST_DEFAULT = 1;
public static final String KAFKA_LINKS_ID_CONFIG = "automq.kafka.links.id";
public static final String KAFKA_LINKS_ID_DOC = "The unique id of a kafka link";
public static final String KAFKA_LINKS_TOPIC_START_TIME_CONFIG = "automq.kafka.links.topic.start.time";

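For illustration, a minimal sketch (assuming the standard Kafka Admin API; the helper class below is hypothetical) of moving a topic from the deprecated automq.table.topic.schema.type=schema setting to the split converter/transform configs documented above:
import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.AlterConfigOp;
import org.apache.kafka.clients.admin.ConfigEntry;
import org.apache.kafka.common.config.ConfigResource;
import java.util.Collection;
import java.util.List;
import java.util.Map;
// Hypothetical helper: 'schema' is documented above as equivalent to
// convert.value.type=by_schema_id plus transform.value.type=flatten.
public final class TableTopicConfigMigration {
    public static void migrate(Admin admin, String topic) throws Exception {
        ConfigResource resource = new ConfigResource(ConfigResource.Type.TOPIC, topic);
        Collection<AlterConfigOp> ops = List.of(
            new AlterConfigOp(new ConfigEntry("automq.table.topic.convert.value.type", "by_schema_id"), AlterConfigOp.OpType.SET),
            new AlterConfigOp(new ConfigEntry("automq.table.topic.transform.value.type", "flatten"), AlterConfigOp.OpType.SET),
            // The value is ignored for DELETE; this clears the deprecated setting.
            new AlterConfigOp(new ConfigEntry("automq.table.topic.schema.type", ""), AlterConfigOp.OpType.DELETE)
        );
        admin.incrementalAlterConfigs(Map.of(resource, ops)).all().get();
    }
}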
View File

@ -39,6 +39,22 @@ public enum TimestampType {
throw new NoSuchElementException("Invalid timestamp type " + name);
}
public static TimestampType forId(int id) {
switch (id) {
case -1: {
return NO_TIMESTAMP_TYPE;
}
case 0: {
return CREATE_TIME;
}
case 1: {
return LOG_APPEND_TIME;
}
default:
throw new IllegalArgumentException("Invalid timestamp type " + id);
}
}
@Override
public String toString() {
return name;

View File

@ -20,7 +20,7 @@
"broker"
],
"name": "AutomqGetPartitionSnapshotRequest",
"validVersions": "0",
"validVersions": "0-2",
"flexibleVersions": "0+",
"fields": [
{
@ -34,6 +34,18 @@
"type": "int32",
"versions": "0+",
"about": "The get session epoch, which is used for ordering requests in a session"
},
{
"name": "RequestCommit",
"type": "bool",
"versions": "1+",
"about": "Request commit the ConfirmWAL data to the main storage."
},
{
"name": "Version",
"type": "int16",
"versions": "1+",
"about": "The route request version"
}
]
}

View File

@ -17,7 +17,7 @@
"apiKey": 516,
"type": "response",
"name": "AutomqGetPartitionSnapshotResponse",
"validVersions": "0",
"validVersions": "0-2",
"flexibleVersions": "0+",
"fields": [
{ "name": "ErrorCode", "type": "int16", "versions": "0+", "about": "The top level response error code" },
@ -36,9 +36,29 @@
{ "name": "StreamMetadata", "type": "[]StreamMetadata", "versions": "0+", "nullableVersions": "0+", "fields": [
{ "name": "StreamId", "type": "int64", "versions": "0+", "about": "The streamId" },
{ "name": "EndOffset", "type": "int64", "versions": "0+", "about": "The stream end offset" }
]}
]},
{ "name": "LastTimestampOffset", "type": "TimestampOffsetData", "versions": "1+", "nullableVersions": "1+", "about": "The last segment's last time index" }
]}
]}
]},
{
"name": "ConfirmWalEndOffset",
"type": "bytes",
"versions": "1+",
"about": "The confirm WAL end offset."
},
{
"name": "ConfirmWalConfig",
"type": "string",
"versions": "1+",
"about": "The confirm WAL config."
},
{
"name": "ConfirmWalDeltaData",
"type": "bytes",
"versions": "2+",
"nullableVersions": "2+",
"about": "The confirm WAL delta data between two end offsets. It's an optional field. If not present, the client should read the delta from WAL"
}
],
"commonStructs": [
{ "name": "LogMetadata", "versions": "0+", "fields": [

View File

@ -20,7 +20,7 @@
"broker"
],
"name": "AutomqZoneRouterRequest",
"validVersions": "0",
"validVersions": "0-1",
"flexibleVersions": "0+",
"fields": [
{
@ -28,6 +28,18 @@
"type": "bytes",
"versions": "0+",
"about": "The router metadata"
},
{
"name": "RouteEpoch",
"type": "int64",
"versions": "1+",
"about": "The route requests epoch"
},
{
"name": "Version",
"type": "int16",
"versions": "1+",
"about": "The route request version"
}
]
}

View File

@ -17,7 +17,7 @@
"apiKey": 515,
"type": "response",
"name": "AutomqZoneRouterResponse",
"validVersions": "0",
"validVersions": "0-1",
"flexibleVersions": "0+",
"fields": [
{

View File

@ -24,7 +24,8 @@ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# location of the log files (e.g. ${kafka.logs.dir}/connect.log). The `MaxFileSize` option specifies the maximum size of the log file,
# and the `MaxBackupIndex` option specifies the number of backup files to keep.
#
log4j.appender.connectAppender=org.apache.log4j.RollingFileAppender
log4j.appender.connectAppender=com.automq.log.S3RollingFileAppender
log4j.appender.connectAppender.configProviderClass=org.apache.kafka.connect.automq.log.ConnectS3LogConfigProvider
log4j.appender.connectAppender.MaxFileSize=10MB
log4j.appender.connectAppender.MaxBackupIndex=11
log4j.appender.connectAppender.File=${kafka.logs.dir}/connect.log

View File

@ -21,70 +21,73 @@ log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.kafkaAppender=com.automq.shell.log.S3RollingFileAppender
log4j.logger.com.automq.log.S3RollingFileAppender=INFO, stdout
log4j.additivity.com.automq.log.S3RollingFileAppender=false
log4j.appender.kafkaAppender=com.automq.log.S3RollingFileAppender
log4j.appender.kafkaAppender.MaxFileSize=100MB
log4j.appender.kafkaAppender.MaxBackupIndex=14
log4j.appender.kafkaAppender.File=${kafka.logs.dir}/server.log
log4j.appender.kafkaAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.kafkaAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.stateChangeAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.stateChangeAppender=com.automq.log.S3RollingFileAppender
log4j.appender.stateChangeAppender.MaxFileSize=10MB
log4j.appender.stateChangeAppender.MaxBackupIndex=11
log4j.appender.stateChangeAppender.File=${kafka.logs.dir}/state-change.log
log4j.appender.stateChangeAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.stateChangeAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.requestAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.requestAppender=com.automq.log.S3RollingFileAppender
log4j.appender.requestAppender.MaxFileSize=10MB
log4j.appender.requestAppender.MaxBackupIndex=11
log4j.appender.requestAppender.File=${kafka.logs.dir}/kafka-request.log
log4j.appender.requestAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.requestAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.cleanerAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.cleanerAppender=com.automq.log.S3RollingFileAppender
log4j.appender.cleanerAppender.MaxFileSize=10MB
log4j.appender.cleanerAppender.MaxBackupIndex=11
log4j.appender.cleanerAppender.File=${kafka.logs.dir}/log-cleaner.log
log4j.appender.cleanerAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.cleanerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.controllerAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.controllerAppender=com.automq.log.S3RollingFileAppender
log4j.appender.controllerAppender.MaxFileSize=100MB
log4j.appender.controllerAppender.MaxBackupIndex=14
log4j.appender.controllerAppender.File=${kafka.logs.dir}/controller.log
log4j.appender.controllerAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.controllerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.authorizerAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.authorizerAppender=com.automq.log.S3RollingFileAppender
log4j.appender.authorizerAppender.MaxFileSize=10MB
log4j.appender.authorizerAppender.MaxBackupIndex=11
log4j.appender.authorizerAppender.File=${kafka.logs.dir}/kafka-authorizer.log
log4j.appender.authorizerAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.authorizerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.s3ObjectAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.s3ObjectAppender=com.automq.log.S3RollingFileAppender
log4j.appender.s3ObjectAppender.MaxFileSize=100MB
log4j.appender.s3ObjectAppender.MaxBackupIndex=14
log4j.appender.s3ObjectAppender.File=${kafka.logs.dir}/s3-object.log
log4j.appender.s3ObjectAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.s3ObjectAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.s3StreamMetricsAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.s3StreamMetricsAppender=com.automq.log.S3RollingFileAppender
log4j.appender.s3StreamMetricsAppender.MaxFileSize=10MB
log4j.appender.s3StreamMetricsAppender.MaxBackupIndex=11
log4j.appender.s3StreamMetricsAppender.File=${kafka.logs.dir}/s3stream-metrics.log
log4j.appender.s3StreamMetricsAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.s3StreamMetricsAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.s3StreamThreadPoolAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.s3StreamThreadPoolAppender=com.automq.log.S3RollingFileAppender
log4j.appender.s3StreamThreadPoolAppender.MaxFileSize=10MB
log4j.appender.s3StreamThreadPoolAppender.MaxBackupIndex=11
log4j.appender.s3StreamThreadPoolAppender.File=${kafka.logs.dir}/s3stream-threads.log
log4j.appender.s3StreamThreadPoolAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.s3StreamThreadPoolAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.autoBalancerAppender=com.automq.shell.log.S3RollingFileAppender
log4j.appender.autoBalancerAppender=com.automq.log.S3RollingFileAppender
log4j.appender.autoBalancerAppender.MaxFileSize=10MB
log4j.appender.autoBalancerAppender.MaxBackupIndex=11
log4j.appender.autoBalancerAppender.File=${kafka.logs.dir}/auto-balancer.log

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
log4j.rootLogger=INFO, stdout, perfAppender
log4j.rootLogger=ERROR, stdout, perfAppender
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
@ -26,7 +26,15 @@ log4j.appender.perfAppender.File=${kafka.logs.dir}/perf.log
log4j.appender.perfAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.perfAppender.layout.ConversionPattern=%d -%5p [%15.15t] %m (%c#%M:%L)%n
log4j.logger.org.apache.kafka=INFO, perfAppender
log4j.additivity.org.apache.kafka=false
log4j.appender.clientAppender=org.apache.log4j.RollingFileAppender
log4j.appender.clientAppender.MaxFileSize=100MB
log4j.appender.clientAppender.MaxBackupIndex=10
log4j.appender.clientAppender.File=${kafka.logs.dir}/client.log
log4j.appender.clientAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.clientAppender.layout.ConversionPattern=%d -%5p [%15.15t] %m (%c#%M:%L)%n
log4j.logger.org.apache.kafka.tools.automq=INFO, stdout, perfAppender
log4j.additivity.org.apache.kafka.tools.automq=false
log4j.logger.org.apache.kafka.clients=INFO, clientAppender
log4j.additivity.org.apache.kafka.clients=false

connect/runtime/README.md (new file, 221 lines)
View File

@ -0,0 +1,221 @@
# Kafka Connect OpenTelemetry Metrics Integration
## Overview
This integration allows Kafka Connect to export metrics through the AutoMQ OpenTelemetry module, enabling unified observability across your Kafka ecosystem.
## Configuration
### 1. Enable the MetricsReporter
Add the following to your Kafka Connect configuration file (`connect-distributed.properties` or `connect-standalone.properties`):
```properties
# Enable OpenTelemetry MetricsReporter
metric.reporters=org.apache.kafka.connect.automq.metrics.OpenTelemetryMetricsReporter
# OpenTelemetry configuration
opentelemetry.metrics.enabled=true
opentelemetry.metrics.prefix=kafka.connect
# Optional: Filter metrics
opentelemetry.metrics.include.pattern=.*connector.*|.*task.*|.*worker.*
opentelemetry.metrics.exclude.pattern=.*jmx.*|.*debug.*
```
### 2. AutoMQ Telemetry Configuration
Ensure the AutoMQ telemetry is properly configured. Add these properties to your application configuration:
```properties
# Telemetry export configuration
automq.telemetry.exporter.uri=prometheus://localhost:9090
# or for OTLP: automq.telemetry.exporter.uri=otlp://localhost:4317
# Service identification
service.name=kafka-connect
service.instance.id=connect-worker-1
# Export settings
automq.telemetry.exporter.interval.ms=30000
automq.telemetry.metric.cardinality.limit=10000
```
## S3 Log Upload
Kafka Connect bundles the AutoMQ log uploader so that worker logs can be streamed to S3, with cleanup of old log objects handled from within the cluster. By default the uploader relies on the Connect leader election mechanism (`log.s3.selector.type=connect-leader`), so no additional selector configuration is required.
### Worker Configuration
Add the following properties to your worker configuration (ConfigMap, properties file, etc.):
```properties
# Enable S3 log upload
log.s3.enable=true
log.s3.bucket=0@s3://your-log-bucket?region=us-east-1
# Optional overrides (defaults shown)
log.s3.selector.type=connect-leader
# Provide credentials if the bucket URI does not embed them
# log.s3.access.key=...
# log.s3.secret.key=...
```
`log.s3.node.id` defaults to a hash of the pod hostname if not provided, ensuring objects are partitioned per worker.
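For reference, a minimal sketch of that fallback (mirroring the provider code added in this change; only the helper class name is invented):
```java
import java.net.InetAddress;

// Sketch: derive a stable, non-negative node id from the hostname when
// log.s3.node.id is not configured, as the Connect config provider does.
public final class NodeIdFallback {
    public static String resolve() {
        String host;
        try {
            host = InetAddress.getLocalHost().getHostName();
        } catch (Exception e) {
            host = System.getenv().getOrDefault("HOSTNAME", "0");
        }
        // Mask the sign bit so the hash-based id is always >= 0.
        return Integer.toString(host.hashCode() & Integer.MAX_VALUE);
    }
}
```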
### Log4j Integration
`config/connect-log4j.properties` now sets `connectAppender` to `com.automq.log.S3RollingFileAppender` and specifies `org.apache.kafka.connect.automq.log.ConnectS3LogConfigProvider` as its config provider. As long as `log.s3.enable=true` is set and the bucket info is configured in the worker config, log upload is initialized automatically when the Connect process starts; if the property is absent or resolves to `log.s3.enable=false`, the uploader stays disabled.
## Programmatic Usage
### 1. Initialize Telemetry Manager
```java
import com.automq.opentelemetry.AutoMQTelemetryManager;
import java.util.Properties;
// Initialize AutoMQ telemetry before starting Kafka Connect
Properties telemetryProps = new Properties();
telemetryProps.setProperty("automq.telemetry.exporter.uri", "prometheus://localhost:9090");
telemetryProps.setProperty("service.name", "kafka-connect");
telemetryProps.setProperty("service.instance.id", "worker-1");
// Initialize singleton instance
AutoMQTelemetryManager.initializeInstance(telemetryProps);
// Now start Kafka Connect - it will automatically use the OpenTelemetryMetricsReporter
```
### 2. Shutdown
```java
// When shutting down your application
AutoMQTelemetryManager.shutdownInstance();
```
## Exported Metrics
The integration automatically converts Kafka Connect metrics to OpenTelemetry format:
### Metric Naming Convention
- **Format**: `kafka.connect.{group}.{metric_name}`
- **Example**: `kafka.connect.connector.task.batch.size.avg` → `kafka.connect.connector_task_batch_size_avg`
### Metric Types
- **Counters**: Metrics containing "total", "count", "error", "failure"
- **Gauges**: All other numeric metrics (rates, averages, sizes, etc.)
### Attributes
Kafka metric tags are converted to OpenTelemetry attributes (see the sketch after this list):
- `connector` → `connector`
- `task` → `task`
- `worker-id` → `worker_id`
- Plus standard attributes: `metric.group`, `service.name`, `service.instance.id`
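The mapping can be pictured with a short sketch (illustrative only; the helper names are hypothetical and this is not the reporter's actual code):
```java
import org.apache.kafka.common.MetricName;

import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;

// Illustrative mapping from a Kafka Connect metric and its tags to an
// OpenTelemetry-style name and attributes, following the convention above.
final class MetricNameMapping {

    static String otelName(String prefix, MetricName metric) {
        // e.g. group "connector-task-metrics", name "batch-size-avg"
        //  ->  "kafka.connect.connector.task.batch.size.avg"
        String group = metric.group().replace("-metrics", "");
        return (prefix + "." + group + "." + metric.name()).replace('-', '.');
    }

    static Attributes otelAttributes(MetricName metric) {
        AttributesBuilder builder = Attributes.builder();
        // Tag keys such as "worker-id" become attribute keys like "worker_id".
        metric.tags().forEach((key, value) -> builder.put(key.replace('-', '_'), value));
        builder.put("metric.group", metric.group());
        return builder.build();
    }
}
```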
## Example Metrics
Common Kafka Connect metrics that will be exported:
```
# Connector metrics
kafka.connect.connector.startup.attempts.total
kafka.connect.connector.startup.success.total
kafka.connect.connector.startup.failure.total
# Task metrics
kafka.connect.connector.task.batch.size.avg
kafka.connect.connector.task.batch.size.max
kafka.connect.connector.task.offset.commit.avg.time.ms
# Worker metrics
kafka.connect.worker.connector.count
kafka.connect.worker.task.count
kafka.connect.worker.connector.startup.attempts.total
```
## Configuration Options
### OpenTelemetry MetricsReporter Options
| Property | Description | Default | Example |
|----------|-------------|---------|---------|
| `opentelemetry.metrics.enabled` | Enable/disable metrics export | `true` | `false` |
| `opentelemetry.metrics.prefix` | Metric name prefix | `kafka.connect` | `my.connect` |
| `opentelemetry.metrics.include.pattern` | Regex for included metrics | All metrics | `.*connector.*` |
| `opentelemetry.metrics.exclude.pattern` | Regex for excluded metrics | None | `.*jmx.*` |
### AutoMQ Telemetry Options
| Property | Description | Default |
|----------|-------------|---------|
| `automq.telemetry.exporter.uri` | Exporter endpoint | Empty |
| `automq.telemetry.exporter.interval.ms` | Export interval | `60000` |
| `automq.telemetry.metric.cardinality.limit` | Max metric cardinality | `20000` |
## Monitoring Examples
### Prometheus Queries
```promql
# Connector count by worker
kafka_connect_worker_connector_count
# Task failure rate
rate(kafka_connect_connector_task_startup_failure_total[5m])
# Average batch processing time
kafka_connect_connector_task_batch_size_avg
# Connector startup success rate
rate(kafka_connect_connector_startup_success_total[5m]) /
rate(kafka_connect_connector_startup_attempts_total[5m])
```
### Grafana Dashboard
Common panels to create:
1. **Connector Health**: Count of running/failed connectors
2. **Task Performance**: Batch size, processing time, throughput
3. **Error Rates**: Failed startups, task failures
4. **Resource Usage**: Combined with JVM metrics from AutoMQ telemetry
## Troubleshooting
### Common Issues
1. **Metrics not appearing**
```
Check logs for: "AutoMQTelemetryManager is not initialized"
Solution: Ensure AutoMQTelemetryManager.initializeInstance() is called before Connect starts
```
2. **High cardinality warnings**
```
Solution: Use include/exclude patterns to filter metrics
```
3. **Missing dependencies**
```
Ensure connect-runtime depends on the opentelemetry module
```
### Debug Logging
Enable debug logging to troubleshoot:
```properties
log4j.logger.org.apache.kafka.connect.automq=DEBUG
log4j.logger.com.automq.opentelemetry=DEBUG
```
## Integration with Existing Monitoring
This integration works alongside:
- Existing JMX metrics (not replaced)
- Kafka broker metrics via AutoMQ telemetry
- Application-specific metrics
- Third-party monitoring tools
The OpenTelemetry integration provides a unified export path while preserving existing monitoring setups.

View File

@ -0,0 +1,95 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.az;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Optional;
public final class AzAwareClientConfigurator {
private static final Logger LOGGER = LoggerFactory.getLogger(AzAwareClientConfigurator.class);
private AzAwareClientConfigurator() {
}
public enum ClientFamily {
PRODUCER,
CONSUMER,
ADMIN
}
public static void maybeApplyAz(Map<String, Object> props, ClientFamily family, String roleDescriptor) {
Optional<String> azOpt = AzMetadataProviderHolder.provider().availabilityZoneId();
LOGGER.info("AZ-aware client.id configuration for role {}: resolved availability zone id '{}'",
roleDescriptor, azOpt.orElse("unknown"));
if (azOpt.isEmpty()) {
LOGGER.info("Skipping AZ-aware client.id configuration for role {} as no availability zone id is available",
roleDescriptor);
return;
}
String az = azOpt.get();
String encodedAz = URLEncoder.encode(az, StandardCharsets.UTF_8);
String automqClientId;
if (props.containsKey(CommonClientConfigs.CLIENT_ID_CONFIG)) {
Object currentId = props.get(CommonClientConfigs.CLIENT_ID_CONFIG);
if (currentId instanceof String currentIdStr) {
automqClientId = "automq_az=" + encodedAz + "&" + currentIdStr;
} else {
LOGGER.warn("client.id for role {} is not a string ({});",
roleDescriptor, currentId.getClass().getName());
return;
}
} else {
automqClientId = "automq_az=" + encodedAz;
}
props.put(CommonClientConfigs.CLIENT_ID_CONFIG, automqClientId);
LOGGER.info("Applied AZ-aware client.id for role {} -> {}", roleDescriptor, automqClientId);
if (family == ClientFamily.CONSUMER) {
LOGGER.info("Applying client.rack configuration for consumer role {} -> {}", roleDescriptor, az);
Object rackValue = props.get(ConsumerConfig.CLIENT_RACK_CONFIG);
if (rackValue == null || String.valueOf(rackValue).isBlank()) {
props.put(ConsumerConfig.CLIENT_RACK_CONFIG, az);
}
}
}
public static void maybeApplyProducerAz(Map<String, Object> props, String roleDescriptor) {
maybeApplyAz(props, ClientFamily.PRODUCER, roleDescriptor);
}
public static void maybeApplyConsumerAz(Map<String, Object> props, String roleDescriptor) {
maybeApplyAz(props, ClientFamily.CONSUMER, roleDescriptor);
}
public static void maybeApplyAdminAz(Map<String, Object> props, String roleDescriptor) {
maybeApplyAz(props, ClientFamily.ADMIN, roleDescriptor);
}
}

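Illustrative call site for the configurator above (a sketch; the property value and role name are made up):
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.connect.automq.az.AzAwareClientConfigurator;
import java.util.HashMap;
import java.util.Map;
// Hypothetical usage: decorate consumer properties before the consumer is built.
// With a resolved zone id such as "use1-az1", client.id becomes
// "automq_az=use1-az1&sink-consumer" and client.rack is set if it was empty.
public final class AzUsageExample {
    public static Map<String, Object> decorate() {
        Map<String, Object> props = new HashMap<>();
        props.put(CommonClientConfigs.CLIENT_ID_CONFIG, "sink-consumer");
        AzAwareClientConfigurator.maybeApplyConsumerAz(props, "sink-consumer");
        return props;
    }
}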
View File

@ -17,31 +17,28 @@
* limitations under the License.
*/
package kafka.automq.table.transformer;
package org.apache.kafka.connect.automq.az;
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.Optional;
public class FieldMetric {
/**
* Pluggable provider for availability-zone metadata used to tune Kafka client configurations.
*/
public interface AzMetadataProvider {
public static int count(String value) {
if (value == null) {
return 0;
}
return Math.max((value.length() + 23) / 24, 1);
/**
* Configure the provider with the worker properties. Implementations may cache values extracted from the
* configuration map. This method is invoked exactly once during worker bootstrap.
*/
default void configure(Map<String, String> workerProps) {
// no-op
}
public static int count(ByteBuffer value) {
if (value == null) {
return 0;
}
return Math.max((value.remaining() + 31) / 32, 1);
/**
* @return the availability-zone identifier for the current node, if known.
*/
default Optional<String> availabilityZoneId() {
return Optional.empty();
}
public static int count(byte[] value) {
if (value == null) {
return 0;
}
return Math.max((value.length + 31) / 32, 1);
}
}

View File

@ -0,0 +1,64 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.az;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.ServiceLoader;
public final class AzMetadataProviderHolder {
private static final Logger LOGGER = LoggerFactory.getLogger(AzMetadataProviderHolder.class);
private static final AzMetadataProvider DEFAULT_PROVIDER = new AzMetadataProvider() { };
private static volatile AzMetadataProvider provider = DEFAULT_PROVIDER;
private AzMetadataProviderHolder() {
}
public static void initialize(Map<String, String> workerProps) {
AzMetadataProvider selected = DEFAULT_PROVIDER;
try {
ServiceLoader<AzMetadataProvider> loader = ServiceLoader.load(AzMetadataProvider.class);
for (AzMetadataProvider candidate : loader) {
try {
candidate.configure(workerProps);
selected = candidate;
LOGGER.info("Loaded AZ metadata provider: {}", candidate.getClass().getName());
break;
} catch (Exception e) {
LOGGER.warn("Failed to initialize AZ metadata provider: {}", candidate.getClass().getName(), e);
}
}
} catch (Throwable t) {
LOGGER.warn("Failed to load AZ metadata providers", t);
}
provider = selected;
}
public static AzMetadataProvider provider() {
return provider;
}
public static void setProviderForTest(AzMetadataProvider newProvider) {
provider = newProvider != null ? newProvider : DEFAULT_PROVIDER;
}
}

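For illustration, a custom provider can be plugged in via the standard ServiceLoader mechanism: implement the interface, then list the class in META-INF/services/org.apache.kafka.connect.automq.az.AzMetadataProvider (the class name and property key below are hypothetical):
import org.apache.kafka.connect.automq.az.AzMetadataProvider;
import java.util.Map;
import java.util.Optional;
// Hypothetical provider that reads the zone id from a worker property; discovered
// by AzMetadataProviderHolder through ServiceLoader (needs a no-arg constructor).
public final class PropertyAzMetadataProvider implements AzMetadataProvider {
    private volatile String az;
    @Override
    public void configure(Map<String, String> workerProps) {
        this.az = workerProps.get("automq.az.id"); // hypothetical key
    }
    @Override
    public Optional<String> availabilityZoneId() {
        return Optional.ofNullable(az).filter(s -> !s.isBlank());
    }
}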
View File

@ -0,0 +1,56 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.log;
import com.automq.log.S3RollingFileAppender;
import com.automq.log.uploader.S3LogConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.Properties;
/**
* Initializes the AutoMQ S3 log uploader for Kafka Connect.
*/
public final class ConnectLogUploader {
private static Logger getLogger() {
return LoggerFactory.getLogger(ConnectLogUploader.class);
}
private ConnectLogUploader() {
}
public static void initialize(Map<String, String> workerProps) {
Properties props = new Properties();
if (workerProps != null) {
workerProps.forEach((k, v) -> {
if (k != null && v != null) {
props.put(k, v);
}
});
}
ConnectS3LogConfigProvider.initialize(props);
S3LogConfig s3LogConfig = new ConnectS3LogConfigProvider().get();
S3RollingFileAppender.setup(s3LogConfig);
getLogger().info("Initialized Connect S3 log uploader context");
}
}

View File

@ -0,0 +1,95 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.log;
import org.apache.kafka.connect.automq.runtime.LeaderNodeSelector;
import org.apache.kafka.connect.automq.runtime.RuntimeLeaderSelectorProvider;
import com.automq.log.uploader.S3LogConfig;
import com.automq.stream.s3.operator.BucketURI;
import com.automq.stream.s3.operator.ObjectStorage;
import com.automq.stream.s3.operator.ObjectStorageFactory;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ConnectS3LogConfig implements S3LogConfig {
private static final Logger LOGGER = LoggerFactory.getLogger(ConnectS3LogConfig.class);
private final boolean enable;
private final String clusterId;
private final int nodeId;
private final String bucketURI;
private ObjectStorage objectStorage;
private LeaderNodeSelector leaderNodeSelector;
public ConnectS3LogConfig(boolean enable, String clusterId, int nodeId, String bucketURI) {
this.enable = enable;
this.clusterId = clusterId;
this.nodeId = nodeId;
this.bucketURI = bucketURI;
}
@Override
public boolean isEnabled() {
return this.enable;
}
@Override
public String clusterId() {
return this.clusterId;
}
@Override
public int nodeId() {
return this.nodeId;
}
@Override
public synchronized ObjectStorage objectStorage() {
if (this.objectStorage != null) {
return this.objectStorage;
}
if (StringUtils.isBlank(bucketURI)) {
LOGGER.error("Mandatory log config bucketURI is not set.");
return null;
}
String normalizedBucket = bucketURI.trim();
BucketURI logBucket = BucketURI.parse(normalizedBucket);
this.objectStorage = ObjectStorageFactory.instance().builder(logBucket).threadPrefix("s3-log-uploader").build();
return this.objectStorage;
}
@Override
public boolean isLeader() {
LeaderNodeSelector selector = leaderSelector();
return selector != null && selector.isLeader();
}
public LeaderNodeSelector leaderSelector() {
if (leaderNodeSelector == null) {
this.leaderNodeSelector = new RuntimeLeaderSelectorProvider().createSelector();
}
return leaderNodeSelector;
}
}

View File

@ -0,0 +1,112 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.log;
import com.automq.log.uploader.S3LogConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
/**
* Provides S3 log uploader configuration for Kafka Connect workers.
*/
public class ConnectS3LogConfigProvider {
private static Logger getLogger() {
return LoggerFactory.getLogger(ConnectS3LogConfigProvider.class);
}
private static final AtomicReference<Properties> CONFIG = new AtomicReference<>();
private static final long WAIT_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(10);
private static final CountDownLatch INIT = new CountDownLatch(1);
public static void initialize(Properties workerProps) {
try {
if (workerProps == null) {
CONFIG.set(null);
return;
}
Properties copy = new Properties();
for (Map.Entry<Object, Object> entry : workerProps.entrySet()) {
if (entry.getKey() != null && entry.getValue() != null) {
copy.put(entry.getKey(), entry.getValue());
}
}
CONFIG.set(copy);
} finally {
INIT.countDown();
}
getLogger().info("Initializing ConnectS3LogConfigProvider");
}
public S3LogConfig get() {
try {
if (!INIT.await(WAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS)) {
getLogger().warn("S3 log uploader config not initialized within timeout; uploader disabled.");
}
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
getLogger().warn("Interrupted while waiting for S3 log uploader config; uploader disabled.");
return null;
}
Properties source = CONFIG.get();
if (source == null) {
getLogger().warn("S3 log upload configuration was not provided; uploader disabled.");
return null;
}
String bucketURI = source.getProperty(LogConfigConstants.LOG_S3_BUCKET_KEY);
String clusterId = source.getProperty(LogConfigConstants.LOG_S3_CLUSTER_ID_KEY);
String nodeIdStr = resolveNodeId(source);
boolean enable = Boolean.parseBoolean(source.getProperty(LogConfigConstants.LOG_S3_ENABLE_KEY, "false"));
return new ConnectS3LogConfig(enable, clusterId, Integer.parseInt(nodeIdStr), bucketURI);
}
private String resolveNodeId(Properties workerProps) {
String fromConfig = workerProps.getProperty(LogConfigConstants.LOG_S3_NODE_ID_KEY);
if (!isBlank(fromConfig)) {
return fromConfig.trim();
}
String env = System.getenv("CONNECT_NODE_ID");
if (!isBlank(env)) {
return env.trim();
}
String host = workerProps.getProperty("automq.log.s3.node.hostname");
if (isBlank(host)) {
try {
host = InetAddress.getLocalHost().getHostName();
} catch (Exception e) {
host = System.getenv().getOrDefault("HOSTNAME", "0");
}
}
return Integer.toString(host.hashCode() & Integer.MAX_VALUE);
}
private boolean isBlank(String value) {
return value == null || value.trim().isEmpty();
}
}

View File

@ -0,0 +1,30 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.log;
public class LogConfigConstants {
public static final String LOG_S3_ENABLE_KEY = "log.s3.enable";
public static final String LOG_S3_BUCKET_KEY = "log.s3.bucket";
public static final String LOG_S3_CLUSTER_ID_KEY = "log.s3.cluster.id";
public static final String LOG_S3_NODE_ID_KEY = "log.s3.node.id";
}

View File

@ -0,0 +1,77 @@
package org.apache.kafka.connect.automq.metrics;
import org.apache.kafka.connect.automq.runtime.LeaderNodeSelector;
import org.apache.kafka.connect.automq.runtime.RuntimeLeaderSelectorProvider;
import com.automq.opentelemetry.exporter.MetricsExportConfig;
import com.automq.stream.s3.operator.BucketURI;
import com.automq.stream.s3.operator.ObjectStorage;
import com.automq.stream.s3.operator.ObjectStorageFactory;
import org.apache.commons.lang3.tuple.Pair;
import java.util.List;
public class ConnectMetricsExportConfig implements MetricsExportConfig {
private final BucketURI metricsBucket;
private final String clusterId;
private final int nodeId;
private final int intervalMs;
private final List<Pair<String, String>> baseLabels;
private ObjectStorage objectStorage;
private LeaderNodeSelector leaderNodeSelector;
public ConnectMetricsExportConfig(String clusterId, int nodeId, BucketURI metricsBucket, List<Pair<String, String>> baseLabels, int intervalMs) {
this.clusterId = clusterId;
this.nodeId = nodeId;
this.metricsBucket = metricsBucket;
this.baseLabels = baseLabels;
this.intervalMs = intervalMs;
}
@Override
public String clusterId() {
return this.clusterId;
}
@Override
public boolean isLeader() {
LeaderNodeSelector selector = leaderSelector();
return selector != null && selector.isLeader();
}
public LeaderNodeSelector leaderSelector() {
if (leaderNodeSelector == null) {
this.leaderNodeSelector = new RuntimeLeaderSelectorProvider().createSelector();
}
return leaderNodeSelector;
}
@Override
public int nodeId() {
return this.nodeId;
}
@Override
public ObjectStorage objectStorage() {
if (metricsBucket == null) {
return null;
}
if (this.objectStorage == null) {
this.objectStorage = ObjectStorageFactory.instance().builder(metricsBucket).threadPrefix("s3-metric").build();
}
return this.objectStorage;
}
@Override
public List<Pair<String, String>> baseLabels() {
return this.baseLabels;
}
@Override
public int intervalMs() {
return this.intervalMs;
}
}

View File

@ -0,0 +1,30 @@
package org.apache.kafka.connect.automq.metrics;
public class MetricsConfigConstants {
public static final String SERVICE_NAME_KEY = "service.name";
public static final String SERVICE_INSTANCE_ID_KEY = "service.instance.id";
public static final String S3_CLIENT_ID_KEY = "automq.telemetry.s3.cluster.id";
/**
* The URI for configuring metrics exporters. e.g. prometheus://localhost:9090, otlp://localhost:4317
*/
public static final String EXPORTER_URI_KEY = "automq.telemetry.exporter.uri";
/**
* The export interval in milliseconds.
*/
public static final String EXPORTER_INTERVAL_MS_KEY = "automq.telemetry.exporter.interval.ms";
/**
* The cardinality limit for any single metric.
*/
public static final String METRIC_CARDINALITY_LIMIT_KEY = "automq.telemetry.metric.cardinality.limit";
public static final int DEFAULT_METRIC_CARDINALITY_LIMIT = 20000;
public static final String TELEMETRY_METRICS_BASE_LABELS_CONFIG = "automq.telemetry.metrics.base.labels";
public static final String TELEMETRY_METRICS_BASE_LABELS_DOC = "The base labels that will be added to all metrics. The format is key1=value1,key2=value2.";
public static final String S3_BUCKET = "automq.telemetry.s3.bucket";
public static final String S3_BUCKETS_DOC = "The buckets url with format 0@s3://$bucket?region=$region.\n" +
"The full url format for s3 is 0@s3://$bucket?region=$region[&endpoint=$endpoint][&pathStyle=$enablePathStyle][&authType=$authType][&accessKey=$accessKey][&secretKey=$secretKey][&checksumAlgorithm=$checksumAlgorithm]\n" +
"- pathStyle: true|false. The object storage access path style. When using MinIO, it should be set to true.\n" +
"- authType: instance|static. When set to instance, it will use instance profile to auth. When set to static, it will get accessKey and secretKey from the url or from system environment KAFKA_S3_ACCESS_KEY/KAFKA_S3_SECRET_KEY.";
}
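
Taken together, these keys are plain worker properties. Below is a minimal sketch of how they might be wired into the initializeTelemetry entry point shown further down in this change; the endpoint, bucket, cluster id, and label values are illustrative assumptions only.

import java.util.Properties;

import org.apache.kafka.connect.automq.metrics.MetricsConfigConstants;
import org.apache.kafka.connect.automq.metrics.OpenTelemetryMetricsReporter;

public final class TelemetryBootstrapSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        // Export to a hypothetical local OTLP collector every 60 seconds.
        props.setProperty(MetricsConfigConstants.EXPORTER_URI_KEY, "otlp://localhost:4317");
        props.setProperty(MetricsConfigConstants.EXPORTER_INTERVAL_MS_KEY, "60000");
        // Base labels follow the documented key1=value1,key2=value2 format.
        props.setProperty(MetricsConfigConstants.TELEMETRY_METRICS_BASE_LABELS_CONFIG, "env=dev,team=streaming");
        // Optional: also persist metrics to object storage (bucket URI is hypothetical).
        props.setProperty(MetricsConfigConstants.S3_BUCKET, "0@s3://example-metrics-bucket?region=us-east-1");
        props.setProperty(MetricsConfigConstants.S3_CLIENT_ID_KEY, "connect-cluster-1");
        props.setProperty(MetricsConfigConstants.SERVICE_NAME_KEY, "connect-worker");
        props.setProperty(MetricsConfigConstants.SERVICE_INSTANCE_ID_KEY, "1");

        // Builds a ConnectMetricsExportConfig and initializes the AutoMQ telemetry manager.
        OpenTelemetryMetricsReporter.initializeTelemetry(props);
    }
}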

View File

@ -0,0 +1,822 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.metrics;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.metrics.KafkaMetric;
import org.apache.kafka.common.metrics.MetricsReporter;
import com.automq.opentelemetry.AutoMQTelemetryManager;
import com.automq.stream.s3.operator.BucketURI;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.api.metrics.Meter;
import io.opentelemetry.api.metrics.ObservableDoubleCounter;
import io.opentelemetry.api.metrics.ObservableDoubleGauge;
import io.opentelemetry.api.metrics.ObservableLongCounter;
/**
* A MetricsReporter implementation that bridges Kafka Connect metrics to OpenTelemetry.
*
* <p>This reporter integrates with the AutoMQ OpenTelemetry module to export Kafka Connect
* metrics through various exporters (Prometheus, OTLP, etc.). It automatically converts
* Kafka metrics to OpenTelemetry instruments based on metric types and provides proper
* labeling and naming conventions.
*
* <p>Key features:
* <ul>
* <li>Automatic metric type detection and conversion</li>
* <li>Support for gauges and counters using async observable instruments</li>
* <li>Proper attribute mapping from Kafka metric tags</li>
* <li>Integration with AutoMQ telemetry infrastructure</li>
* <li>Configurable metric filtering</li>
* <li>Real-time metric value updates through callbacks</li>
* </ul>
*
* <p>Configuration options:
* <ul>
* <li>{@code opentelemetry.metrics.enabled} - Enable/disable OpenTelemetry metrics (default: true)</li>
* <li>{@code opentelemetry.metrics.prefix} - Prefix for metric names (default: "kafka")</li>
* <li>{@code opentelemetry.metrics.include.pattern} - Regex pattern for included metrics</li>
* <li>{@code opentelemetry.metrics.exclude.pattern} - Regex pattern for excluded metrics</li>
* </ul>
*/
public class OpenTelemetryMetricsReporter implements MetricsReporter {
private static final Logger LOGGER = LoggerFactory.getLogger(OpenTelemetryMetricsReporter.class);
private static final String ENABLED_CONFIG = "opentelemetry.metrics.enabled";
private static final String PREFIX_CONFIG = "opentelemetry.metrics.prefix";
private static final String INCLUDE_PATTERN_CONFIG = "opentelemetry.metrics.include.pattern";
private static final String EXCLUDE_PATTERN_CONFIG = "opentelemetry.metrics.exclude.pattern";
private static final String DEFAULT_PREFIX = "kafka";
private boolean enabled = true;
private String metricPrefix = DEFAULT_PREFIX;
private String includePattern = null;
private String excludePattern = null;
private Meter meter;
private final Map<String, AutoCloseable> observableHandles = new ConcurrentHashMap<>();
private final Map<String, KafkaMetric> registeredMetrics = new ConcurrentHashMap<>();
public static void initializeTelemetry(Properties props) {
String exportURIStr = props.getProperty(MetricsConfigConstants.EXPORTER_URI_KEY);
String serviceName = props.getProperty(MetricsConfigConstants.SERVICE_NAME_KEY, "connect-default");
String instanceId = props.getProperty(MetricsConfigConstants.SERVICE_INSTANCE_ID_KEY, "0");
String clusterId = props.getProperty(MetricsConfigConstants.S3_CLIENT_ID_KEY, "cluster-default");
int intervalMs = Integer.parseInt(props.getProperty(MetricsConfigConstants.EXPORTER_INTERVAL_MS_KEY, "60000"));
BucketURI metricsBucket = getMetricsBucket(props);
List<Pair<String, String>> baseLabels = getBaseLabels(props);
AutoMQTelemetryManager.initializeInstance(exportURIStr, serviceName, instanceId, new ConnectMetricsExportConfig(clusterId, Integer.parseInt(instanceId), metricsBucket, baseLabels, intervalMs));
LOGGER.info("OpenTelemetryMetricsReporter initialized");
}
private static BucketURI getMetricsBucket(Properties props) {
String metricsBucket = props.getProperty(MetricsConfigConstants.S3_BUCKET, "");
if (StringUtils.isNotBlank(metricsBucket)) {
List<BucketURI> bucketList = BucketURI.parseBuckets(metricsBucket);
if (!bucketList.isEmpty()) {
return bucketList.get(0);
}
}
return null;
}
private static List<Pair<String, String>> getBaseLabels(Properties props) {
// Parse base labels from the key1=value1,key2=value2 formatted config value.
// Malformed entries are skipped; an unset or blank config yields no base labels.
String baseLabels = props.getProperty(MetricsConfigConstants.TELEMETRY_METRICS_BASE_LABELS_CONFIG);
if (StringUtils.isBlank(baseLabels)) {
return Collections.emptyList();
}
List<Pair<String, String>> labels = new ArrayList<>();
for (String label : baseLabels.split(",")) {
String[] kv = label.split("=");
if (kv.length != 2) {
continue;
}
labels.add(Pair.of(kv[0], kv[1]));
}
return labels;
}
@Override
public void configure(Map<String, ?> configs) {
// Parse configuration
Object enabledObj = configs.get(ENABLED_CONFIG);
if (enabledObj != null) {
enabled = Boolean.parseBoolean(enabledObj.toString());
}
Object prefixObj = configs.get(PREFIX_CONFIG);
if (prefixObj != null) {
metricPrefix = prefixObj.toString();
}
Object includeObj = configs.get(INCLUDE_PATTERN_CONFIG);
if (includeObj != null) {
includePattern = includeObj.toString();
}
Object excludeObj = configs.get(EXCLUDE_PATTERN_CONFIG);
if (excludeObj != null) {
excludePattern = excludeObj.toString();
}
LOGGER.info("OpenTelemetryMetricsReporter configured - enabled: {}, prefix: {}, include: {}, exclude: {}",
enabled, metricPrefix, includePattern, excludePattern);
}
@Override
public void init(List<KafkaMetric> metrics) {
if (!enabled) {
LOGGER.info("OpenTelemetryMetricsReporter is disabled");
return;
}
try {
// Get the OpenTelemetry meter from AutoMQTelemetryManager
// This assumes the telemetry manager is already initialized
meter = AutoMQTelemetryManager.getInstance().getMeter();
if (meter == null) {
LOGGER.warn("AutoMQTelemetryManager is not initialized, OpenTelemetry metrics will not be available");
enabled = false;
return;
}
// Register initial metrics
for (KafkaMetric metric : metrics) {
registerMetric(metric);
}
LOGGER.info("OpenTelemetryMetricsReporter initialized with {} metrics", metrics.size());
} catch (Exception e) {
LOGGER.error("Failed to initialize OpenTelemetryMetricsReporter", e);
enabled = false;
}
}
@Override
public void metricChange(KafkaMetric metric) {
if (!enabled || meter == null) {
return;
}
try {
registerMetric(metric);
} catch (Exception e) {
LOGGER.warn("Failed to register metric change for {}", metric.metricName(), e);
}
}
@Override
public void metricRemoval(KafkaMetric metric) {
if (!enabled) {
return;
}
try {
String metricKey = buildMetricKey(metric.metricName());
closeHandle(metricKey);
registeredMetrics.remove(metricKey);
LOGGER.debug("Removed metric: {}", metricKey);
} catch (Exception e) {
LOGGER.warn("Failed to remove metric {}", metric.metricName(), e);
}
}
@Override
public void close() {
if (enabled) {
// Close all observable handles to prevent memory leaks
observableHandles.values().forEach(handle -> {
try {
handle.close();
} catch (Exception e) {
LOGGER.debug("Error closing observable handle", e);
}
});
observableHandles.clear();
registeredMetrics.clear();
}
LOGGER.info("OpenTelemetryMetricsReporter closed");
}
private void registerMetric(KafkaMetric metric) {
LOGGER.debug("OpenTelemetryMetricsReporter registering metric {}", metric.metricName());
MetricName metricName = metric.metricName();
String metricKey = buildMetricKey(metricName);
// Apply filtering
if (!shouldIncludeMetric(metricKey)) {
return;
}
// Check if metric value is numeric at registration time
Object testValue = safeMetricValue(metric);
if (!(testValue instanceof Number)) {
LOGGER.debug("Skipping non-numeric metric: {}", metricKey);
return;
}
Attributes attributes = buildAttributes(metricName);
// Close existing handle if present (for metric updates)
closeHandle(metricKey);
// Register the metric for future access
registeredMetrics.put(metricKey, metric);
// Determine metric type and register accordingly
if (isCounterMetric(metricName)) {
registerAsyncCounter(metricKey, metricName, metric, attributes, (Number) testValue);
} else {
registerAsyncGauge(metricKey, metricName, metric, attributes);
}
}
private void registerAsyncGauge(String metricKey, MetricName metricName, KafkaMetric metric, Attributes attributes) {
try {
String description = buildDescription(metricName);
String unit = determineUnit(metricName);
ObservableDoubleGauge gauge = meter.gaugeBuilder(metricKey)
.setDescription(description)
.setUnit(unit)
.buildWithCallback(measurement -> {
Number value = (Number) safeMetricValue(metric);
if (value != null) {
measurement.record(value.doubleValue(), attributes);
}
});
observableHandles.put(metricKey, gauge);
LOGGER.debug("Registered async gauge: {}", metricKey);
} catch (Exception e) {
LOGGER.warn("Failed to register async gauge for {}", metricKey, e);
}
}
private void registerAsyncCounter(String metricKey, MetricName metricName, KafkaMetric metric,
Attributes attributes, Number initialValue) {
try {
String description = buildDescription(metricName);
String unit = determineUnit(metricName);
// Use appropriate counter type based on initial value type
if (initialValue instanceof Long || initialValue instanceof Integer) {
ObservableLongCounter counter = meter.counterBuilder(metricKey)
.setDescription(description)
.setUnit(unit)
.buildWithCallback(measurement -> {
Number value = (Number) safeMetricValue(metric);
if (value != null) {
long longValue = value.longValue();
if (longValue >= 0) {
measurement.record(longValue, attributes);
}
}
});
observableHandles.put(metricKey, counter);
} else {
ObservableDoubleCounter counter = meter.counterBuilder(metricKey)
.ofDoubles()
.setDescription(description)
.setUnit(unit)
.buildWithCallback(measurement -> {
Number value = (Number) safeMetricValue(metric);
if (value != null) {
double doubleValue = value.doubleValue();
if (doubleValue >= 0) {
measurement.record(doubleValue, attributes);
}
}
});
observableHandles.put(metricKey, counter);
}
LOGGER.debug("Registered async counter: {}", metricKey);
} catch (Exception e) {
LOGGER.warn("Failed to register async counter for {}", metricKey, e);
}
}
private Object safeMetricValue(KafkaMetric metric) {
try {
return metric.metricValue();
} catch (Exception e) {
LOGGER.debug("Failed to read metric value for {}", metric.metricName(), e);
return null;
}
}
private void closeHandle(String metricKey) {
AutoCloseable handle = observableHandles.remove(metricKey);
if (handle != null) {
try {
handle.close();
} catch (Exception e) {
LOGGER.debug("Error closing handle for {}", metricKey, e);
}
}
}
private String buildMetricKey(MetricName metricName) {
StringBuilder sb = new StringBuilder(metricPrefix);
sb.append(".");
// Add group if present
if (metricName.group() != null && !metricName.group().isEmpty()) {
sb.append(metricName.group().replace("-", "_").toLowerCase(Locale.ROOT));
sb.append(".");
}
// Add name
sb.append(metricName.name().replace("-", "_").toLowerCase(Locale.ROOT));
return sb.toString();
}
private Attributes buildAttributes(MetricName metricName) {
AttributesBuilder builder = Attributes.builder();
// Add metric tags as attributes
Map<String, String> tags = metricName.tags();
if (tags != null) {
for (Map.Entry<String, String> entry : tags.entrySet()) {
String key = entry.getKey();
String value = entry.getValue();
if (key != null && value != null) {
builder.put(sanitizeAttributeKey(key), value);
}
}
}
// Add standard attributes
if (metricName.group() != null) {
builder.put("metric.group", metricName.group());
}
return builder.build();
}
private String sanitizeAttributeKey(String key) {
return key.replace("-", "_").replace(".", "_").toLowerCase(Locale.ROOT);
}
private String buildDescription(MetricName metricName) {
StringBuilder description = new StringBuilder();
description.append("Kafka Connect metric: ");
if (metricName.group() != null) {
description.append(metricName.group()).append(" - ");
}
description.append(metricName.name());
return description.toString();
}
private String determineUnit(MetricName metricName) {
String name = metricName.name().toLowerCase(Locale.ROOT);
String group = metricName.group() != null ? metricName.group().toLowerCase(Locale.ROOT) : "";
if (isKafkaConnectMetric(group)) {
return determineConnectMetricUnit(name);
}
if (isTimeMetric(name)) {
return determineTimeUnit(name);
}
if (isBytesMetric(name)) {
return determineBytesUnit(name);
}
if (isRateMetric(name)) {
return "1/s";
}
if (isRatioOrPercentageMetric(name)) {
return "1";
}
if (isCountMetric(name)) {
return "1";
}
return "1";
}
private boolean isCounterMetric(MetricName metricName) {
String name = metricName.name().toLowerCase(Locale.ROOT);
String group = metricName.group() != null ? metricName.group().toLowerCase(Locale.ROOT) : "";
if (isKafkaConnectMetric(group)) {
return isConnectCounterMetric(name);
}
if (isGaugeMetric(name)) {
return false;
}
return hasCounterKeywords(name);
}
private boolean isGaugeMetric(String name) {
return hasRateOrAvgKeywords(name) || hasRatioOrPercentKeywords(name) ||
hasMinMaxOrCurrentKeywords(name) || hasActiveOrSizeKeywords(name) ||
hasTimeButNotTotal(name);
}
private boolean hasRateOrAvgKeywords(String name) {
return name.contains("rate") || name.contains("avg") || name.contains("mean");
}
private boolean hasRatioOrPercentKeywords(String name) {
return name.contains("ratio") || name.contains("percent") || name.contains("pct");
}
private boolean hasMinMaxOrCurrentKeywords(String name) {
return name.contains("max") || name.contains("min") || name.contains("current");
}
private boolean hasActiveOrSizeKeywords(String name) {
return name.contains("active") || name.contains("lag") || name.contains("size");
}
private boolean hasTimeButNotTotal(String name) {
return name.contains("time") && !name.contains("total");
}
private boolean hasCounterKeywords(String name) {
String[] parts = name.split("[._-]");
for (String part : parts) {
if (isCounterKeyword(part)) {
return true;
}
}
return false;
}
private boolean isCounterKeyword(String part) {
return isBasicCounterKeyword(part) || isAdvancedCounterKeyword(part);
}
private boolean isBasicCounterKeyword(String part) {
return "total".equals(part) || "count".equals(part) || "sum".equals(part) ||
"attempts".equals(part);
}
private boolean isAdvancedCounterKeyword(String part) {
return "success".equals(part) || "failure".equals(part) ||
"errors".equals(part) || "retries".equals(part) || "skipped".equals(part);
}
private boolean isConnectCounterMetric(String name) {
if (hasTotalBasedCounters(name)) {
return true;
}
if (hasRecordCounters(name)) {
return true;
}
if (hasActiveCountMetrics(name)) {
return false;
}
return false;
}
private boolean hasTotalBasedCounters(String name) {
return hasBasicTotalCounters(name) || hasSuccessFailureCounters(name) ||
hasErrorRetryCounters(name) || hasRequestCompletionCounters(name);
}
private boolean hasBasicTotalCounters(String name) {
return name.contains("total") || name.contains("attempts");
}
private boolean hasSuccessFailureCounters(String name) {
return (name.contains("success") && name.contains("total")) ||
(name.contains("failure") && name.contains("total"));
}
private boolean hasErrorRetryCounters(String name) {
return name.contains("errors") || name.contains("retries") || name.contains("skipped");
}
private boolean hasRequestCompletionCounters(String name) {
return name.contains("requests") || name.contains("completions");
}
private boolean hasRecordCounters(String name) {
return hasRecordKeyword(name) && hasTotalOperation(name);
}
private boolean hasRecordKeyword(String name) {
return name.contains("record") || name.contains("records");
}
private boolean hasTotalOperation(String name) {
return hasPollWriteTotal(name) || hasReadSendTotal(name);
}
private boolean hasPollWriteTotal(String name) {
return name.contains("poll-total") || name.contains("write-total");
}
private boolean hasReadSendTotal(String name) {
return name.contains("read-total") || name.contains("send-total");
}
private boolean hasActiveCountMetrics(String name) {
return hasCountMetrics(name) || hasSequenceMetrics(name);
}
private boolean hasCountMetrics(String name) {
return hasActiveTaskCount(name) || hasConnectorCount(name) || hasStatusCount(name);
}
private boolean hasActiveTaskCount(String name) {
return name.contains("active-count") || name.contains("partition-count") ||
name.contains("task-count");
}
private boolean hasConnectorCount(String name) {
return name.contains("connector-count") || name.contains("running-count");
}
private boolean hasStatusCount(String name) {
return name.contains("paused-count") || name.contains("failed-count");
}
private boolean hasSequenceMetrics(String name) {
return name.contains("seq-no") || name.contains("seq-num");
}
private boolean isKafkaConnectMetric(String group) {
return group.contains("connector") || group.contains("task") ||
group.contains("connect") || group.contains("worker");
}
private String determineConnectMetricUnit(String name) {
String timeUnit = getTimeUnit(name);
if (timeUnit != null) {
return timeUnit;
}
String countUnit = getCountUnit(name);
if (countUnit != null) {
return countUnit;
}
String specialUnit = getSpecialUnit(name);
if (specialUnit != null) {
return specialUnit;
}
return "1";
}
private String getTimeUnit(String name) {
if (isTimeBasedMetric(name)) {
return "ms";
}
if (isTimestampMetric(name)) {
return "ms";
}
if (isTimeSinceMetric(name)) {
return "ms";
}
return null;
}
private String getCountUnit(String name) {
if (isSequenceOrCountMetric(name)) {
return "1";
}
if (isLagMetric(name)) {
return "1";
}
if (isTotalOrCounterMetric(name)) {
return "1";
}
return null;
}
private String getSpecialUnit(String name) {
if (isStatusOrMetadataMetric(name)) {
return "1";
}
if (isConnectRateMetric(name)) {
return "1/s";
}
if (isRatioMetric(name)) {
return "1";
}
return null;
}
private boolean isTimeBasedMetric(String name) {
return hasTimeMs(name) || hasCommitBatchTime(name);
}
private boolean hasTimeMs(String name) {
return name.endsWith("-time-ms") || name.endsWith("-avg-time-ms") ||
name.endsWith("-max-time-ms");
}
private boolean hasCommitBatchTime(String name) {
return name.contains("commit-time") || name.contains("batch-time") ||
name.contains("rebalance-time");
}
private boolean isSequenceOrCountMetric(String name) {
return hasSequenceNumbers(name) || hasCountSuffix(name);
}
private boolean hasSequenceNumbers(String name) {
return name.contains("seq-no") || name.contains("seq-num");
}
private boolean hasCountSuffix(String name) {
return name.endsWith("-count") || name.contains("task-count") ||
name.contains("partition-count");
}
private boolean isLagMetric(String name) {
return name.contains("lag");
}
private boolean isStatusOrMetadataMetric(String name) {
return isStatusMetric(name) || hasProtocolLeaderMetrics(name) ||
hasConnectorMetrics(name);
}
private boolean isStatusMetric(String name) {
return "status".equals(name) || name.contains("protocol");
}
private boolean hasProtocolLeaderMetrics(String name) {
return name.contains("leader-name");
}
private boolean hasConnectorMetrics(String name) {
return name.contains("connector-type") || name.contains("connector-class") ||
name.contains("connector-version");
}
private boolean isRatioMetric(String name) {
return name.contains("ratio") || name.contains("percentage");
}
private boolean isTotalOrCounterMetric(String name) {
return hasTotalSum(name) || hasAttempts(name) || hasSuccessFailure(name) ||
hasErrorsRetries(name);
}
private boolean hasTotalSum(String name) {
return name.contains("total") || name.contains("sum");
}
private boolean hasAttempts(String name) {
return name.contains("attempts");
}
private boolean hasSuccessFailure(String name) {
return name.contains("success") || name.contains("failure");
}
private boolean hasErrorsRetries(String name) {
return name.contains("errors") || name.contains("retries") || name.contains("skipped");
}
private boolean isTimestampMetric(String name) {
return name.contains("timestamp") || name.contains("epoch");
}
private boolean isConnectRateMetric(String name) {
return name.contains("rate") && !name.contains("ratio");
}
private boolean isTimeSinceMetric(String name) {
return name.contains("time-since-last") || name.contains("since-last");
}
private boolean isTimeMetric(String name) {
return hasTimeKeywords(name) && !hasTimeExclusions(name);
}
private boolean hasTimeKeywords(String name) {
return name.contains("time") || name.contains("latency") ||
name.contains("duration");
}
private boolean hasTimeExclusions(String name) {
return name.contains("ratio") || name.contains("rate") ||
name.contains("count") || name.contains("since-last");
}
private String determineTimeUnit(String name) {
if (name.contains("ms") || name.contains("millisecond")) {
return "ms";
} else if (name.contains("us") || name.contains("microsecond")) {
return "us";
} else if (name.contains("ns") || name.contains("nanosecond")) {
return "ns";
} else if (name.contains("s") && !name.contains("ms")) {
return "s";
} else {
return "ms";
}
}
private boolean isBytesMetric(String name) {
return name.contains("byte") || name.contains("bytes") ||
name.contains("size") && !name.contains("batch-size");
}
private String determineBytesUnit(String name) {
boolean isRate = name.contains("rate") || name.contains("per-sec") ||
name.contains("persec") || name.contains("/s");
return isRate ? "By/s" : "By";
}
private boolean isRateMetric(String name) {
return hasRateKeywords(name) && !hasExcludedKeywords(name);
}
private boolean hasRateKeywords(String name) {
return name.contains("rate") || name.contains("per-sec") ||
name.contains("persec") || name.contains("/s");
}
private boolean hasExcludedKeywords(String name) {
return name.contains("byte") || name.contains("ratio");
}
private boolean isRatioOrPercentageMetric(String name) {
return hasPercentKeywords(name) || hasRatioKeywords(name);
}
private boolean hasPercentKeywords(String name) {
return name.contains("percent") || name.contains("pct");
}
private boolean hasRatioKeywords(String name) {
return name.contains("ratio");
}
private boolean isCountMetric(String name) {
return name.contains("count") || name.contains("total") ||
name.contains("sum") || name.endsWith("-num");
}
private boolean shouldIncludeMetric(String metricKey) {
if (excludePattern != null && metricKey.matches(excludePattern)) {
return false;
}
if (includePattern != null) {
return metricKey.matches(includePattern);
}
return true;
}
}
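
As a usage sketch of the filtering options described in the class javadoc: the config keys below are the ones the reporter defines, while the regex values are illustrative assumptions. In a real worker this class would typically be listed under the metric.reporters worker property and driven by the Kafka metrics registry rather than called directly.

import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.connect.automq.metrics.OpenTelemetryMetricsReporter;

public final class ReporterConfigSketch {
    public static void main(String[] args) {
        Map<String, Object> configs = new HashMap<>();
        configs.put("opentelemetry.metrics.enabled", "true");
        configs.put("opentelemetry.metrics.prefix", "kafka.connect");
        // Keep connector/task metrics, drop anything JVM-related (regexes are hypothetical).
        configs.put("opentelemetry.metrics.include.pattern", "kafka\\.connect\\..*");
        configs.put("opentelemetry.metrics.exclude.pattern", ".*jvm.*");

        OpenTelemetryMetricsReporter reporter = new OpenTelemetryMetricsReporter();
        reporter.configure(configs);
        // In a worker, Kafka then calls init(...) with the currently registered metrics
        // and later metricChange/metricRemoval as connectors and tasks come and go.
    }
}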

View File

@ -0,0 +1,34 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.runtime;
/**
 * An interface for determining which node should be responsible for cleaning up metrics.
 * This abstraction allows different clean-node selection strategies to be plugged in.
*/
public interface LeaderNodeSelector {
/**
 * Determines whether the current node should be responsible for cleaning up metrics.
*
* @return true if the current node should clean metrics, false otherwise.
*/
boolean isLeader();
}

View File

@ -0,0 +1,36 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.runtime;
/**
* SPI interface for providing custom LeaderNodeSelector implementations.
* Third-party libraries can implement this interface and register their implementations
* using Java's ServiceLoader mechanism.
*/
public interface LeaderNodeSelectorProvider {
/**
* Creates a new LeaderNodeSelector instance based on the provided configuration.
*
* @return A new LeaderNodeSelector instance
* @throws Exception If the selector cannot be created
*/
LeaderNodeSelector createSelector() throws Exception;
}
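
The javadoc above points at Java's ServiceLoader mechanism. Here is a minimal sketch of what a third-party provider could look like under that assumption; the package, class name, and environment flag are hypothetical.

package com.example.connect.leader; // hypothetical third-party package

import org.apache.kafka.connect.automq.runtime.LeaderNodeSelector;
import org.apache.kafka.connect.automq.runtime.LeaderNodeSelectorProvider;

public class EnvFlagLeaderSelectorProvider implements LeaderNodeSelectorProvider {
    @Override
    public LeaderNodeSelector createSelector() {
        // Treat this node as leader only when an (illustrative) environment flag is set.
        return () -> "true".equalsIgnoreCase(System.getenv("CONNECT_METRICS_LEADER"));
    }
}

// Registered by listing the class name in
//   META-INF/services/org.apache.kafka.connect.automq.runtime.LeaderNodeSelectorProvider
// so that ServiceLoader.load(LeaderNodeSelectorProvider.class) can discover it.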

View File

@ -0,0 +1,46 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.runtime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.function.BooleanSupplier;
/**
* Stores runtime-provided suppliers that answer whether the current process
* should act as the leader.
*/
public final class RuntimeLeaderRegistry {
private static final Logger LOGGER = LoggerFactory.getLogger(RuntimeLeaderRegistry.class);
private static BooleanSupplier supplier = () -> false;
private RuntimeLeaderRegistry() {
}
public static void register(BooleanSupplier supplier) {
RuntimeLeaderRegistry.supplier = supplier;
LOGGER.info("Registered runtime leader supplier for log metrics.");
}
public static BooleanSupplier supplier() {
return supplier;
}
}
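
For orientation, a condensed sketch of the registry's intended flow: the runtime registers a supplier (ConnectDistributed later in this change uses herder::isLeaderInstance) and readers such as RuntimeLeaderSelectorProvider consult it lazily. The fixed supplier below is for illustration only.

import java.util.function.BooleanSupplier;

import org.apache.kafka.connect.automq.runtime.RuntimeLeaderRegistry;

public final class LeaderRegistrySketch {
    public static void main(String[] args) {
        // In the real runtime this is herder::isLeaderInstance; hard-coded here for illustration.
        BooleanSupplier leaderSupplier = () -> true;
        RuntimeLeaderRegistry.register(leaderSupplier);

        // Later readers fetch the current supplier and evaluate it on demand.
        boolean isLeader = RuntimeLeaderRegistry.supplier().getAsBoolean();
        System.out.println("leader=" + isLeader);
    }
}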

View File

@ -0,0 +1,74 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.automq.runtime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BooleanSupplier;
public class RuntimeLeaderSelectorProvider implements LeaderNodeSelectorProvider {
private static final Logger LOGGER = LoggerFactory.getLogger(RuntimeLeaderSelectorProvider.class);
@Override
public LeaderNodeSelector createSelector() {
final AtomicBoolean missingLogged = new AtomicBoolean(false);
final AtomicBoolean leaderLogged = new AtomicBoolean(false);
return () -> {
BooleanSupplier current = org.apache.kafka.connect.automq.runtime.RuntimeLeaderRegistry.supplier();
if (current == null) {
if (missingLogged.compareAndSet(false, true)) {
LOGGER.warn("leader supplier for key not yet available; treating node as follower until registration happens.");
}
if (leaderLogged.getAndSet(false)) {
LOGGER.info("Node stepped down from leadership because supplier is unavailable.");
}
return false;
}
if (missingLogged.get()) {
missingLogged.set(false);
LOGGER.info("leader supplier is now available.");
}
try {
boolean leader = current.getAsBoolean();
if (leader) {
if (!leaderLogged.getAndSet(true)) {
LOGGER.info("Node became leader");
}
} else {
if (leaderLogged.getAndSet(false)) {
LOGGER.info("Node stepped down from leadership");
}
}
return leader;
} catch (RuntimeException e) {
if (leaderLogged.getAndSet(false)) {
LOGGER.info("Node stepped down from leadership due to supplier exception.");
}
LOGGER.warn("leader supplier threw exception. Treating as follower.", e);
return false;
}
};
}
}

View File

@ -19,6 +19,9 @@ package org.apache.kafka.connect.cli;
import org.apache.kafka.common.utils.Exit;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.automq.az.AzMetadataProviderHolder;
import org.apache.kafka.connect.automq.log.ConnectLogUploader;
import org.apache.kafka.connect.automq.metrics.OpenTelemetryMetricsReporter;
import org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy;
import org.apache.kafka.connect.runtime.Connect;
import org.apache.kafka.connect.runtime.Herder;
@ -36,6 +39,7 @@ import java.net.URI;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
/**
* Common initialization logic for Kafka Connect, intended for use by command line utilities
@ -45,7 +49,9 @@ import java.util.Map;
*/
public abstract class AbstractConnectCli<H extends Herder, T extends WorkerConfig> {
private static final Logger log = LoggerFactory.getLogger(AbstractConnectCli.class);
private static Logger getLogger() {
return LoggerFactory.getLogger(AbstractConnectCli.class);
}
private final String[] args;
private final Time time = Time.SYSTEM;
@ -83,7 +89,6 @@ public abstract class AbstractConnectCli<H extends Herder, T extends WorkerConfi
*/
public void run() {
if (args.length < 1 || Arrays.asList(args).contains("--help")) {
log.info("Usage: {}", usage());
Exit.exit(1);
}
@ -92,6 +97,17 @@ public abstract class AbstractConnectCli<H extends Herder, T extends WorkerConfi
Map<String, String> workerProps = !workerPropsFile.isEmpty() ?
Utils.propsToStringMap(Utils.loadProps(workerPropsFile)) : Collections.emptyMap();
String[] extraArgs = Arrays.copyOfRange(args, 1, args.length);
// AutoMQ inject start
// Initialize S3 log uploader and OpenTelemetry with worker properties
ConnectLogUploader.initialize(workerProps);
AzMetadataProviderHolder.initialize(workerProps);
Properties telemetryProps = new Properties();
telemetryProps.putAll(workerProps);
OpenTelemetryMetricsReporter.initializeTelemetry(telemetryProps);
// AutoMQ inject end
Connect<H> connect = startConnect(workerProps);
processExtraArgs(connect, extraArgs);
@ -99,7 +115,7 @@ public abstract class AbstractConnectCli<H extends Herder, T extends WorkerConfi
connect.awaitStop();
} catch (Throwable t) {
log.error("Stopping due to error", t);
getLogger().error("Stopping due to error", t);
Exit.exit(2);
}
}
@ -111,17 +127,17 @@ public abstract class AbstractConnectCli<H extends Herder, T extends WorkerConfi
* @return a started instance of {@link Connect}
*/
public Connect<H> startConnect(Map<String, String> workerProps) {
log.info("Kafka Connect worker initializing ...");
getLogger().info("Kafka Connect worker initializing ...");
long initStart = time.hiResClockMs();
WorkerInfo initInfo = new WorkerInfo();
initInfo.logAll();
log.info("Scanning for plugin classes. This might take a moment ...");
getLogger().info("Scanning for plugin classes. This might take a moment ...");
Plugins plugins = new Plugins(workerProps);
plugins.compareAndSwapWithDelegatingLoader();
T config = createConfig(workerProps);
log.debug("Kafka cluster ID: {}", config.kafkaClusterId());
getLogger().debug("Kafka cluster ID: {}", config.kafkaClusterId());
RestClient restClient = new RestClient(config);
@ -138,11 +154,11 @@ public abstract class AbstractConnectCli<H extends Herder, T extends WorkerConfi
H herder = createHerder(config, workerId, plugins, connectorClientConfigOverridePolicy, restServer, restClient);
final Connect<H> connect = new Connect<>(herder, restServer);
log.info("Kafka Connect worker initialization took {}ms", time.hiResClockMs() - initStart);
getLogger().info("Kafka Connect worker initialization took {}ms", time.hiResClockMs() - initStart);
try {
connect.start();
} catch (Exception e) {
log.error("Failed to start Connect", e);
getLogger().error("Failed to start Connect", e);
connect.stop();
Exit.exit(3);
}

View File

@ -17,6 +17,7 @@
package org.apache.kafka.connect.cli;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.connect.automq.runtime.RuntimeLeaderRegistry;
import org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.json.JsonConverterConfig;
@ -39,6 +40,7 @@ import org.apache.kafka.connect.util.SharedTopicAdmin;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.function.BooleanSupplier;
import static org.apache.kafka.clients.CommonClientConfigs.CLIENT_ID_CONFIG;
@ -96,10 +98,16 @@ public class ConnectDistributed extends AbstractConnectCli<DistributedHerder, Di
// Pass the shared admin to the distributed herder as an additional AutoCloseable object that should be closed when the
// herder is stopped. This is easier than having to track and own the lifecycle ourselves.
return new DistributedHerder(config, Time.SYSTEM, worker,
DistributedHerder herder = new DistributedHerder(config, Time.SYSTEM, worker,
kafkaClusterId, statusBackingStore, configBackingStore,
restServer.advertisedUrl().toString(), restClient, connectorClientConfigOverridePolicy,
Collections.emptyList(), sharedAdmin);
// AutoMQ for Kafka connect inject start
BooleanSupplier leaderSupplier = herder::isLeaderInstance;
RuntimeLeaderRegistry.register(leaderSupplier);
// AutoMQ for Kafka connect inject end
return herder;
}
@Override

View File

@ -21,6 +21,8 @@ import org.apache.kafka.connect.runtime.distributed.DistributedHerder;
import org.apache.kafka.connect.runtime.rest.ConnectRestServer;
import org.apache.kafka.connect.runtime.rest.RestServer;
import com.automq.log.S3RollingFileAppender;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -115,6 +117,9 @@ public class Connect<H extends Herder> {
try {
startLatch.await();
Connect.this.stop();
// AutoMQ inject start
S3RollingFileAppender.shutdown();
// AutoMQ inject end
} catch (InterruptedException e) {
log.error("Interrupted in shutdown hook while waiting for Kafka Connect startup to finish");
}

View File

@ -48,6 +48,7 @@ import org.apache.kafka.common.utils.ThreadUtils;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Timer;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.automq.az.AzAwareClientConfigurator;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.connector.Connector;
import org.apache.kafka.connect.connector.Task;
@ -841,6 +842,10 @@ public class Worker {
connectorClientConfigOverridePolicy);
producerProps.putAll(producerOverrides);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyProducerAz(producerProps, defaultClientId);
// AutoMQ for Kafka inject end
return producerProps;
}
@ -909,6 +914,10 @@ public class Worker {
connectorClientConfigOverridePolicy);
consumerProps.putAll(consumerOverrides);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyConsumerAz(consumerProps, defaultClientId);
// AutoMQ for Kafka inject end
return consumerProps;
}
@ -938,6 +947,10 @@ public class Worker {
// Admin client-specific overrides in the worker config
adminProps.putAll(config.originalsWithPrefix("admin."));
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyAdminAz(adminProps, defaultClientId);
// AutoMQ for Kafka inject end
// Connector-specified overrides
Map<String, Object> adminOverrides =
connectorClientConfigOverrides(connName, connConfig, connectorClass, ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX,

View File

@ -1735,6 +1735,12 @@ public class DistributedHerder extends AbstractHerder implements Runnable {
configBackingStore.putLoggerLevel(namespace, level);
}
// AutoMQ inject start
public boolean isLeaderInstance() {
return isLeader();
}
// AutoMQ inject end
// Should only be called from work thread, so synchronization should not be needed
protected boolean isLeader() {
return assignment != null && member.memberId().equals(assignment.leader());

View File

@ -35,6 +35,7 @@ import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Timer;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.automq.az.AzAwareClientConfigurator;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
@ -440,6 +441,9 @@ public class KafkaConfigBackingStore extends KafkaTopicBasedBackingStore impleme
Map<String, Object> result = new HashMap<>(baseProducerProps(workerConfig));
result.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId + "-leader");
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyProducerAz(result, "config-log-leader");
// AutoMQ for Kafka inject end
// Always require producer acks to all to ensure durable writes
result.put(ProducerConfig.ACKS_CONFIG, "all");
// We can set this to 5 instead of 1 without risking reordering because we are using an idempotent producer
@ -773,11 +777,17 @@ public class KafkaConfigBackingStore extends KafkaTopicBasedBackingStore impleme
Map<String, Object> producerProps = new HashMap<>(baseProducerProps);
producerProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyProducerAz(producerProps, "config-log");
// AutoMQ for Kafka inject end
Map<String, Object> consumerProps = new HashMap<>(originals);
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
consumerProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyConsumerAz(consumerProps, "config-log");
// AutoMQ for Kafka inject end
ConnectUtils.addMetricsContextProperties(consumerProps, config, clusterId);
if (config.exactlyOnceSourceEnabled()) {
ConnectUtils.ensureProperty(
@ -790,6 +800,9 @@ public class KafkaConfigBackingStore extends KafkaTopicBasedBackingStore impleme
Map<String, Object> adminProps = new HashMap<>(originals);
ConnectUtils.addMetricsContextProperties(adminProps, config, clusterId);
adminProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyAdminAz(adminProps, "config-log");
// AutoMQ for Kafka inject end
Map<String, Object> topicSettings = config instanceof DistributedConfig
? ((DistributedConfig) config).configStorageTopicSettings()

View File

@ -30,6 +30,7 @@ import org.apache.kafka.common.errors.UnsupportedVersionException;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.connect.automq.az.AzAwareClientConfigurator;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.runtime.WorkerConfig;
import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
@ -192,12 +193,18 @@ public class KafkaOffsetBackingStore extends KafkaTopicBasedBackingStore impleme
// gets approved and scheduled for release.
producerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "false");
producerProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyProducerAz(producerProps, "offset-log");
// AutoMQ for Kafka inject end
ConnectUtils.addMetricsContextProperties(producerProps, config, clusterId);
Map<String, Object> consumerProps = new HashMap<>(originals);
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
consumerProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyConsumerAz(consumerProps, "offset-log");
// AutoMQ for Kafka inject end
ConnectUtils.addMetricsContextProperties(consumerProps, config, clusterId);
if (config.exactlyOnceSourceEnabled()) {
ConnectUtils.ensureProperty(
@ -209,6 +216,9 @@ public class KafkaOffsetBackingStore extends KafkaTopicBasedBackingStore impleme
Map<String, Object> adminProps = new HashMap<>(originals);
adminProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyAdminAz(adminProps, "offset-log");
// AutoMQ for Kafka inject end
ConnectUtils.addMetricsContextProperties(adminProps, config, clusterId);
NewTopic topicDescription = newTopicDescription(topic, config);

View File

@ -30,6 +30,7 @@ import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.common.utils.ThreadUtils;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.connect.automq.az.AzAwareClientConfigurator;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
@ -183,16 +184,25 @@ public class KafkaStatusBackingStore extends KafkaTopicBasedBackingStore impleme
// gets approved and scheduled for release.
producerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "false"); // disable idempotence since retries is force to 0
producerProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyProducerAz(producerProps, "status-log");
// AutoMQ for Kafka inject end
ConnectUtils.addMetricsContextProperties(producerProps, config, clusterId);
Map<String, Object> consumerProps = new HashMap<>(originals);
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
consumerProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyConsumerAz(consumerProps, "status-log");
// AutoMQ for Kafka inject end
ConnectUtils.addMetricsContextProperties(consumerProps, config, clusterId);
Map<String, Object> adminProps = new HashMap<>(originals);
adminProps.put(CommonClientConfigs.CLIENT_ID_CONFIG, clientId);
// AutoMQ for Kafka inject start
AzAwareClientConfigurator.maybeApplyAdminAz(adminProps, "status-log");
// AutoMQ for Kafka inject end
ConnectUtils.addMetricsContextProperties(adminProps, config, clusterId);
Map<String, Object> topicSettings = config instanceof DistributedConfig

View File

@ -0,0 +1,115 @@
package org.apache.kafka.connect.automq;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.connect.automq.az.AzAwareClientConfigurator;
import org.apache.kafka.connect.automq.az.AzMetadataProvider;
import org.apache.kafka.connect.automq.az.AzMetadataProviderHolder;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
class AzAwareClientConfiguratorTest {
@AfterEach
void resetProvider() {
AzMetadataProviderHolder.setProviderForTest(null);
}
@Test
void shouldDecorateProducerClientId() {
AzMetadataProviderHolder.setProviderForTest(new FixedAzProvider("us-east-1a"));
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-1");
AzAwareClientConfigurator.maybeApplyProducerAz(props, "producer-1");
assertEquals("automq_type=producer&automq_role=producer-1&automq_az=us-east-1a&producer-1",
props.get(ProducerConfig.CLIENT_ID_CONFIG));
}
@Test
void shouldPreserveCustomClientIdInAzConfig() {
AzMetadataProviderHolder.setProviderForTest(new FixedAzProvider("us-east-1a"));
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.CLIENT_ID_CONFIG, "custom-id");
AzAwareClientConfigurator.maybeApplyProducerAz(props, "producer-1");
assertEquals("automq_type=producer&automq_role=producer-1&automq_az=us-east-1a&custom-id",
props.get(ProducerConfig.CLIENT_ID_CONFIG));
}
@Test
void shouldAssignRackForConsumers() {
AzMetadataProviderHolder.setProviderForTest(new FixedAzProvider("us-west-2c"));
Map<String, Object> props = new HashMap<>();
props.put(ConsumerConfig.CLIENT_ID_CONFIG, "consumer-1");
AzAwareClientConfigurator.maybeApplyConsumerAz(props, "consumer-1");
assertEquals("us-west-2c", props.get(ConsumerConfig.CLIENT_RACK_CONFIG));
}
@Test
void shouldDecorateAdminClientId() {
AzMetadataProviderHolder.setProviderForTest(new FixedAzProvider("eu-west-1b"));
Map<String, Object> props = new HashMap<>();
props.put(AdminClientConfig.CLIENT_ID_CONFIG, "admin-1");
AzAwareClientConfigurator.maybeApplyAdminAz(props, "admin-1");
assertEquals("automq_type=admin&automq_role=admin-1&automq_az=eu-west-1b&admin-1",
props.get(AdminClientConfig.CLIENT_ID_CONFIG));
}
@Test
void shouldLeaveClientIdWhenAzUnavailable() {
AzMetadataProviderHolder.setProviderForTest(new AzMetadataProvider() {
@Override
public Optional<String> availabilityZoneId() {
return Optional.empty();
}
});
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-1");
AzAwareClientConfigurator.maybeApplyProducerAz(props, "producer-1");
assertEquals("producer-1", props.get(ProducerConfig.CLIENT_ID_CONFIG));
assertFalse(props.containsKey(ConsumerConfig.CLIENT_RACK_CONFIG));
}
@Test
void shouldEncodeSpecialCharactersInClientId() {
AzMetadataProviderHolder.setProviderForTest(new FixedAzProvider("us-east-1a"));
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.CLIENT_ID_CONFIG, "client-with-spaces & symbols");
AzAwareClientConfigurator.maybeApplyProducerAz(props, "test-role");
assertEquals("automq_type=producer&automq_role=test-role&automq_az=us-east-1a&client-with-spaces & symbols",
props.get(ProducerConfig.CLIENT_ID_CONFIG));
}
private static final class FixedAzProvider implements AzMetadataProvider {
private final String az;
private FixedAzProvider(String az) {
this.az = az;
}
@Override
public Optional<String> availabilityZoneId() {
return Optional.ofNullable(az);
}
}
}

View File

@ -87,6 +87,28 @@ index 717a36c21f..ea5eb74efb 100644
USER 1001
ENTRYPOINT [ "/opt/bitnami/scripts/kafka/entrypoint.sh" ]
CMD [ "/opt/bitnami/scripts/kafka/run.sh" ]
diff --git a/container/bitnami/prebuildfs/opt/bitnami/scripts/libbitnami.sh b/container/bitnami/prebuildfs/opt/bitnami/scripts/libbitnami.sh
index 00d053b521..09e3d3084d 100644
--- a/container/bitnami/prebuildfs/opt/bitnami/scripts/libbitnami.sh
+++ b/container/bitnami/prebuildfs/opt/bitnami/scripts/libbitnami.sh
@@ -42,12 +42,13 @@ print_welcome_page() {
# None
#########################
print_image_welcome_page() {
- local github_url="https://github.com/bitnami/containers"
+ local docs_url="https://www.automq.com/docs/automq/deployment/deploy-multi-nodes-cluster-on-kubernetes"
info ""
- info "${BOLD}Welcome to the Bitnami ${BITNAMI_APP_NAME} container${RESET}"
- info "Subscribe to project updates by watching ${BOLD}${github_url}${RESET}"
- info "Did you know there are enterprise versions of the Bitnami catalog? For enhanced secure software supply chain features, unlimited pulls from Docker, LTS support, or application customization, see Bitnami Premium or Tanzu Application Catalog. See https://www.arrow.com/globalecs/na/vendors/bitnami/ for more information."
+ info "${BOLD}Welcome to the AutoMQ for Apache Kafka on Bitnami Container${RESET}"
+ info "${BOLD}This image is compatible with Bitnami's container standards.${RESET}"
+ info "Refer to the documentation for complete configuration and Kubernetes deployment guidelines:"
+ info "${BOLD}${docs_url}${RESET}"
info ""
}
diff --git a/container/bitnami/rootfs/opt/bitnami/scripts/kafka/postunpack.sh b/container/bitnami/rootfs/opt/bitnami/scripts/kafka/postunpack.sh
index 7255563236..673c84e721 100644
--- a/container/bitnami/rootfs/opt/bitnami/scripts/kafka/postunpack.sh

View File

@ -42,12 +42,13 @@ print_welcome_page() {
# None
#########################
print_image_welcome_page() {
local github_url="https://github.com/bitnami/containers"
local docs_url="https://www.automq.com/docs/automq/deployment/deploy-multi-nodes-cluster-on-kubernetes"
info ""
info "${BOLD}Welcome to the Bitnami ${BITNAMI_APP_NAME} container${RESET}"
info "Subscribe to project updates by watching ${BOLD}${github_url}${RESET}"
info "Did you know there are enterprise versions of the Bitnami catalog? For enhanced secure software supply chain features, unlimited pulls from Docker, LTS support, or application customization, see Bitnami Premium or Tanzu Application Catalog. See https://www.arrow.com/globalecs/na/vendors/bitnami/ for more information."
info "${BOLD}Welcome to the AutoMQ for Apache Kafka on Bitnami Container${RESET}"
info "${BOLD}This image is compatible with Bitnami's container standards.${RESET}"
info "Refer to the documentation for complete configuration and Kubernetes deployment guidelines:"
info "${BOLD}${docs_url}${RESET}"
info ""
}

View File

@ -33,7 +33,7 @@ public class RawMetricTypes {
public static final byte BROKER_MAX_PENDING_FETCH_LATENCY_MS = (byte) 5;
public static final byte BROKER_METRIC_VERSION = (byte) 6;
public static final Map<Byte, AbnormalMetric> ABNORMAL_METRICS = Map.of(
BROKER_APPEND_LATENCY_AVG_MS, new AbnormalLatency(100), // 100ms
// BROKER_APPEND_LATENCY_AVG_MS, new AbnormalLatency(100), // 100ms
BROKER_MAX_PENDING_APPEND_LATENCY_MS, new AbnormalLatency(10000), // 10s
BROKER_MAX_PENDING_FETCH_LATENCY_MS, new AbnormalLatency(10000) // 10s
);

View File

@ -19,7 +19,6 @@
package kafka.automq;
import kafka.log.stream.s3.telemetry.exporter.ExporterConstants;
import kafka.server.KafkaConfig;
import org.apache.kafka.common.config.ConfigDef;
@ -80,7 +79,7 @@ public class AutoMQConfig {
public static final String S3_WAL_UPLOAD_INTERVAL_MS_CONFIG = "s3.wal.upload.interval.ms";
public static final String S3_WAL_UPLOAD_INTERVAL_MS_DOC = "The interval at which WAL triggers upload to object storage. -1 means only upload by size trigger";
public static final long S3_WAL_UPLOAD_INTERVAL_MS_DEFAULT = -1L;
public static final long S3_WAL_UPLOAD_INTERVAL_MS_DEFAULT = 60000L;
public static final String S3_STREAM_SPLIT_SIZE_CONFIG = "s3.stream.object.split.size";
public static final String S3_STREAM_SPLIT_SIZE_DOC = "The S3 stream object split size threshold when upload delta WAL or compact stream set object.";
@ -114,7 +113,7 @@ public class AutoMQConfig {
public static final String S3_STREAM_SET_OBJECT_COMPACTION_INTERVAL_CONFIG = "s3.stream.set.object.compaction.interval.minutes";
public static final String S3_STREAM_SET_OBJECT_COMPACTION_INTERVAL_DOC = "Set the interval for stream set object compaction. The smaller this value, the smaller the scale of metadata storage, and the earlier the data can become compact. " +
"However, the number of compactions that the final generated stream object goes through will increase.";
public static final int S3_STREAM_SET_OBJECT_COMPACTION_INTERVAL = 10; // 10min
public static final int S3_STREAM_SET_OBJECT_COMPACTION_INTERVAL = 5; // 5min
public static final String S3_STREAM_SET_OBJECT_COMPACTION_CACHE_SIZE_CONFIG = "s3.stream.set.object.compaction.cache.size";
public static final String S3_STREAM_SET_OBJECT_COMPACTION_CACHE_SIZE_DOC = "The size of memory is available during the Stream object compaction process. The larger this value, the lower the cost of API calls.";
@ -154,7 +153,7 @@ public class AutoMQConfig {
public static final String S3_NETWORK_BASELINE_BANDWIDTH_CONFIG = "s3.network.baseline.bandwidth";
public static final String S3_NETWORK_BASELINE_BANDWIDTH_DOC = "The total available bandwidth for object storage requests. This is used to prevent stream set object compaction and catch-up read from monopolizing normal read and write traffic. Produce and Consume will also separately consume traffic in and traffic out. " +
"For example, suppose this value is set to 100MB/s, and the normal read and write traffic is 80MB/s, then the available traffic for stream set object compaction is 20MB/s.";
public static final long S3_NETWORK_BASELINE_BANDWIDTH = 100 * 1024 * 1024; // 100MB/s
public static final long S3_NETWORK_BASELINE_BANDWIDTH = 1024 * 1024 * 1024; // 1GBps
public static final String S3_NETWORK_REFILL_PERIOD_MS_CONFIG = "s3.network.refill.period.ms";
public static final String S3_NETWORK_REFILL_PERIOD_MS_DOC = "The network bandwidth token refill period in milliseconds.";
@ -251,6 +250,11 @@ public class AutoMQConfig {
public static final String S3_TELEMETRY_OPS_ENABLED_CONFIG = "s3.telemetry.ops.enabled";
public static final String S3_TELEMETRY_OPS_ENABLED_DOC = "[DEPRECATED] use s3.telemetry.metrics.uri instead.";
private static final String TELEMETRY_EXPORTER_TYPE_OTLP = "otlp";
private static final String TELEMETRY_EXPORTER_TYPE_PROMETHEUS = "prometheus";
private static final String TELEMETRY_EXPORTER_TYPE_OPS = "ops";
public static final String URI_DELIMITER = "://?";
// Deprecated config end
public static void define(ConfigDef configDef) {
@ -309,12 +313,14 @@ public class AutoMQConfig {
.define(AutoMQConfig.TABLE_TOPIC_SCHEMA_REGISTRY_URL_CONFIG, STRING, null, MEDIUM, AutoMQConfig.TABLE_TOPIC_SCHEMA_REGISTRY_URL_DOC);
}
private final long nodeEpoch = System.currentTimeMillis();
private List<BucketURI> dataBuckets;
private List<BucketURI> opsBuckets;
private String walConfig;
private String metricsExporterURI;
private List<Pair<String, String>> baseLabels;
private Optional<BucketURI> zoneRouterChannels;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private Optional<List<BucketURI>> zoneRouterChannels;
public AutoMQConfig setup(KafkaConfig config) {
dataBuckets = genDataBuckets(config);
@ -326,6 +332,10 @@ public class AutoMQConfig {
return this;
}
public long nodeEpoch() {
return nodeEpoch;
}
public List<BucketURI> dataBuckets() {
return dataBuckets;
}
@ -346,7 +356,7 @@ public class AutoMQConfig {
return baseLabels;
}
public Optional<BucketURI> zoneRouterChannels() {
public Optional<List<BucketURI>> zoneRouterChannels() {
return zoneRouterChannels;
}
@ -397,7 +407,7 @@ public class AutoMQConfig {
if (uri == null) {
uri = buildMetrixExporterURIWithOldConfigs(config);
}
if (!uri.contains(ExporterConstants.OPS_TYPE)) {
if (!uri.contains(TELEMETRY_EXPORTER_TYPE_OPS)) {
uri += "," + buildOpsExporterURI();
}
return uri;
@ -414,10 +424,10 @@ public class AutoMQConfig {
for (String exporterType : exporterTypeArray) {
exporterType = exporterType.trim();
switch (exporterType) {
case ExporterConstants.OTLP_TYPE:
case TELEMETRY_EXPORTER_TYPE_OTLP:
exportedUris.add(buildOTLPExporterURI(kafkaConfig));
break;
case ExporterConstants.PROMETHEUS_TYPE:
case TELEMETRY_EXPORTER_TYPE_PROMETHEUS:
exportedUris.add(buildPrometheusExporterURI(kafkaConfig));
break;
default:
@ -435,26 +445,31 @@ public class AutoMQConfig {
}
private static String buildOTLPExporterURI(KafkaConfig kafkaConfig) {
String endpoint = kafkaConfig.getString(S3_TELEMETRY_EXPORTER_OTLP_ENDPOINT_CONFIG);
if (StringUtils.isBlank(endpoint)) {
return "";
}
StringBuilder uriBuilder = new StringBuilder()
.append(ExporterConstants.OTLP_TYPE)
.append(ExporterConstants.URI_DELIMITER)
.append(ExporterConstants.ENDPOINT).append("=").append(kafkaConfig.getString(S3_TELEMETRY_EXPORTER_OTLP_ENDPOINT_CONFIG))
.append("&")
.append(ExporterConstants.PROTOCOL).append("=").append(kafkaConfig.getString(S3_TELEMETRY_EXPORTER_OTLP_PROTOCOL_CONFIG));
.append(TELEMETRY_EXPORTER_TYPE_OTLP)
.append("://?endpoint=").append(endpoint);
String protocol = kafkaConfig.getString(S3_TELEMETRY_EXPORTER_OTLP_PROTOCOL_CONFIG);
if (StringUtils.isNotBlank(protocol)) {
uriBuilder.append("&protocol=").append(protocol);
}
if (kafkaConfig.getBoolean(S3_TELEMETRY_EXPORTER_OTLP_COMPRESSION_ENABLE_CONFIG)) {
uriBuilder.append("&").append(ExporterConstants.COMPRESSION).append("=").append("gzip");
uriBuilder.append("&compression=gzip");
}
return uriBuilder.toString();
}
private static String buildPrometheusExporterURI(KafkaConfig kafkaConfig) {
return ExporterConstants.PROMETHEUS_TYPE + ExporterConstants.URI_DELIMITER +
ExporterConstants.HOST + "=" + kafkaConfig.getString(S3_METRICS_EXPORTER_PROM_HOST_CONFIG) + "&" +
ExporterConstants.PORT + "=" + kafkaConfig.getInt(S3_METRICS_EXPORTER_PROM_PORT_CONFIG);
return TELEMETRY_EXPORTER_TYPE_PROMETHEUS + URI_DELIMITER +
"host" + "=" + kafkaConfig.getString(S3_METRICS_EXPORTER_PROM_HOST_CONFIG) + "&" +
"port" + "=" + kafkaConfig.getInt(S3_METRICS_EXPORTER_PROM_PORT_CONFIG);
}
private static String buildOpsExporterURI() {
return ExporterConstants.OPS_TYPE + ExporterConstants.URI_DELIMITER;
return TELEMETRY_EXPORTER_TYPE_OPS + URI_DELIMITER;
}
private static List<Pair<String, String>> parseBaseLabels(KafkaConfig config) {
@ -475,7 +490,7 @@ public class AutoMQConfig {
}
private static Optional<BucketURI> genZoneRouterChannels(KafkaConfig config) {
private static Optional<List<BucketURI>> genZoneRouterChannels(KafkaConfig config) {
String str = config.getString(ZONE_ROUTER_CHANNELS_CONFIG);
if (StringUtils.isBlank(str)) {
return Optional.empty();
@ -483,10 +498,8 @@ public class AutoMQConfig {
List<BucketURI> buckets = BucketURI.parseBuckets(str);
if (buckets.isEmpty()) {
return Optional.empty();
} else if (buckets.size() > 1) {
throw new IllegalArgumentException(ZONE_ROUTER_CHANNELS_CONFIG + " only supports one object storage, but it's config with " + str);
} else {
return Optional.of(buckets.get(0));
return Optional.of(buckets);
}
}
}
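
For readers following the exporter URI refactor above, here is a small standalone sketch (not part of the diff) of the URI shapes that buildOTLPExporterURI, buildPrometheusExporterURI, and buildOpsExporterURI produce with the "://?" delimiter; the endpoint, host, and port values are illustrative only.

public class ExporterUriSketch {
    private static final String URI_DELIMITER = "://?";

    public static void main(String[] args) {
        // OTLP: otlp://?endpoint=<endpoint>[&protocol=<protocol>][&compression=gzip]
        String otlp = "otlp" + URI_DELIMITER + "endpoint=http://otel-collector:4317"
            + "&protocol=grpc" + "&compression=gzip";
        // Prometheus: prometheus://?host=<host>&port=<port>
        String prometheus = "prometheus" + URI_DELIMITER + "host=0.0.0.0&port=9090";
        // Ops: carries no parameters, the type alone selects the ops exporter.
        String ops = "ops" + URI_DELIMITER;
        // Multiple exporter URIs are joined with commas, as in the ops-exporter fallback above.
        System.out.println(String.join(",", otlp, prometheus, ops));
    }
}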

View File

@ -17,13 +17,7 @@
* limitations under the License.
*/
package kafka.log.stream.s3.telemetry;
package kafka.automq.failover;
public class MetricsConstants {
public static final String SERVICE_NAME = "service.name";
public static final String SERVICE_INSTANCE = "service.instance.id";
public static final String HOST_NAME = "host.name";
public static final String INSTANCE = "instance";
public static final String JOB = "job";
public static final String NODE_TYPE = "node.type";
public record DefaultFailedNode(int id, long epoch) implements FailedNode {
}

View File

@ -27,10 +27,10 @@ public interface FailedNode {
int id();
static FailedNode from(NodeRuntimeMetadata node) {
return new K8sFailedNode(node.id());
return new DefaultFailedNode(node.id(), node.epoch());
}
static FailedNode from(FailoverContext context) {
return new K8sFailedNode(context.getNodeId());
return new DefaultFailedNode(context.getNodeId(), context.getNodeEpoch());
}
}

View File

@ -181,7 +181,7 @@ public class FailoverControlManager implements AutoCloseable {
node.getNodeId(),
// There are node epochs in both streamControlManager and nodeControlManager, and they are the same in most cases.
// However, in some rare cases, the node epoch in streamControlManager may be updated earlier than the node epoch in nodeControlManager.
// So we use the node epoch in nodeControlManager as the source of truth.
// So we use the node epoch in streamControlManager as the source of truth.
nodeEpochMap.get(node.getNodeId()),
node.getWalConfig(),
node.getTags(),
@ -265,22 +265,6 @@ public class FailoverControlManager implements AutoCloseable {
.filter(NodeRuntimeMetadata::shouldFailover)
.map(DefaultFailedWal::from)
.collect(Collectors.toCollection(ArrayList::new));
maybeRemoveControllerNode(allNodes, result);
return result;
}
private static void maybeRemoveControllerNode(List<NodeRuntimeMetadata> allNodes, List<FailedWal> failedWALList) {
long inactiveControllerCount = allNodes.stream()
.filter(NodeRuntimeMetadata::isController)
.filter(node -> !node.isActive())
.count();
if (inactiveControllerCount > 1) {
LOGGER.warn("{} controller nodes is inactive, will not failover any controller node", inactiveControllerCount);
Set<Integer> controllerNodeIds = allNodes.stream()
.filter(NodeRuntimeMetadata::isController)
.map(NodeRuntimeMetadata::id)
.collect(Collectors.toSet());
failedWALList.removeIf(wal -> controllerNodeIds.contains(wal.nodeId()));
}
}
}

View File

@ -80,11 +80,11 @@ public class FailoverListener implements MetadataPublisher, AutoCloseable {
.map(kv -> kv.get(FailoverConstants.FAILOVER_KEY))
.map(this::decodeContexts);
}
private FailoverContext[] decodeContexts(ByteBuffer byteBuffer) {
byteBuffer.slice();
byte[] data = new byte[byteBuffer.remaining()];
byteBuffer.get(data);
ByteBuffer slice = byteBuffer.slice();
byte[] data = new byte[slice.remaining()];
slice.get(data);
return JsonUtils.decode(new String(data, StandardCharsets.UTF_8), FailoverContext[].class);
}
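
To make the decodeContexts change above concrete, here is a minimal standalone sketch (not from the repository) showing why reading through an explicit slice leaves the source buffer's position untouched; the JSON payload is a made-up stand-in for the failover contexts.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class SliceReadSketch {
    public static void main(String[] args) {
        ByteBuffer source = ByteBuffer.wrap("{\"nodeId\":1}".getBytes(StandardCharsets.UTF_8));
        ByteBuffer slice = source.slice();      // independent position and limit
        byte[] data = new byte[slice.remaining()];
        slice.get(data);                        // consumes the slice, not the source
        System.out.println(new String(data, StandardCharsets.UTF_8));
        System.out.println(source.remaining()); // still 12: the source buffer is untouched
    }
}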

View File

@ -25,6 +25,7 @@ import org.apache.kafka.controller.stream.NodeState;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
/**
* NodeRuntimeMetadata is a runtime view of a node's metadata.
@ -39,6 +40,7 @@ public final class NodeRuntimeMetadata {
* @see ClusterControlManager#getNextNodeId()
*/
private static final int MAX_CONTROLLER_ID = 1000 - 1;
private static final long DONT_FAILOVER_AFTER_NEW_EPOCH_MS = TimeUnit.MINUTES.toMillis(1);
private final int id;
private final long epoch;
private final String walConfigs;
@ -60,7 +62,11 @@ public final class NodeRuntimeMetadata {
}
public boolean shouldFailover() {
return isFenced() && hasOpeningStreams;
return isFenced() && hasOpeningStreams
// The node epoch is the start timestamp of the node.
// We need to avoid failover just after node restart.
// The node may take some time to recover its data.
&& System.currentTimeMillis() - epoch > DONT_FAILOVER_AFTER_NEW_EPOCH_MS;
}
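
A minimal sketch (assuming, as the new comment states, that the epoch field is the node's start timestamp in milliseconds) of the grace-period check that shouldFailover now applies; it is illustrative only, not the class above.

import java.util.concurrent.TimeUnit;

public class FailoverGraceSketch {
    static final long DONT_FAILOVER_AFTER_NEW_EPOCH_MS = TimeUnit.MINUTES.toMillis(1);

    static boolean pastGracePeriod(long nodeEpochMs, long nowMs) {
        // A node that restarted less than a minute ago is given time to recover its data.
        return nowMs - nodeEpochMs > DONT_FAILOVER_AFTER_NEW_EPOCH_MS;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        System.out.println(pastGracePeriod(now - 5_000, now));   // false: just restarted
        System.out.println(pastGracePeriod(now - 120_000, now)); // true: eligible for failover
    }
}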
public boolean isFenced() {

View File

@ -23,6 +23,7 @@ import kafka.server.MetadataCache;
import kafka.server.streamaspect.ElasticKafkaApis;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.message.AutomqZoneRouterRequestData;
import org.apache.kafka.common.message.MetadataResponseData;
import org.apache.kafka.common.network.ListenerName;
import org.apache.kafka.common.requests.s3.AutomqZoneRouterResponse;
@ -42,13 +43,18 @@ public class NoopTrafficInterceptor implements TrafficInterceptor {
this.metadataCache = metadataCache;
}
@Override
public void close() {
}
@Override
public void handleProduceRequest(ProduceRequestArgs args) {
kafkaApis.handleProduceAppendJavaCompatible(args);
}
@Override
public CompletableFuture<AutomqZoneRouterResponse> handleZoneRouterRequest(byte[] metadata) {
public CompletableFuture<AutomqZoneRouterResponse> handleZoneRouterRequest(AutomqZoneRouterRequestData request) {
return FutureUtil.failedFuture(new UnsupportedOperationException());
}

View File

@ -20,6 +20,7 @@
package kafka.automq.interceptor;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.message.AutomqZoneRouterRequestData;
import org.apache.kafka.common.message.MetadataResponseData;
import org.apache.kafka.common.requests.s3.AutomqZoneRouterResponse;
@ -29,9 +30,11 @@ import java.util.concurrent.CompletableFuture;
public interface TrafficInterceptor {
void close();
void handleProduceRequest(ProduceRequestArgs args);
CompletableFuture<AutomqZoneRouterResponse> handleZoneRouterRequest(byte[] metadata);
CompletableFuture<AutomqZoneRouterResponse> handleZoneRouterRequest(AutomqZoneRouterRequestData request);
List<MetadataResponseData.MetadataResponseTopic> handleMetadataResponse(ClientIdMetadata clientId,
List<MetadataResponseData.MetadataResponseTopic> topics);

View File

@ -0,0 +1,191 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.automq.partition.snapshot;
import org.apache.kafka.common.message.AutomqGetPartitionSnapshotResponseData;
import com.automq.stream.s3.ConfirmWAL;
import com.automq.stream.s3.model.StreamRecordBatch;
import com.automq.stream.s3.wal.RecordOffset;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
/**
* Maintains a bounded, in-memory delta of recent WAL appends so snapshot responses can
* piggy-back fresh data instead of forcing clients to replay the physical WAL.
*
* <p><strong>Responsibilities</strong>
* <ul>
* <li>Subscribe to {@link ConfirmWAL} append events and retain the encoded
* {@link StreamRecordBatch} payloads while they are eligible for delta export.</li>
* <li>Track confirm offsets and expose them via {@link #handle(short, AutomqGetPartitionSnapshotResponseData)}.</li>
* <li>Serialize buffered batches into {@code confirmWalDeltaData} for request versions
* &gt;= 2, or signal that callers must replay the WAL otherwise.</li>
* <li>Enforce {@link #MAX_RECORDS_BUFFER_SIZE} so the delta cache remains lightweight.</li>
* </ul>
*
* <p><strong>State machine</strong>
* <ul>
* <li>{@link #STATE_NOT_SYNC}: Buffer content is discarded (e.g. overflow) and only confirm
* offsets are returned until new appends arrive.</li>
* <li>{@link #STATE_SYNCING}: Buffered records are eligible to be drained and turned into a
* delta payload when {@link #handle(short, AutomqGetPartitionSnapshotResponseData)} runs.</li>
* <li>{@link #STATE_CLOSED}: Listener is torn down and ignores subsequent appends.</li>
* </ul>
*
* <p><strong>Concurrency and lifecycle</strong>
* <ul>
* <li>All public methods are synchronized to guard the state machine, queue, and
* {@link #lastConfirmOffset} tracking.</li>
* <li>Buffered batches are reference-counted; ownership transfers to this class until the
* delta is emitted or the buffer is dropped/closed.</li>
* <li>{@link #close()} must be invoked when the owning {@link PartitionSnapshotsManager.Session} ends to release buffers
* and remove the {@link ConfirmWAL.AppendListener}.</li>
* </ul>
*
* <p><strong>Snapshot interaction</strong>
* <ul>
* <li>{@link #handle(short, AutomqGetPartitionSnapshotResponseData)} always updates
* {@code confirmWalEndOffset} and, when possible, attaches {@code confirmWalDeltaData}.</li>
* <li>A {@code null} delta signals the client must replay the WAL, whereas an empty byte array
* indicates no new data but confirms offsets.</li>
* <li>When the aggregated encoded bytes would exceed {@link #MAX_RECORDS_BUFFER_SIZE}, the
* buffer is dropped and state resets to {@link #STATE_NOT_SYNC}.</li>
* </ul>
*/
public class ConfirmWalDataDelta implements ConfirmWAL.AppendListener {
static final int STATE_NOT_SYNC = 0;
static final int STATE_SYNCING = 1;
static final int STATE_CLOSED = 9;
static final int MAX_RECORDS_BUFFER_SIZE = 32 * 1024; // 32KiB
private final ConfirmWAL confirmWAL;
private final ConfirmWAL.ListenerHandle listenerHandle;
final BlockingQueue<RecordExt> records = new LinkedBlockingQueue<>();
final AtomicInteger size = new AtomicInteger(0);
private RecordOffset lastConfirmOffset = null;
int state = STATE_NOT_SYNC;
public ConfirmWalDataDelta(ConfirmWAL confirmWAL) {
this.confirmWAL = confirmWAL;
this.listenerHandle = confirmWAL.addAppendListener(this);
}
public synchronized void close() {
this.state = STATE_CLOSED;
this.listenerHandle.close();
records.forEach(r -> r.record.release());
records.clear();
}
public void handle(short requestVersion,
AutomqGetPartitionSnapshotResponseData resp) {
RecordOffset newConfirmOffset = null;
List<RecordExt> delta = null;
synchronized (this) {
if (state == STATE_NOT_SYNC) {
List<RecordExt> drainedRecords = new ArrayList<>(records.size());
records.drainTo(drainedRecords);
size.addAndGet(-drainedRecords.stream().mapToInt(r -> r.record.encoded().readableBytes()).sum());
if (!drainedRecords.isEmpty()) {
RecordOffset deltaConfirmOffset = drainedRecords.get(drainedRecords.size() - 1).nextOffset();
if (lastConfirmOffset == null || deltaConfirmOffset.compareTo(lastConfirmOffset) > 0) {
newConfirmOffset = deltaConfirmOffset;
state = STATE_SYNCING;
}
drainedRecords.forEach(r -> r.record.release());
}
} else if (state == STATE_SYNCING) {
delta = new ArrayList<>(records.size());
records.drainTo(delta);
size.addAndGet(-delta.stream().mapToInt(r -> r.record.encoded().readableBytes()).sum());
newConfirmOffset = delta.isEmpty() ? lastConfirmOffset : delta.get(delta.size() - 1).nextOffset();
}
if (newConfirmOffset == null) {
newConfirmOffset = confirmWAL.confirmOffset();
}
this.lastConfirmOffset = newConfirmOffset;
}
resp.setConfirmWalEndOffset(newConfirmOffset.bufferAsBytes());
if (delta != null) {
int size = delta.stream().mapToInt(r -> r.record.encoded().readableBytes()).sum();
byte[] data = new byte[size];
ByteBuf buf = Unpooled.wrappedBuffer(data).clear();
delta.forEach(r -> {
buf.writeBytes(r.record.encoded());
r.record.release();
});
if (requestVersion >= 2) {
// The confirmWalDeltaData is only supported in request version >= 2
resp.setConfirmWalDeltaData(data);
}
} else {
if (requestVersion >= 2) {
// - Null means the client needs to replay from the physical WAL.
// - Empty means there is no delta data.
resp.setConfirmWalDeltaData(null);
}
}
}
@Override
public synchronized void onAppend(StreamRecordBatch record, RecordOffset recordOffset,
RecordOffset nextOffset) {
if (state == STATE_CLOSED) {
return;
}
record.retain();
records.add(new RecordExt(record, recordOffset, nextOffset));
if (size.addAndGet(record.encoded().readableBytes()) > MAX_RECORDS_BUFFER_SIZE) {
// If the buffer is full, drop all records and switch to NOT_SYNC state.
// It's cheaper to replay from the physical WAL than to transfer the data over the network.
state = STATE_NOT_SYNC;
records.forEach(r -> r.record.release());
records.clear();
size.set(0);
}
}
record RecordExt(StreamRecordBatch record, RecordOffset recordOffset, RecordOffset nextOffset) {
}
public static List<StreamRecordBatch> decodeDeltaRecords(byte[] data) {
if (data == null) {
return null;
}
List<StreamRecordBatch> records = new ArrayList<>();
ByteBuf buf = Unpooled.wrappedBuffer(data);
while (buf.readableBytes() > 0) {
StreamRecordBatch record = StreamRecordBatch.parse(buf, false);
records.add(record);
}
return records;
}
}
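
As a consumer-side illustration of the null/empty contract documented above, here is a short hedged sketch (not part of the diff) of how a client might interpret confirmWalDeltaData before handing a non-empty payload to ConfirmWalDataDelta.decodeDeltaRecords; the helper method and return strings are hypothetical.

public class DeltaDataSketch {
    static String interpret(byte[] confirmWalDeltaData) {
        if (confirmWalDeltaData == null) {
            // Null: the server dropped its buffer (NOT_SYNC), so replay the physical WAL.
            return "replay-wal";
        }
        if (confirmWalDeltaData.length == 0) {
            // Empty: offsets are confirmed but there is no new data to apply.
            return "no-new-data";
        }
        // Otherwise decode the concatenated StreamRecordBatch payloads, e.g. via
        // ConfirmWalDataDelta.decodeDeltaRecords(confirmWalDeltaData).
        return "apply-delta";
    }

    public static void main(String[] args) {
        System.out.println(interpret(null));        // replay-wal
        System.out.println(interpret(new byte[0])); // no-new-data
    }
}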

View File

@ -19,6 +19,7 @@
package kafka.automq.partition.snapshot;
import kafka.automq.AutoMQConfig;
import kafka.cluster.LogEventListener;
import kafka.cluster.Partition;
import kafka.cluster.PartitionListener;
@ -39,8 +40,11 @@ import org.apache.kafka.common.message.AutomqGetPartitionSnapshotResponseData.To
import org.apache.kafka.common.requests.s3.AutomqGetPartitionSnapshotRequest;
import org.apache.kafka.common.requests.s3.AutomqGetPartitionSnapshotResponse;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.server.common.automq.AutoMQVersion;
import org.apache.kafka.storage.internals.log.LogOffsetMetadata;
import org.apache.kafka.storage.internals.log.TimestampOffset;
import com.automq.stream.s3.ConfirmWAL;
import com.automq.stream.utils.Threads;
import java.util.ArrayList;
@ -48,20 +52,38 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import io.netty.util.concurrent.FastThreadLocal;
public class PartitionSnapshotsManager {
private static final int NOOP_SESSION_ID = 0;
private final Map<Integer, Session> sessions = new HashMap<>();
private final List<PartitionWithVersion> snapshotVersions = new CopyOnWriteArrayList<>();
private final Time time;
private final ConfirmWAL confirmWAL;
public PartitionSnapshotsManager(Time time) {
public PartitionSnapshotsManager(Time time, AutoMQConfig config, ConfirmWAL confirmWAL,
Supplier<AutoMQVersion> versionGetter) {
this.time = time;
Threads.COMMON_SCHEDULER.scheduleWithFixedDelay(this::cleanExpiredSessions, 1, 1, TimeUnit.MINUTES);
this.confirmWAL = confirmWAL;
if (config.zoneRouterChannels().isPresent()) {
Threads.COMMON_SCHEDULER.scheduleWithFixedDelay(this::cleanExpiredSessions, 1, 1, TimeUnit.MINUTES);
Threads.COMMON_SCHEDULER.scheduleWithFixedDelay(() -> {
// In ZERO_ZONE_V0 we need to fast commit the WAL data to KRaft,
// so that other nodes can replay the SSO to support snapshot read.
if (!versionGetter.get().isZeroZoneV2Supported()) {
confirmWAL.commit(0, false);
}
}, 1, 1, TimeUnit.SECONDS);
}
}
public void onPartitionOpen(Partition partition) {
@ -78,8 +100,9 @@ public class PartitionSnapshotsManager {
}
}
public AutomqGetPartitionSnapshotResponse handle(AutomqGetPartitionSnapshotRequest request) {
public CompletableFuture<AutomqGetPartitionSnapshotResponse> handle(AutomqGetPartitionSnapshotRequest request) {
Session session;
boolean newSession = false;
synchronized (this) {
AutomqGetPartitionSnapshotRequestData requestData = request.data();
int sessionId = requestData.sessionId();
@ -94,9 +117,10 @@ public class PartitionSnapshotsManager {
sessionId = nextSessionId();
session = new Session(sessionId);
sessions.put(sessionId, session);
newSession = true;
}
}
return session.snapshotsDelta();
return session.snapshotsDelta(request, request.data().requestCommit() || newSession);
}
private synchronized int nextSessionId() {
@ -109,36 +133,102 @@ public class PartitionSnapshotsManager {
}
private synchronized void cleanExpiredSessions() {
sessions.values().removeIf(Session::expired);
sessions.values().removeIf(s -> {
boolean expired = s.expired();
if (expired) {
s.close();
}
return expired;
});
}
class Session {
private static final short ZERO_ZONE_V0_REQUEST_VERSION = (short) 0;
private static final FastThreadLocal<List<CompletableFuture<Void>>> COMPLETE_CF_LIST_LOCAL = new FastThreadLocal<>() {
@Override
protected List<CompletableFuture<Void>> initialValue() {
return new ArrayList<>();
}
};
private final int sessionId;
private int sessionEpoch = 0;
private final Map<Partition, PartitionSnapshotVersion> synced = new HashMap<>();
private final List<Partition> removed = new ArrayList<>();
private long lastGetSnapshotsTimestamp = time.milliseconds();
private final Set<CompletableFuture<Void>> inflightCommitCfSet = ConcurrentHashMap.newKeySet();
private final ConfirmWalDataDelta delta;
public Session(int sessionId) {
this.sessionId = sessionId;
this.delta = new ConfirmWalDataDelta(confirmWAL);
}
public synchronized void close() {
delta.close();
}
public synchronized int sessionEpoch() {
return sessionEpoch;
}
public synchronized AutomqGetPartitionSnapshotResponse snapshotsDelta() {
public synchronized CompletableFuture<AutomqGetPartitionSnapshotResponse> snapshotsDelta(
AutomqGetPartitionSnapshotRequest request, boolean requestCommit) {
AutomqGetPartitionSnapshotResponseData resp = new AutomqGetPartitionSnapshotResponseData();
sessionEpoch++;
lastGetSnapshotsTimestamp = time.milliseconds();
resp.setSessionId(sessionId);
resp.setSessionEpoch(sessionEpoch);
Map<Uuid, List<PartitionSnapshot>> topic2partitions = new HashMap<>();
long finalSessionEpoch = sessionEpoch;
CompletableFuture<Void> collectPartitionSnapshotsCf;
if (!requestCommit && inflightCommitCfSet.isEmpty()) {
collectPartitionSnapshotsCf = collectPartitionSnapshots(request.data().version(), resp);
} else {
collectPartitionSnapshotsCf = CompletableFuture.completedFuture(null);
}
boolean newSession = finalSessionEpoch == 1;
return collectPartitionSnapshotsCf
.thenApply(nil -> {
if (request.data().version() > ZERO_ZONE_V0_REQUEST_VERSION) {
if (newSession) {
// Return the WAL config in the session's first response.
resp.setConfirmWalConfig(confirmWAL.uri());
}
delta.handle(request.version(), resp);
}
if (requestCommit) {
// Commit after generating the snapshots.
// Then the snapshot-read partitions can read from the snapshot-read cache or the block cache.
CompletableFuture<Void> commitCf = newSession ?
// The proxy node's first snapshot-read request needs to commit immediately to ensure the data can be read.
confirmWAL.commit(0, false)
// The proxy node's snapshot-read cache isn't large enough to hold the 'uncommitted' data,
// so the proxy node requests a commit to ensure the data can be read from the block cache.
: confirmWAL.commit(1000, false);
inflightCommitCfSet.add(commitCf);
commitCf.whenComplete((rst, ex) -> inflightCommitCfSet.remove(commitCf));
}
return new AutomqGetPartitionSnapshotResponse(resp);
});
}
public synchronized void onPartitionClose(Partition partition) {
removed.add(partition);
}
public synchronized boolean expired() {
return time.milliseconds() - lastGetSnapshotsTimestamp > 60000;
}
private CompletableFuture<Void> collectPartitionSnapshots(short funcVersion,
AutomqGetPartitionSnapshotResponseData resp) {
Map<Uuid, List<PartitionSnapshot>> topic2partitions = new HashMap<>();
List<CompletableFuture<Void>> completeCfList = COMPLETE_CF_LIST_LOCAL.get();
completeCfList.clear();
removed.forEach(partition -> {
PartitionSnapshotVersion version = synced.remove(partition);
if (version != null) {
List<PartitionSnapshot> partitionSnapshots = topic2partitions.computeIfAbsent(partition.topicId().get(), topic -> new ArrayList<>());
partitionSnapshots.add(snapshot(partition, version, null));
partitionSnapshots.add(snapshot(funcVersion, partition, version, null, completeCfList));
}
});
removed.clear();
@ -148,7 +238,7 @@ public class PartitionSnapshotsManager {
if (!Objects.equals(p.version, oldVersion)) {
List<PartitionSnapshot> partitionSnapshots = topic2partitions.computeIfAbsent(p.partition.topicId().get(), topic -> new ArrayList<>());
PartitionSnapshotVersion newVersion = p.version.copy();
PartitionSnapshot partitionSnapshot = snapshot(p.partition, oldVersion, newVersion);
PartitionSnapshot partitionSnapshot = snapshot(funcVersion, p.partition, oldVersion, newVersion, completeCfList);
partitionSnapshots.add(partitionSnapshot);
synced.put(p.partition, newVersion);
}
@ -161,20 +251,14 @@ public class PartitionSnapshotsManager {
topics.add(topic);
});
resp.setTopics(topics);
lastGetSnapshotsTimestamp = time.milliseconds();
return new AutomqGetPartitionSnapshotResponse(resp);
CompletableFuture<Void> retCf = CompletableFuture.allOf(completeCfList.toArray(new CompletableFuture[0]));
completeCfList.clear();
return retCf;
}
public synchronized void onPartitionClose(Partition partition) {
removed.add(partition);
}
public synchronized boolean expired() {
return time.milliseconds() - lastGetSnapshotsTimestamp > 60000;
}
private PartitionSnapshot snapshot(Partition partition, PartitionSnapshotVersion oldVersion,
PartitionSnapshotVersion newVersion) {
private PartitionSnapshot snapshot(short funcVersion, Partition partition,
PartitionSnapshotVersion oldVersion,
PartitionSnapshotVersion newVersion, List<CompletableFuture<Void>> completeCfList) {
if (newVersion == null) {
// partition is closed
PartitionSnapshot snapshot = new PartitionSnapshot();
@ -188,6 +272,7 @@ public class PartitionSnapshotsManager {
PartitionSnapshot snapshot = new PartitionSnapshot();
snapshot.setPartitionIndex(partition.partitionId());
kafka.cluster.PartitionSnapshot src = partition.snapshot();
completeCfList.add(src.completeCf());
snapshot.setLeaderEpoch(src.leaderEpoch());
SnapshotOperation operation = oldVersion == null ? SnapshotOperation.ADD : SnapshotOperation.PATCH;
snapshot.setOperation(operation.code());
@ -201,6 +286,9 @@ public class PartitionSnapshotsManager {
if (includeSegments) {
snapshot.setLogMetadata(logMetadata(src.logMeta()));
}
if (funcVersion > ZERO_ZONE_V0_REQUEST_VERSION) {
snapshot.setLastTimestampOffset(timestampOffset(src.lastTimestampOffset()));
}
return snapshot;
});
}
@ -254,6 +342,11 @@ public class PartitionSnapshotsManager {
return new AutomqGetPartitionSnapshotResponseData.TimestampOffsetData().setTimestamp(src.timestamp()).setOffset(src.offset());
}
static AutomqGetPartitionSnapshotResponseData.TimestampOffsetData timestampOffset(
TimestampOffset src) {
return new AutomqGetPartitionSnapshotResponseData.TimestampOffsetData().setTimestamp(src.timestamp).setOffset(src.offset);
}
static class PartitionWithVersion {
Partition partition;
PartitionSnapshotVersion version;
@ -267,16 +360,13 @@ public class PartitionSnapshotsManager {
static PartitionListener newPartitionListener(PartitionWithVersion version) {
return new PartitionListener() {
@Override
public void onHighWatermarkUpdated(TopicPartition partition, long offset) {
public void onNewLeaderEpoch(long oldEpoch, long newEpoch) {
version.version.incrementRecordsVersion();
}
@Override
public void onFailed(TopicPartition partition) {
}
@Override
public void onDeleted(TopicPartition partition) {
public void onNewAppend(TopicPartition partition, long offset) {
version.version.incrementRecordsVersion();
}
};
}
@ -284,4 +374,5 @@ public class PartitionSnapshotsManager {
static LogEventListener newLogEventListener(PartitionWithVersion version) {
return (segment, event) -> version.version.incrementSegmentsVersion();
}
}

View File

@ -217,15 +217,16 @@ public class CatalogFactory {
}
}
// important: use putIfAbsent to let the user override all values directly in catalog configuration
private void putDataBucketAsWarehouse(boolean s3a) {
if (bucketURI.endpoint() != null) {
options.put("s3.endpoint", bucketURI.endpoint());
if (StringUtils.isNotBlank(bucketURI.endpoint())) {
options.putIfAbsent("s3.endpoint", bucketURI.endpoint());
}
if (bucketURI.extensionBool(AwsObjectStorage.PATH_STYLE_KEY, false)) {
options.put("s3.path-style-access", "true");
options.putIfAbsent("s3.path-style-access", "true");
}
options.put("io-impl", "org.apache.iceberg.aws.s3.S3FileIO");
options.put("warehouse", String.format((s3a ? "s3a" : "s3") + "://%s/iceberg", bucketURI.bucket()));
options.putIfAbsent("io-impl", "org.apache.iceberg.aws.s3.S3FileIO");
options.putIfAbsent("warehouse", String.format((s3a ? "s3a" : "s3") + "://%s/iceberg", bucketURI.bucket()));
}
}
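
To illustrate the putIfAbsent switch above, here is a tiny self-contained sketch (all values are hypothetical) showing that options the user already set in the catalog configuration win over the derived warehouse defaults.

import java.util.HashMap;
import java.util.Map;

public class WarehouseOptionsSketch {
    public static void main(String[] args) {
        Map<String, String> options = new HashMap<>();
        options.put("warehouse", "s3://user-defined/iceberg");                // user-provided catalog config
        options.putIfAbsent("io-impl", "org.apache.iceberg.aws.s3.S3FileIO"); // derived default fills the gap
        options.putIfAbsent("warehouse", "s3://data-bucket/iceberg");         // ignored, the user value is kept
        System.out.println(options.get("warehouse"));                         // s3://user-defined/iceberg
    }
}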

View File

@ -36,18 +36,18 @@ import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
public class CredentialProviderHolder implements AwsCredentialsProvider {
private static Function<BucketURI, AwsCredentialsProvider> providerSupplier = bucketURI -> newCredentialsProviderChain(
credentialsProviders(bucketURI));
private static AwsCredentialsProvider provider;
private static BucketURI bucketURI;
public static void setup(Function<BucketURI, AwsCredentialsProvider> providerSupplier) {
CredentialProviderHolder.providerSupplier = providerSupplier;
}
public static void setup(BucketURI bucketURI) {
CredentialProviderHolder.provider = providerSupplier.apply(bucketURI);
CredentialProviderHolder.bucketURI = bucketURI;
}
private static List<AwsCredentialsProvider> credentialsProviders(BucketURI bucketURI) {
return List.of(new AutoMQStaticCredentialsProvider(bucketURI), DefaultCredentialsProvider.create());
return List.of(new AutoMQStaticCredentialsProvider(bucketURI), DefaultCredentialsProvider.builder().build());
}
private static AwsCredentialsProvider newCredentialsProviderChain(
@ -62,7 +62,10 @@ public class CredentialProviderHolder implements AwsCredentialsProvider {
// iceberg will invoke create with reflection.
public static AwsCredentialsProvider create() {
return provider;
if (bucketURI == null) {
throw new IllegalStateException("BucketURI must be set before calling create(). Please invoke setup(BucketURI) first.");
}
return providerSupplier.apply(bucketURI);
}
@Override
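
A hedged usage sketch of the stricter create() behavior above: setup(BucketURI) must run before Iceberg reflectively calls create(), otherwise the new IllegalStateException is thrown. The bucket URI string is hypothetical, and the sketch assumes it sits next to CredentialProviderHolder so BucketURI resolves without extra imports.

import java.util.List;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;

public class CredentialSetupSketch {
    public static void main(String[] args) {
        // Hypothetical bucket URI; real values come from the broker configuration.
        List<BucketURI> buckets = BucketURI.parseBuckets("0@s3://my-bucket?region=us-east-1");
        CredentialProviderHolder.setup(buckets.get(0));
        AwsCredentialsProvider provider = CredentialProviderHolder.create(); // safe only after setup()
        System.out.println(provider != null);
    }
}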

View File

@ -0,0 +1,229 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.automq.table.binder;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.ByteBuffers;
import org.apache.iceberg.util.DateTimeUtil;
import org.apache.iceberg.util.UUIDUtil;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.ZoneOffset;
import java.time.temporal.Temporal;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.UUID;
/**
* Abstract implementation providing common type conversion logic from source formats
* to Iceberg's internal Java type representation.
* <p>
* Handles dispatch logic and provides default conversion implementations for primitive types.
* Subclasses implement format-specific conversion for complex types (LIST, MAP, STRUCT).
*
* @param <S> The type of the source schema (e.g., org.apache.avro.Schema)
*/
public abstract class AbstractTypeAdapter<S> implements TypeAdapter<S> {
@SuppressWarnings({"CyclomaticComplexity", "NPathComplexity"})
@Override
public Object convert(Object sourceValue, S sourceSchema, Type targetType, StructConverter<S> structConverter) {
if (sourceValue == null) {
return null;
}
switch (targetType.typeId()) {
case BOOLEAN:
return convertBoolean(sourceValue, sourceSchema, targetType);
case INTEGER:
return convertInteger(sourceValue, sourceSchema, targetType);
case LONG:
return convertLong(sourceValue, sourceSchema, targetType);
case FLOAT:
return convertFloat(sourceValue, sourceSchema, targetType);
case DOUBLE:
return convertDouble(sourceValue, sourceSchema, targetType);
case STRING:
return convertString(sourceValue, sourceSchema, targetType);
case BINARY:
return convertBinary(sourceValue, sourceSchema, targetType);
case FIXED:
return convertFixed(sourceValue, sourceSchema, targetType);
case UUID:
return convertUUID(sourceValue, sourceSchema, targetType);
case DECIMAL:
return convertDecimal(sourceValue, sourceSchema, (Types.DecimalType) targetType);
case DATE:
return convertDate(sourceValue, sourceSchema, targetType);
case TIME:
return convertTime(sourceValue, sourceSchema, targetType);
case TIMESTAMP:
return convertTimestamp(sourceValue, sourceSchema, (Types.TimestampType) targetType);
case LIST:
return convertList(sourceValue, sourceSchema, (Types.ListType) targetType, structConverter);
case MAP:
return convertMap(sourceValue, sourceSchema, (Types.MapType) targetType, structConverter);
case STRUCT:
return structConverter.convert(sourceValue, sourceSchema, targetType);
default:
return sourceValue;
}
}
protected Object convertBoolean(Object sourceValue, S ignoredSourceSchema, Type targetType) {
if (sourceValue instanceof Boolean) return sourceValue;
if (sourceValue instanceof String) return Boolean.parseBoolean((String) sourceValue);
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertInteger(Object sourceValue, S ignoredSourceSchema, Type targetType) {
if (sourceValue instanceof Integer) return sourceValue;
if (sourceValue instanceof Number) return ((Number) sourceValue).intValue();
if (sourceValue instanceof String) return Integer.parseInt((String) sourceValue);
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertLong(Object sourceValue, S ignoredSourceSchema, Type targetType) {
if (sourceValue instanceof Long) return sourceValue;
if (sourceValue instanceof Number) return ((Number) sourceValue).longValue();
if (sourceValue instanceof String) return Long.parseLong((String) sourceValue);
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertFloat(Object sourceValue, S ignoredSourceSchema, Type targetType) {
if (sourceValue instanceof Float) return sourceValue;
if (sourceValue instanceof Number) return ((Number) sourceValue).floatValue();
if (sourceValue instanceof String) return Float.parseFloat((String) sourceValue);
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertDouble(Object sourceValue, S ignoredSourceSchema, Type targetType) {
if (sourceValue instanceof Double) return sourceValue;
if (sourceValue instanceof Number) return ((Number) sourceValue).doubleValue();
if (sourceValue instanceof String) return Double.parseDouble((String) sourceValue);
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertString(Object sourceValue, S sourceSchema, Type targetType) {
if (sourceValue instanceof String) {
return sourceValue;
}
// Simple toString conversion - subclasses can override for more complex logic
return sourceValue.toString();
}
protected Object convertBinary(Object sourceValue, S sourceSchema, Type targetType) {
if (sourceValue instanceof ByteBuffer) return ((ByteBuffer) sourceValue).duplicate();
if (sourceValue instanceof byte[]) return ByteBuffer.wrap((byte[]) sourceValue);
if (sourceValue instanceof String) return ByteBuffer.wrap(((String) sourceValue).getBytes(StandardCharsets.UTF_8));
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertFixed(Object sourceValue, S sourceSchema, Type targetType) {
if (sourceValue instanceof byte[]) return sourceValue;
if (sourceValue instanceof ByteBuffer) return ByteBuffers.toByteArray((ByteBuffer) sourceValue);
if (sourceValue instanceof String) return ((String) sourceValue).getBytes(StandardCharsets.UTF_8);
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertUUID(Object sourceValue, S sourceSchema, Type targetType) {
UUID uuid = null;
if (sourceValue instanceof String) {
uuid = UUID.fromString(sourceValue.toString());
} else if (sourceValue instanceof UUID) {
uuid = (UUID) sourceValue;
} else if (sourceValue instanceof ByteBuffer) {
ByteBuffer bb = ((ByteBuffer) sourceValue).duplicate();
if (bb.remaining() == 16) {
uuid = new UUID(bb.getLong(), bb.getLong());
}
}
if (uuid != null) {
return UUIDUtil.convert(uuid);
}
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertDecimal(Object sourceValue, S ignoredSourceSchema, Types.DecimalType targetType) {
if (sourceValue instanceof BigDecimal) return sourceValue;
if (sourceValue instanceof String) return new BigDecimal((String) sourceValue);
if (sourceValue instanceof byte[]) return new BigDecimal(new java.math.BigInteger((byte[]) sourceValue), targetType.scale());
if (sourceValue instanceof ByteBuffer) {
ByteBuffer bb = ((ByteBuffer) sourceValue).duplicate();
byte[] bytes = new byte[bb.remaining()];
bb.get(bytes);
return new BigDecimal(new java.math.BigInteger(bytes), targetType.scale());
}
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertDate(Object sourceValue, S ignoredSourceSchema, Type targetType) {
if (sourceValue instanceof LocalDate) return sourceValue;
if (sourceValue instanceof Number) return LocalDate.ofEpochDay(((Number) sourceValue).intValue());
if (sourceValue instanceof Date) return ((Date) sourceValue).toInstant().atZone(ZoneOffset.UTC).toLocalDate();
if (sourceValue instanceof String) return LocalDate.parse(sourceValue.toString());
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertTime(Object sourceValue, S sourceSchema, Type targetType) {
if (sourceValue instanceof LocalTime) return sourceValue;
if (sourceValue instanceof Date) return ((Date) sourceValue).toInstant().atZone(ZoneOffset.UTC).toLocalTime();
if (sourceValue instanceof String) return LocalTime.parse(sourceValue.toString());
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected Object convertTimestamp(Object sourceValue, S sourceSchema, Types.TimestampType targetType) {
if (sourceValue instanceof Temporal) return sourceValue;
if (sourceValue instanceof Date) {
Instant instant = ((Date) sourceValue).toInstant();
long micros = DateTimeUtil.microsFromInstant(instant);
return targetType.shouldAdjustToUTC()
? DateTimeUtil.timestamptzFromMicros(micros)
: DateTimeUtil.timestampFromMicros(micros);
}
if (sourceValue instanceof String) {
Instant instant = Instant.parse(sourceValue.toString());
long micros = DateTimeUtil.microsFromInstant(instant);
return targetType.shouldAdjustToUTC()
? DateTimeUtil.timestamptzFromMicros(micros)
: DateTimeUtil.timestampFromMicros(micros);
}
if (sourceValue instanceof Number) {
// Assume the number represents microseconds since epoch
// Subclasses should override to handle milliseconds or other units based on logical type
long micros = ((Number) sourceValue).longValue();
return targetType.shouldAdjustToUTC()
? DateTimeUtil.timestamptzFromMicros(micros)
: DateTimeUtil.timestampFromMicros(micros);
}
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to " + targetType.typeId());
}
protected abstract List<?> convertList(Object sourceValue, S sourceSchema, Types.ListType targetType, StructConverter<S> structConverter);
protected abstract Map<?, ?> convertMap(Object sourceValue, S sourceSchema, Types.MapType targetType, StructConverter<S> structConverter);
}

View File

@ -0,0 +1,210 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.automq.table.binder;
import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.DateTimeUtil;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* A concrete implementation of TypeAdapter that converts values from Avro's
* data representation to Iceberg's internal Java type representation.
* <p>
* This class extends {@link AbstractTypeAdapter} and overrides methods to handle
* Avro-specific types like Utf8, EnumSymbol, and Fixed, as well as Avro's
* specific representations for List and Map.
*/
public class AvroValueAdapter extends AbstractTypeAdapter<Schema> {
private static final org.apache.avro.Schema STRING_SCHEMA_INSTANCE = org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING);
@Override
protected Object convertString(Object sourceValue, Schema sourceSchema, Type targetType) {
if (sourceValue instanceof Utf8) {
return sourceValue;
}
if (sourceValue instanceof GenericData.EnumSymbol) {
return sourceValue.toString();
}
return super.convertString(sourceValue, sourceSchema, targetType);
}
@Override
protected Object convertBinary(Object sourceValue, Schema sourceSchema, Type targetType) {
if (sourceValue instanceof GenericData.Fixed) {
return ByteBuffer.wrap(((GenericData.Fixed) sourceValue).bytes());
}
return super.convertBinary(sourceValue, sourceSchema, targetType);
}
@Override
protected Object convertFixed(Object sourceValue, Schema sourceSchema, Type targetType) {
if (sourceValue instanceof GenericData.Fixed) {
return ((GenericData.Fixed) sourceValue).bytes();
}
return super.convertFixed(sourceValue, sourceSchema, targetType);
}
@Override
protected Object convertUUID(Object sourceValue, Schema sourceSchema, Type targetType) {
if (sourceValue instanceof Utf8) {
return super.convertUUID(sourceValue.toString(), sourceSchema, targetType);
}
return super.convertUUID(sourceValue, sourceSchema, targetType);
}
@Override
protected Object convertTime(Object sourceValue, Schema sourceSchema, Type targetType) {
if (sourceValue instanceof Number) {
LogicalType logicalType = sourceSchema.getLogicalType();
if (logicalType instanceof LogicalTypes.TimeMicros) {
return DateTimeUtil.timeFromMicros(((Number) sourceValue).longValue());
} else if (logicalType instanceof LogicalTypes.TimeMillis) {
return DateTimeUtil.timeFromMicros(((Number) sourceValue).longValue() * 1000);
}
}
return super.convertTime(sourceValue, sourceSchema, targetType);
}
@Override
protected Object convertTimestamp(Object sourceValue, Schema sourceSchema, Types.TimestampType targetType) {
if (sourceValue instanceof Number) {
long value = ((Number) sourceValue).longValue();
LogicalType logicalType = sourceSchema.getLogicalType();
if (logicalType instanceof LogicalTypes.TimestampMillis) {
return targetType.shouldAdjustToUTC()
? DateTimeUtil.timestamptzFromMicros(value * 1000)
: DateTimeUtil.timestampFromMicros(value * 1000);
} else if (logicalType instanceof LogicalTypes.TimestampMicros) {
return targetType.shouldAdjustToUTC()
? DateTimeUtil.timestamptzFromMicros(value)
: DateTimeUtil.timestampFromMicros(value);
} else if (logicalType instanceof LogicalTypes.LocalTimestampMillis) {
return DateTimeUtil.timestampFromMicros(value * 1000);
} else if (logicalType instanceof LogicalTypes.LocalTimestampMicros) {
return DateTimeUtil.timestampFromMicros(value);
}
}
return super.convertTimestamp(sourceValue, sourceSchema, targetType);
}
@Override
protected List<?> convertList(Object sourceValue, Schema sourceSchema, Types.ListType targetType, StructConverter<Schema> structConverter) {
Schema listSchema = sourceSchema;
Schema elementSchema = listSchema.getElementType();
List<?> sourceList;
if (sourceValue instanceof GenericData.Array) {
sourceList = (GenericData.Array<?>) sourceValue;
} else if (sourceValue instanceof List) {
sourceList = (List<?>) sourceValue;
} else {
throw new IllegalArgumentException("Cannot convert " + sourceValue.getClass().getSimpleName() + " to LIST");
}
List<Object> list = new ArrayList<>(sourceList.size());
for (Object element : sourceList) {
Object convert = convert(element, elementSchema, targetType.elementType(), structConverter);
list.add(convert);
}
return list;
}
@Override
protected Map<?, ?> convertMap(Object sourceValue, Schema sourceSchema, Types.MapType targetType, StructConverter<Schema> structConverter) {
if (sourceValue instanceof GenericData.Array) {
GenericData.Array<?> arrayValue = (GenericData.Array<?>) sourceValue;
Map<Object, Object> recordMap = new HashMap<>(arrayValue.size());
Schema kvSchema = sourceSchema.getElementType();
Schema.Field keyField = kvSchema.getFields().get(0);
Schema.Field valueField = kvSchema.getFields().get(1);
if (keyField == null || valueField == null) {
throw new IllegalStateException("Map entry schema missing key/value fields: " + kvSchema);
}
Schema keySchema = keyField.schema();
Schema valueSchema = valueField.schema();
Type keyType = targetType.keyType();
Type valueType = targetType.valueType();
for (Object element : arrayValue) {
if (element == null) {
continue;
}
GenericRecord record = (GenericRecord) element;
Object key = convert(record.get(keyField.pos()), keySchema, keyType, structConverter);
Object value = convert(record.get(valueField.pos()), valueSchema, valueType, structConverter);
recordMap.put(key, value);
}
return recordMap;
}
Schema mapSchema = sourceSchema;
Map<?, ?> sourceMap = (Map<?, ?>) sourceValue;
Map<Object, Object> adaptedMap = new HashMap<>(sourceMap.size());
Schema valueSchema = mapSchema.getValueType();
Type keyType = targetType.keyType();
Type valueType = targetType.valueType();
for (Map.Entry<?, ?> entry : sourceMap.entrySet()) {
Object rawKey = entry.getKey();
Object key = convert(rawKey, STRING_SCHEMA_INSTANCE, keyType, structConverter);
Object value = convert(entry.getValue(), valueSchema, valueType, structConverter);
adaptedMap.put(key, value);
}
return adaptedMap;
}
@Override
public Object convert(Object sourceValue, Schema sourceSchema, Type targetType) {
return convert(sourceValue, sourceSchema, targetType, this::convertStruct);
}
protected Object convertStruct(Object sourceValue, Schema sourceSchema, Type targetType) {
org.apache.iceberg.Schema schema = targetType.asStructType().asSchema();
org.apache.iceberg.data.GenericRecord result = org.apache.iceberg.data.GenericRecord.create(schema);
for (Types.NestedField f : schema.columns()) {
// Convert the value to the expected type
GenericRecord record = (GenericRecord) sourceValue;
Schema.Field sourceField = sourceSchema.getField(f.name());
if (sourceField == null) {
throw new IllegalStateException("Missing field '" + f.name()
+ "' in source schema: " + sourceSchema.getFullName());
}
Object fieldValue = convert(record.get(f.name()), sourceField.schema(), f.type());
result.setField(f.name(), fieldValue);
}
return result;
}
}
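
For a quick feel of the adapter above, here is a hedged usage sketch (assumed to live in the same package as AvroValueAdapter) converting an Avro timestamp-millis long into Iceberg's internal timestamp-with-zone representation; the epoch value is illustrative.

import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.iceberg.types.Types;

public class AvroValueAdapterSketch {
    public static void main(String[] args) {
        // An Avro long carrying the timestamp-millis logical type.
        Schema tsMillis = LogicalTypes.timestampMillis()
            .addToSchema(Schema.create(Schema.Type.LONG));
        AvroValueAdapter adapter = new AvroValueAdapter();
        // 2021-01-01T00:00:00Z in epoch milliseconds -> OffsetDateTime (timestamptz).
        Object converted = adapter.convert(1609459200000L, tsMillis, Types.TimestampType.withZone());
        System.out.println(converted); // 2021-01-01T00:00Z
    }
}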

View File

@ -0,0 +1,57 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.automq.table.binder;
import org.apache.avro.Schema;
import org.apache.iceberg.types.Type;
/**
* Represents the mapping between an Avro field and its corresponding Iceberg field.
* This class stores the position, key, schema, and type information needed to
* convert field values during record binding.
*/
public class FieldMapping {
private final int avroPosition;
private final String avroKey;
private final Type icebergType;
private final Schema avroSchema;
public FieldMapping(int avroPosition, String avroKey, Type icebergType, Schema avroSchema) {
this.avroPosition = avroPosition;
this.avroKey = avroKey;
this.icebergType = icebergType;
this.avroSchema = avroSchema;
}
public int avroPosition() {
return avroPosition;
}
public String avroKey() {
return avroKey;
}
public Type icebergType() {
return icebergType;
}
public Schema avroSchema() {
return avroSchema;
}
}

View File

@ -0,0 +1,494 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.automq.table.binder;
import kafka.automq.table.metric.FieldMetric;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericRecord;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import static org.apache.avro.Schema.Type.ARRAY;
import static org.apache.avro.Schema.Type.NULL;
/**
* A factory that creates lazy-evaluation Record views of Avro GenericRecords.
* Field values are converted only when accessed, avoiding upfront conversion overhead.
*/
public class RecordBinder {
private final org.apache.iceberg.Schema icebergSchema;
private final TypeAdapter<Schema> typeAdapter;
private final Map<String, Integer> fieldNameToPosition;
private final FieldMapping[] fieldMappings;
// Pre-computed RecordBinders for nested STRUCT fields
private final Map<Schema, RecordBinder> nestedStructBinders;
// Field count statistics for this batch
private final AtomicLong batchFieldCount;
public RecordBinder(GenericRecord avroRecord) {
this(AvroSchemaUtil.toIceberg(avroRecord.getSchema()), avroRecord.getSchema());
}
public RecordBinder(org.apache.iceberg.Schema icebergSchema, Schema avroSchema) {
this(icebergSchema, avroSchema, new AvroValueAdapter());
}
public RecordBinder(org.apache.iceberg.Schema icebergSchema, Schema avroSchema, TypeAdapter<Schema> typeAdapter) {
this(icebergSchema, avroSchema, typeAdapter, new AtomicLong(0));
}
public RecordBinder(org.apache.iceberg.Schema icebergSchema, Schema avroSchema, TypeAdapter<Schema> typeAdapter, AtomicLong batchFieldCount) {
this.icebergSchema = icebergSchema;
this.typeAdapter = typeAdapter;
this.batchFieldCount = batchFieldCount;
// Pre-compute field name to position mapping
this.fieldNameToPosition = new HashMap<>();
for (int i = 0; i < icebergSchema.columns().size(); i++) {
fieldNameToPosition.put(icebergSchema.columns().get(i).name(), i);
}
// Initialize field mappings
this.fieldMappings = buildFieldMappings(avroSchema, icebergSchema);
// Pre-compute nested struct binders
this.nestedStructBinders = precomputeBindersMap(typeAdapter);
}
public RecordBinder createBinderForNewSchema(org.apache.iceberg.Schema icebergSchema, Schema avroSchema) {
return new RecordBinder(icebergSchema, avroSchema, typeAdapter, batchFieldCount);
}
public org.apache.iceberg.Schema getIcebergSchema() {
return icebergSchema;
}
/**
* Creates a new immutable Record view of the given Avro record.
* Each call returns a separate instance with its own data reference.
*/
public Record bind(GenericRecord avroRecord) {
if (avroRecord == null) {
return null;
}
return new AvroRecordView(avroRecord, icebergSchema, typeAdapter,
fieldNameToPosition, fieldMappings, nestedStructBinders, this);
}
/**
* Gets the accumulated field count for this batch and resets it to zero.
* Should be called after each flush to collect field statistics.
*/
public long getAndResetFieldCount() {
return batchFieldCount.getAndSet(0);
}
/**
* Adds field count to the batch total. Called by AvroRecordView instances.
*/
void addFieldCount(long count) {
batchFieldCount.addAndGet(count);
}
private FieldMapping[] buildFieldMappings(Schema avroSchema, org.apache.iceberg.Schema icebergSchema) {
Schema recordSchema = avroSchema;
FieldMapping[] mappings = new FieldMapping[icebergSchema.columns().size()];
// Unwrap UNION if it contains only one non-NULL type
recordSchema = resolveUnionElement(recordSchema);
for (int icebergPos = 0; icebergPos < icebergSchema.columns().size(); icebergPos++) {
Types.NestedField icebergField = icebergSchema.columns().get(icebergPos);
String fieldName = icebergField.name();
Schema.Field avroField = recordSchema.getField(fieldName);
if (avroField != null) {
mappings[icebergPos] = buildFieldMapping(
avroField.name(),
avroField.pos(),
icebergField.type(),
avroField.schema()
);
} else {
mappings[icebergPos] = null;
}
}
return mappings;
}
private FieldMapping buildFieldMapping(String avroFieldName, int avroPosition, Type icebergType, Schema avroType) {
if (Type.TypeID.TIMESTAMP.equals(icebergType.typeId())
|| Type.TypeID.TIME.equals(icebergType.typeId())
|| Type.TypeID.MAP.equals(icebergType.typeId())
|| Type.TypeID.LIST.equals(icebergType.typeId())
|| Type.TypeID.STRUCT.equals(icebergType.typeId())) {
avroType = resolveUnionElement(avroType);
}
return new FieldMapping(avroPosition, avroFieldName, icebergType, avroType);
}
private Schema resolveUnionElement(Schema schema) {
if (schema.getType() != Schema.Type.UNION) {
return schema;
}
// Collect all non-NULL types
List<Schema> nonNullTypes = new ArrayList<>();
for (Schema s : schema.getTypes()) {
if (s.getType() != NULL) {
nonNullTypes.add(s);
}
}
if (nonNullTypes.isEmpty()) {
throw new IllegalArgumentException("UNION schema contains only NULL type: " + schema);
} else if (nonNullTypes.size() == 1) {
// Only unwrap UNION if it contains exactly one non-NULL type (optional union)
return nonNullTypes.get(0);
} else {
// Multiple non-NULL types: non-optional union not supported
throw new UnsupportedOperationException(
"Non-optional UNION with multiple non-NULL types is not supported. " +
"Found " + nonNullTypes.size() + " non-NULL types in UNION: " + schema);
}
}
/**
* Pre-computes RecordBinders for nested STRUCT fields.
*/
private Map<Schema, RecordBinder> precomputeBindersMap(TypeAdapter<Schema> typeAdapter) {
Map<Schema, RecordBinder> binders = new IdentityHashMap<>();
for (FieldMapping mapping : fieldMappings) {
if (mapping != null) {
precomputeBindersForType(mapping.icebergType(), mapping.avroSchema(), binders, typeAdapter);
}
}
return binders;
}
/**
* Recursively precomputes binders for a given Iceberg type and its corresponding Avro schema.
*/
private void precomputeBindersForType(Type icebergType, Schema avroSchema,
Map<Schema, RecordBinder> binders,
TypeAdapter<Schema> typeAdapter) {
if (icebergType.isPrimitiveType()) {
return; // No binders needed for primitive types
}
if (icebergType.isStructType() && !avroSchema.isUnion()) {
createStructBinder(icebergType.asStructType(), avroSchema, binders, typeAdapter);
} else if (icebergType.isStructType() && avroSchema.isUnion()) {
createUnionStructBinders(icebergType.asStructType(), avroSchema, binders, typeAdapter);
} else if (icebergType.isListType()) {
createListBinder(icebergType.asListType(), avroSchema, binders, typeAdapter);
} else if (icebergType.isMapType()) {
createMapBinder(icebergType.asMapType(), avroSchema, binders, typeAdapter);
}
}
/**
* Creates binders for STRUCT types represented as Avro UNIONs: a synthetic record schema is
* assembled with an int "tag" discriminator plus one "fieldN" member per non-NULL branch,
* so the nested binder's field layout matches the columns of the converted Iceberg struct.
*/
private void createUnionStructBinders(Types.StructType structType, Schema avroSchema,
Map<Schema, RecordBinder> binders,
TypeAdapter<Schema> typeAdapter) {
org.apache.iceberg.Schema schema = structType.asSchema();
SchemaBuilder.FieldAssembler<Schema> schemaBuilder = SchemaBuilder.record(avroSchema.getName()).fields()
.name("tag").type().intType().noDefault();
int tag = 0;
for (Schema unionMember : avroSchema.getTypes()) {
if (unionMember.getType() != NULL) {
schemaBuilder.name("field" + tag).type(unionMember).noDefault();
tag++;
}
}
RecordBinder structBinder = new RecordBinder(schema, schemaBuilder.endRecord(), typeAdapter, batchFieldCount);
binders.put(avroSchema, structBinder);
}
/**
* Creates a binder for a STRUCT type field.
*/
private void createStructBinder(Types.StructType structType, Schema avroSchema,
Map<Schema, RecordBinder> binders,
TypeAdapter<Schema> typeAdapter) {
org.apache.iceberg.Schema schema = structType.asSchema();
RecordBinder structBinder = new RecordBinder(schema, avroSchema, typeAdapter, batchFieldCount);
binders.put(avroSchema, structBinder);
}
/**
* Creates binders for LIST type elements (if they are STRUCT types).
*/
private void createListBinder(Types.ListType listType, Schema avroSchema,
Map<Schema, RecordBinder> binders,
TypeAdapter<Schema> typeAdapter) {
Type elementType = listType.elementType();
if (elementType.isStructType()) {
Schema elementAvroSchema = avroSchema.getElementType();
createStructBinder(elementType.asStructType(), elementAvroSchema, binders, typeAdapter);
}
}
/**
* Creates binders for MAP type keys and values (if they are STRUCT types).
* Handles two Avro representations: ARRAY of key-value records, or native MAP.
*/
private void createMapBinder(Types.MapType mapType, Schema avroSchema,
Map<Schema, RecordBinder> binders,
TypeAdapter<Schema> typeAdapter) {
Type keyType = mapType.keyType();
Type valueType = mapType.valueType();
if (ARRAY.equals(avroSchema.getType())) {
// Avro represents MAP as ARRAY of records with "key" and "value" fields
createMapAsArrayBinder(keyType, valueType, avroSchema, binders, typeAdapter);
} else {
// Avro represents MAP as native MAP type
createMapAsMapBinder(keyType, valueType, avroSchema, binders, typeAdapter);
}
}
/**
* Handles MAP represented as Avro ARRAY of {key, value} records.
*/
private void createMapAsArrayBinder(Type keyType, Type valueType, Schema avroSchema,
Map<Schema, RecordBinder> binders,
TypeAdapter<Schema> typeAdapter) {
Schema elementSchema = avroSchema.getElementType();
// Process key if it's a STRUCT
if (keyType.isStructType()) {
Schema keyAvroSchema = elementSchema.getField("key").schema();
createStructBinder(keyType.asStructType(), keyAvroSchema, binders, typeAdapter);
}
// Process value if it's a STRUCT
if (valueType.isStructType()) {
Schema valueAvroSchema = elementSchema.getField("value").schema();
createStructBinder(valueType.asStructType(), valueAvroSchema, binders, typeAdapter);
}
}
/**
* Handles MAP represented as Avro native MAP type.
*/
private void createMapAsMapBinder(Type keyType, Type valueType, Schema avroSchema,
Map<Schema, RecordBinder> binders,
TypeAdapter<Schema> typeAdapter) {
// Avro native maps only allow string keys, so struct keys cannot be bound here
if (keyType.isStructType()) {
throw new UnsupportedOperationException("Struct keys in MAP types are not supported");
}
// Process value if it's a STRUCT
if (valueType.isStructType()) {
Schema valueAvroSchema = avroSchema.getValueType();
createStructBinder(valueType.asStructType(), valueAvroSchema, binders, typeAdapter);
}
}
private static class AvroRecordView implements Record {
private final GenericRecord avroRecord;
private final org.apache.iceberg.Schema icebergSchema;
private final TypeAdapter<Schema> typeAdapter;
private final Map<String, Integer> fieldNameToPosition;
private final FieldMapping[] fieldMappings;
private final Map<Schema, RecordBinder> nestedStructBinders;
private final RecordBinder parentBinder;
AvroRecordView(GenericRecord avroRecord,
org.apache.iceberg.Schema icebergSchema,
TypeAdapter<Schema> typeAdapter,
Map<String, Integer> fieldNameToPosition,
FieldMapping[] fieldMappings,
Map<Schema, RecordBinder> nestedStructBinders,
RecordBinder parentBinder) {
this.avroRecord = avroRecord;
this.icebergSchema = icebergSchema;
this.typeAdapter = typeAdapter;
this.fieldNameToPosition = fieldNameToPosition;
this.fieldMappings = fieldMappings;
this.nestedStructBinders = nestedStructBinders;
this.parentBinder = parentBinder;
}
@Override
public Object get(int pos) {
if (avroRecord == null) {
throw new IllegalStateException("Avro record is null");
}
if (pos < 0 || pos >= fieldMappings.length) {
throw new IndexOutOfBoundsException("Field position " + pos + " out of bounds");
}
FieldMapping mapping = fieldMappings[pos];
if (mapping == null) {
return null;
}
Object avroValue = avroRecord.get(mapping.avroPosition());
if (avroValue == null) {
return null;
}
Object result = convert(avroValue, mapping.avroSchema(), mapping.icebergType());
// Calculate and accumulate field count
long fieldCount = calculateFieldCount(result, mapping.icebergType());
parentBinder.addFieldCount(fieldCount);
return result;
}
public Object convert(Object sourceValue, Schema sourceSchema, Type targetType) {
if (targetType.typeId() == Type.TypeID.STRUCT) {
RecordBinder binder = nestedStructBinders.get(sourceSchema);
if (binder == null) {
throw new IllegalStateException("Missing nested binder for schema: " + sourceSchema);
}
return binder.bind((GenericRecord) sourceValue);
}
return typeAdapter.convert(sourceValue, sourceSchema, targetType, this::convert);
}
/**
* Calculates the field count for a converted value.
* STRING, BINARY and FIXED values are weighted by FieldMetric according to their size;
* LIST and MAP values are counted recursively; all other types count as one field.
*/
private long calculateFieldCount(Object value, Type icebergType) {
if (value == null) {
return 0;
}
switch (icebergType.typeId()) {
case STRING:
return FieldMetric.count((CharSequence) value);
case BINARY:
return FieldMetric.count((ByteBuffer) value);
case FIXED:
return FieldMetric.count((byte[]) value);
case LIST:
return calculateListFieldCount(value, ((Types.ListType) icebergType).elementType());
case MAP:
return calculateMapFieldCount(value, (Types.MapType) icebergType);
default:
return 1; // Struct or Primitive types count as 1 field
}
}
/**
* Calculates field count for List values by summing element costs.
*/
private long calculateListFieldCount(Object list, Type elementType) {
if (list == null) {
return 0;
}
long total = 1;
if (list instanceof List) {
for (Object element : (List<?>) list) {
total += calculateFieldCount(element, elementType);
}
}
return total;
}
/**
* Calculates field count for Map values by summing key and value costs.
*/
private long calculateMapFieldCount(Object map, Types.MapType mapType) {
if (map == null) {
return 0;
}
long total = 1;
if (map instanceof Map) {
Map<?, ?> typedMap = (Map<?, ?>) map;
if (typedMap.isEmpty()) {
return total;
}
for (Map.Entry<?, ?> entry : typedMap.entrySet()) {
total += calculateFieldCount(entry.getKey(), mapType.keyType());
total += calculateFieldCount(entry.getValue(), mapType.valueType());
}
}
return total;
}
@Override
public Object getField(String name) {
Integer position = fieldNameToPosition.get(name);
return position != null ? get(position) : null;
}
@Override
public Types.StructType struct() {
return icebergSchema.asStruct();
}
@Override
public int size() {
return icebergSchema.columns().size();
}
@Override
public <T> T get(int pos, Class<T> javaClass) {
return javaClass.cast(get(pos));
}
// Unsupported operations
@Override
public void setField(String name, Object value) {
throw new UnsupportedOperationException("Read-only");
}
@Override
public Record copy() {
throw new UnsupportedOperationException("Read-only");
}
@Override
public Record copy(Map<String, Object> overwriteValues) {
throw new UnsupportedOperationException("Read-only");
}
@Override
public <T> void set(int pos, T value) {
throw new UnsupportedOperationException("Read-only");
}
}
}
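A minimal usage sketch of the binder above, assuming an Avro GenericRecord named avroRecord and an example column name "user_id" that are not part of this change; the two-argument constructor wires in the default AvroValueAdapter:

// Illustration only: avroRecord is an assumed org.apache.avro.generic.GenericRecord
// and "user_id" is an assumed column name.
org.apache.avro.Schema avroSchema = avroRecord.getSchema();
org.apache.iceberg.Schema icebergSchema = org.apache.iceberg.avro.AvroSchemaUtil.toIceberg(avroSchema);
RecordBinder binder = new RecordBinder(icebergSchema, avroSchema);
// bind() returns a lightweight read-only view; conversion happens lazily in get()/getField().
org.apache.iceberg.data.Record icebergRecord = binder.bind(avroRecord);
Object firstColumn = icebergRecord.get(0);
Object byName = icebergRecord.getField("user_id");
// After a flush, drain the accumulated field statistics for metrics reporting.
long fieldsInBatch = binder.getAndResetFieldCount();

Because AvroRecordView is read-only, setField, set and copy throw UnsupportedOperationException, so callers should treat the bound record as an immutable view over the underlying Avro data.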


@@ -16,16 +16,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.automq.table.binder;
package kafka.automq.table.worker.convert;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.data.Record;
@FunctionalInterface
public interface StructConverter<S> {
public interface IcebergRecordConverter<R> {
Record convertRecord(R record);
/**
* Return processed field count
*/
long fieldCount();
}
Object convert(Object sourceValue, S sourceSchema, Type targetType);
}


@@ -0,0 +1,50 @@
/*
* Copyright 2025, AutoMQ HK Limited.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.automq.table.binder;
import org.apache.iceberg.types.Type;
/**
* Converts values from a source schema system into values of the corresponding Iceberg types.
*
* @param <S> The source schema type (e.g., org.apache.avro.Schema)
*/
public interface TypeAdapter<S> {
/**
* Converts a source value to the target Iceberg type.
*
* @param sourceValue The source value
* @param sourceSchema The source schema
* @param targetType The target Iceberg type
* @return The converted value
*/
Object convert(Object sourceValue, S sourceSchema, Type targetType);
/**
* Converts a source value to the target Iceberg type with support for recursive struct conversion.
*
* @param sourceValue The source value
* @param sourceSchema The source schema
* @param targetType The target Iceberg type
* @param structConverter A callback for converting nested STRUCT types
* @return The converted value
*/
Object convert(Object sourceValue, S sourceSchema, Type targetType, StructConverter<S> structConverter);
}
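To make the two-method contract concrete, here is a hedged sketch of an adapter implementation; MinimalAvroAdapter is a made-up name, not the AvroValueAdapter used elsewhere in this diff, and it only illustrates how the StructConverter callback is threaded through for nested STRUCT values:

// Hypothetical example, assumed to live alongside TypeAdapter and StructConverter.
import org.apache.avro.Schema;
import org.apache.iceberg.types.Type;

public class MinimalAvroAdapter implements TypeAdapter<Schema> {
    @Override
    public Object convert(Object sourceValue, Schema sourceSchema, Type targetType) {
        // Without an external callback, nested structs are passed through unchanged.
        return convert(sourceValue, sourceSchema, targetType, (value, schema, type) -> value);
    }

    @Override
    public Object convert(Object sourceValue, Schema sourceSchema, Type targetType,
                          StructConverter<Schema> structConverter) {
        if (sourceValue == null) {
            return null;
        }
        switch (targetType.typeId()) {
            case STRUCT:
                // Hand nested records back to the caller, e.g. AvroRecordView::convert.
                return structConverter.convert(sourceValue, sourceSchema, targetType);
            case STRING:
                return sourceValue.toString(); // e.g. Avro Utf8 to java.lang.String
            default:
                return sourceValue; // pass primitives through unchanged (illustrative only)
        }
    }
}

The real AvroValueAdapter presumably covers the full Avro-to-Iceberg type matrix (timestamps, decimals, lists, maps); the point of the sketch is only the recursion hook that lets struct-valued list elements and map entries be bound by the precomputed nested RecordBinders.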


@@ -20,7 +20,6 @@
package kafka.automq.table.coordinator;
import kafka.automq.table.Channel;
import kafka.automq.table.TableTopicMetricsManager;
import kafka.automq.table.events.CommitRequest;
import kafka.automq.table.events.CommitResponse;
import kafka.automq.table.events.Envelope;
@@ -28,6 +27,7 @@ import kafka.automq.table.events.Errors;
import kafka.automq.table.events.Event;
import kafka.automq.table.events.EventType;
import kafka.automq.table.events.WorkerOffset;
import kafka.automq.table.metric.TableTopicMetricsManager;
import kafka.automq.table.utils.PartitionUtil;
import kafka.automq.table.utils.TableIdentifierUtil;
import kafka.log.streamaspect.MetaKeyValue;
@@ -36,6 +36,7 @@ import kafka.server.MetadataCache;
import org.apache.kafka.storage.internals.log.LogConfig;
import com.automq.stream.s3.metrics.Metrics;
import com.automq.stream.s3.metrics.TimerUtil;
import com.automq.stream.utils.Systems;
import com.automq.stream.utils.Threads;
@@ -61,13 +62,10 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
@@ -82,24 +80,8 @@ public class TableCoordinator implements Closeable {
private static final String SNAPSHOT_COMMIT_ID = "automq.commit.id";
private static final String WATERMARK = "automq.watermark";
private static final UUID NOOP_UUID = new UUID(0, 0);
private static final Map<String, Long> WATERMARK_METRICS = new ConcurrentHashMap<>();
private static final Map<String, Double> FIELD_PER_SECONDS_METRICS = new ConcurrentHashMap<>();
private static final long NOOP_WATERMARK = -1L;
static {
TableTopicMetricsManager.setDelaySupplier(() -> {
Map<String, Long> delay = new HashMap<>(WATERMARK_METRICS.size());
long now = System.currentTimeMillis();
WATERMARK_METRICS.forEach((topic, watermark) -> {
if (watermark != NOOP_WATERMARK) {
delay.put(topic, now - watermark);
}
});
return delay;
});
TableTopicMetricsManager.setFieldsPerSecondSupplier(() -> FIELD_PER_SECONDS_METRICS);
}
private final Catalog catalog;
private final String topic;
private final String name;
@@ -113,9 +95,11 @@ public class TableCoordinator implements Closeable {
private final long commitTimeout = TimeUnit.SECONDS.toMillis(30);
private volatile boolean closed = false;
private final Supplier<LogConfig> config;
private final Metrics.LongGaugeBundle.LongGauge delayMetric;
private final Metrics.DoubleGaugeBundle.DoubleGauge fieldsPerSecondMetric;
public TableCoordinator(Catalog catalog, String topic, MetaStream metaStream, Channel channel,
EventLoop eventLoop, MetadataCache metadataCache, Supplier<LogConfig> config) {
EventLoop eventLoop, MetadataCache metadataCache, Supplier<LogConfig> config) {
this.catalog = catalog;
this.topic = topic;
this.name = topic;
@@ -125,13 +109,15 @@ public class TableCoordinator implements Closeable {
this.metadataCache = metadataCache;
this.config = config;
this.tableIdentifier = TableIdentifierUtil.of(config.get().tableTopicNamespace, topic);
this.delayMetric = TableTopicMetricsManager.registerDelay(topic);
this.fieldsPerSecondMetric = TableTopicMetricsManager.registerFieldsPerSecond(topic);
}
private CommitStatusMachine commitStatusMachine;
public void start() {
WATERMARK_METRICS.put(topic, -1L);
FIELD_PER_SECONDS_METRICS.put(topic, 0.0);
delayMetric.clear();
fieldsPerSecondMetric.record(0.0);
// Wait briefly to avoid concurrent commits from multiple coordinators.
SCHEDULER.schedule(() -> {
@@ -157,8 +143,8 @@
public void close() {
// quick close
closed = true;
WATERMARK_METRICS.remove(topic);
FIELD_PER_SECONDS_METRICS.remove(topic);
delayMetric.close();
fieldsPerSecondMetric.close();
eventLoop.execute(() -> {
if (commitStatusMachine != null) {
commitStatusMachine.close();
@@ -189,7 +175,7 @@
commitStatusMachine.nextRoundCommit();
break;
case REQUEST_COMMIT:
commitStatusMachine.tryMoveToCommitedStatus();
commitStatusMachine.tryMoveToCommittedStatus();
break;
default:
LOGGER.error("[TABLE_COORDINATOR_UNKNOWN_STATUS],{}", commitStatusMachine.status);
@@ -339,7 +325,7 @@
channel.send(topic, new Event(time.milliseconds(), EventType.COMMIT_REQUEST, commitRequest));
}
public void tryMoveToCommitedStatus() throws Exception {
public void tryMoveToCommittedStatus() throws Exception {
for (; ; ) {
boolean awaitCommitTimeout = (time.milliseconds() - requestCommitTimestamp) > commitTimeout;
if (!awaitCommitTimeout) {
@@ -402,11 +388,19 @@
deleteFiles.forEach(delta::addDeletes);
delta.commit();
}
transaction.expireSnapshots()
.expireOlderThan(System.currentTimeMillis() - TimeUnit.HOURS.toMillis(1))
.retainLast(1)
.executeDeleteWith(EXPIRE_SNAPSHOT_EXECUTOR)
.commit();
try {
LogConfig currentLogConfig = config.get();
if (currentLogConfig.tableTopicExpireSnapshotEnabled) {
transaction.expireSnapshots()
.expireOlderThan(System.currentTimeMillis() - TimeUnit.HOURS.toMillis(currentLogConfig.tableTopicExpireSnapshotOlderThanHours))
.retainLast(currentLogConfig.tableTopicExpireSnapshotRetainLast)
.executeDeleteWith(EXPIRE_SNAPSHOT_EXECUTOR)
.commit();
}
} catch (Exception exception) {
// Ignore snapshot expiration failures so they do not fail the table commit.
LOGGER.error("[EXPIRE_SNAPSHOT_FAIL],{}", getTable().name(), exception);
}
}
recordMetrics();
@@ -474,9 +468,15 @@
}
private void recordMetrics() {
double fps = commitFieldCount * 1000.0 / Math.max(System.currentTimeMillis() - lastCommitTimestamp, 1);
FIELD_PER_SECONDS_METRICS.computeIfPresent(topic, (k, v) -> fps);
WATERMARK_METRICS.computeIfPresent(topic, (k, v) -> watermark(partitionWatermarks));
long now = System.currentTimeMillis();
double fps = commitFieldCount * 1000.0 / Math.max(now - lastCommitTimestamp, 1);
fieldsPerSecondMetric.record(fps);
long watermarkTimestamp = watermark(partitionWatermarks);
if (watermarkTimestamp == NOOP_WATERMARK) {
delayMetric.clear();
} else {
delayMetric.record(Math.max(now - watermarkTimestamp, 0));
}
}
private boolean tryEvolvePartition() {

Some files were not shown because too many files have changed in this diff.