diff --git a/.gitignore b/.gitignore index a7158425..d7d8a799 100644 --- a/.gitignore +++ b/.gitignore @@ -376,3 +376,5 @@ datasets/* # qnn 3rdParty source/backend/qnn/3rdParty/include +apps/iOS/MNNLLMChat/Chat +apps/iOS/MNNLLMChat/swift-transformers diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS.xcodeproj/project.pbxproj b/apps/iOS/MNNLLMChat/MNNLLMiOS.xcodeproj/project.pbxproj index e5a73de4..1f6d4aec 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS.xcodeproj/project.pbxproj +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS.xcodeproj/project.pbxproj @@ -52,7 +52,7 @@ isa = PBXFileSystemSynchronizedGroupBuildPhaseMembershipExceptionSet; buildPhase = 3E8591FA2D1D45070067B46F /* Sources */; membershipExceptions = ( - LLMWrapper/DiffusionSession.h, + InferenceEngine/DiffusionSession.h, ); }; /* End PBXFileSystemSynchronizedGroupBuildPhaseMembershipExceptionSet section */ diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Assets.xcassets/star.imageset/Contents.json b/apps/iOS/MNNLLMChat/MNNLLMiOS/Assets.xcassets/star.imageset/Contents.json new file mode 100644 index 00000000..a37c8724 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Assets.xcassets/star.imageset/Contents.json @@ -0,0 +1,21 @@ +{ + "images" : [ + { + "filename" : "star.png", + "idiom" : "universal", + "scale" : "1x" + }, + { + "idiom" : "universal", + "scale" : "2x" + }, + { + "idiom" : "universal", + "scale" : "3x" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Assets.xcassets/star.imageset/star.png b/apps/iOS/MNNLLMChat/MNNLLMiOS/Assets.xcassets/star.imageset/star.png new file mode 100644 index 00000000..48ec8ec5 Binary files /dev/null and b/apps/iOS/MNNLLMChat/MNNLLMiOS/Assets.xcassets/star.imageset/star.png differ diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Interactor/LLMChatInteractor.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Interactor/LLMChatInteractor.swift index c863804c..c7d8669a 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Interactor/LLMChatInteractor.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Interactor/LLMChatInteractor.swift @@ -63,7 +63,7 @@ final class LLMChatInteractor: ChatInteractorProtocol { } Task { - var status: Message.Status = .sending + let status: Message.Status = .sending var sender: LLMChatUser switch userType { @@ -74,13 +74,15 @@ final class LLMChatInteractor: ChatInteractorProtocol { case .system: sender = chatData.system } - var message: LLMChatMessage = await draftMessage.toLLMChatMessage( + let message: LLMChatMessage = await draftMessage.toLLMChatMessage( id: UUID().uuidString, user: sender, status: status) DispatchQueue.main.async { [weak self] in +// PerformanceMonitor.shared.recordUIUpdate() + switch userType { case .user, .system: self?.chatState.value.append(message) @@ -97,18 +99,24 @@ final class LLMChatInteractor: ChatInteractorProtocol { case .assistant: - var updateLastMsg = self?.chatState.value[(self?.chatState.value.count ?? 1) - 1] - - if let isDeepSeek = self?.modelInfo.name.lowercased().contains("deepseek"), isDeepSeek == true, - let text = self?.processor.process(progress: message.text) { - updateLastMsg?.text = text - } else { - updateLastMsg?.text += message.text - } - - message.text = self?.chatState.value[(self?.chatState.value.count ?? 1) - 1].text ?? "" - - self?.chatState.value[(self?.chatState.value.count ?? 1) - 1] = updateLastMsg ?? message +// PerformanceMonitor.shared.measureExecutionTime(operation: "String concatenation") { + var updateLastMsg = self?.chatState.value[(self?.chatState.value.count ?? 
1) - 1]
+            
+            if let isDeepSeek = self?.modelInfo.modelName.lowercased().contains("deepseek"), isDeepSeek == true,
+               let text = self?.processor.process(progress: message.text) {
+                updateLastMsg?.text = text
+            } else {
+                if let currentText = updateLastMsg?.text {
+                    updateLastMsg?.text = currentText + message.text
+                } else {
+                    updateLastMsg?.text = message.text
+                }
+            }
+            
+            if let updatedMsg = updateLastMsg {
+                self?.chatState.value[(self?.chatState.value.count ?? 1) - 1] = updatedMsg
+            }
+//            }
             }
         }
     }
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMChatData.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMChatData.swift
index c344e814..3b26b7a9 100644
--- a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMChatData.swift
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMChatData.swift
@@ -25,13 +25,13 @@ final class LLMChatData {
 
         self.assistant = LLMChatUser(
             uid: "2",
-            name: modelInfo.name,
+            name: modelInfo.modelName,
             avatar: AssetExtractor.createLocalUrl(forImageNamed: icon, withExtension: "png")
         )
 
         self.system = LLMChatUser(
             uid: "0",
-            name: modelInfo.name,
+            name: modelInfo.modelName,
             avatar: AssetExtractor.createLocalUrl(forImageNamed: icon, withExtension: "png")
         )
     }
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMState.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMState.swift
index 606b4186..d1800e4d 100644
--- a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMState.swift
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMState.swift
@@ -19,7 +19,7 @@ actor LLMState {
         return isProcessing
     }
 
-    func processContent(_ content: String, llm: LLMInferenceEngineWrapper?, completion: @escaping (String) -> Void) {
-        llm?.processInput(content, withOutput: completion)
+    func processContent(_ content: String, llm: LLMInferenceEngineWrapper?, showPerformance: Bool, completion: @escaping (String) -> Void) {
+        llm?.processInput(content, withOutput: completion, showPerformance: showPerformance)
     }
 }
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/ModelConfigManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/ModelConfigManager.swift
similarity index 100%
rename from apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/ModelConfigManager.swift
rename to apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/ModelConfigManager.swift
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/PerformanceMonitor.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/PerformanceMonitor.swift
new file mode 100644
index 00000000..b0520fac
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/PerformanceMonitor.swift
@@ -0,0 +1,121 @@
+//
+//  PerformanceMonitor.swift
+//  MNNLLMiOS
+//
+//  Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import Foundation
+import UIKit
+
+/**
+ * PerformanceMonitor - A singleton utility for monitoring and measuring UI performance
+ *
+ * This class provides real-time performance monitoring capabilities to help identify
+ * UI update bottlenecks, frame drops, and slow operations in iOS applications.
+ * It's particularly useful during development to ensure smooth user experience.
+ *
+ * Key Features:
+ * - Real-time FPS monitoring and frame drop detection
+ * - UI update lag detection with customizable thresholds
+ * - Execution time measurement for specific operations
+ * - Automatic performance statistics reporting
+ * - Thread-safe singleton implementation
+ *
+ * Usage Examples:
+ *
+ * 1. Monitor UI Updates:
+ * ```swift
+ * // Call this in your UI update methods
+ * PerformanceMonitor.shared.recordUIUpdate()
+ * ```
+ *
+ * 2. 
Measure Operation Performance:
+ * ```swift
+ * let result = PerformanceMonitor.shared.measureExecutionTime(operation: "Data Processing") {
+ *     // Your expensive operation here
+ *     return processLargeDataSet()
+ * }
+ * ```
+ *
+ * 3. Integration in ViewModels:
+ * ```swift
+ * func updateUI() {
+ *     PerformanceMonitor.shared.recordUIUpdate()
+ *     // Your UI update code
+ * }
+ * ```
+ *
+ * Performance Thresholds:
+ * - Target FPS: 60 FPS
+ * - Frame threshold: 25ms (1.5x normal frame time)
+ * - Slow operation threshold: 16ms (1 frame time)
+ */
+class PerformanceMonitor {
+    static let shared = PerformanceMonitor()
+    
+    private var lastUpdateTime: Date = Date()
+    private var lastReportTime: Date = Date()
+    private var updateCount: Int = 0
+    private var frameDropCount: Int = 0
+    private let targetFPS: Double = 60.0
+    private let frameThreshold: TimeInterval = 1.0 / 60.0 * 1.5 // Allow 1.5x normal frame time
+    
+    private init() {}
+    
+    /**
+     * Records a UI update event and monitors performance metrics
+     *
+     * Call this method whenever you perform UI updates to track performance.
+     * It automatically detects frame drops and calculates FPS statistics.
+     * Performance statistics are logged every second.
+     */
+    func recordUIUpdate() {
+        let currentTime = Date()
+        let timeDiff = currentTime.timeIntervalSince(lastUpdateTime)
+        lastUpdateTime = currentTime
+        
+        updateCount += 1
+        
+        // Detect frame drops from the gap between consecutive updates
+        if timeDiff > frameThreshold {
+            frameDropCount += 1
+            print("⚠️ UI Update Lag detected: \(timeDiff * 1000)ms (expected: \(frameThreshold * 1000)ms)")
+        }
+        
+        // Report statistics once per second, measured from the last report
+        let reportElapsed = currentTime.timeIntervalSince(lastReportTime)
+        if reportElapsed >= 1.0 {
+            let actualFPS = Double(updateCount) / reportElapsed
+            let dropRate = Double(frameDropCount) / Double(updateCount) * 100
+            
+            print("📊 Performance Stats - FPS: \(String(format: "%.1f", actualFPS)), Drop Rate: \(String(format: "%.1f", dropRate))%")
+            
+            // Reset counters for next measurement cycle
+            updateCount = 0
+            frameDropCount = 0
+            lastReportTime = currentTime
+        }
+    }
+    
+    /**
+     * Measures execution time for a specific operation
+     *
+     * Wraps any operation and measures its execution time. Operations taking
+     * longer than 16ms (1 frame time) are logged as slow operations.
+     *
+     * - Parameters:
+     *   - operation: A descriptive name for the operation being measured
+     *   - block: The operation to measure
+     * - Returns: The result of the operation
+     * - Throws: Re-throws any error thrown by the operation
+     */
+    func measureExecutionTime<T>(operation: String, block: () throws -> T) rethrows -> T {
+        let startTime = CFAbsoluteTimeGetCurrent()
+        let result = try block()
+        let executionTime = CFAbsoluteTimeGetCurrent() - startTime
+        
+        if executionTime > 0.016 { // Over 16ms (1 frame time)
+            print("⏱️ Slow Operation: \(operation) took \(String(format: "%.3f", executionTime * 1000))ms")
+        }
+        
+        return result
+    }
+}
\ No newline at end of file
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/README_Performance.md b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/README_Performance.md
new file mode 100644
index 00000000..1baf9cd0
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/README_Performance.md
@@ -0,0 +1,214 @@
+# UI Performance Optimization Guide
+
+## Overview
+
+This document explains the performance optimization utilities implemented in the chat system to ensure smooth AI streaming text output and overall UI responsiveness.
+
+## Core Components
+
+### 1. PerformanceMonitor
+
+A singleton utility for real-time performance monitoring and measurement. 
+
+#### Implementation Principles
+
+- **Real-time FPS Monitoring**: Tracks UI update frequency and calculates actual FPS
+- **Frame Drop Detection**: Identifies when the gap between UI updates exceeds the 25ms threshold (1.5x the 16.67ms frame budget at 60 FPS)
+- **Operation Time Measurement**: Measures execution time of specific operations
+- **Automatic Statistics Reporting**: Logs performance metrics every second
+
+#### Key Features
+
+```swift
+class PerformanceMonitor {
+    static let shared = PerformanceMonitor()
+    
+    // Performance thresholds
+    private let targetFPS: Double = 60.0
+    private let frameThreshold: TimeInterval = 1.0 / 60.0 * 1.5 // 25ms threshold
+    
+    func recordUIUpdate() { /* Track UI updates */ }
+    func measureExecutionTime<T>(operation: String, block: () throws -> T) { /* Measure operations */ }
+}
+```
+
+#### Usage in the Chat System
+
+- Integrated into `LLMChatInteractor` to monitor message updates
+- Tracks UI update frequency during AI text streaming
+- Identifies performance bottlenecks in real-time
+
+### 2. UIUpdateOptimizer
+
+An actor-based utility for batching and throttling UI updates during streaming scenarios.
+
+#### Implementation Principles
+
+- **Batching Mechanism**: Groups multiple small updates into larger, more efficient ones
+- **Time-based Throttling**: Limits update frequency to prevent UI overload
+- **Actor-based Thread Safety**: Ensures safe concurrent access to update queue
+- **Automatic Flush Strategy**: Intelligently decides when to apply batched updates
+
+#### Architecture
+
+```swift
+actor UIUpdateOptimizer {
+    static let shared = UIUpdateOptimizer()
+    
+    private var pendingUpdates: [String] = []
+    private let batchSize: Int = 5              // Batch threshold
+    private let flushInterval: TimeInterval = 0.03  // 30ms throttling
+    
+    func addUpdate(_ content: String, completion: @escaping (String) -> Void)
+    func forceFlush(completion: @escaping (String) -> Void)
+}
+```
+
+#### Optimization Strategies
+
+1. **Batch Size Control**: Groups up to 5 updates before flushing
+2. **Time-based Throttling**: Flushes updates every 30ms maximum
+3. **Intelligent Scheduling**: Cancels redundant flush operations
+4. **Main Thread Delegation**: Ensures UI updates occur on the main thread
+
+#### Integration Points
+
+- **LLM Streaming**: Optimizes real-time text output from AI models
+- **Message Updates**: Batches frequent message content changes
+- **Force Flush**: Ensures final content is displayed when streaming ends
+
+## Performance Optimization Flow
+
+```
+AI Model Output → UIUpdateOptimizer → Batched Updates → UI Thread → Display
+                        ↓
+               PerformanceMonitor (Monitoring)
+                        ↓
+                Console Logs (Metrics)
+```
+
+## Testing and Validation
+
+### Performance Metrics
+
+1. **Target Performance**:
+   - Maintain 50+ FPS during streaming
+   - Keep frame drop rate below 5%
+   - Single operations under 16ms
+
+2. **Monitoring Indicators**:
+   - `📊 Performance Stats` - Real-time FPS and drop rate
+   - `⚠️ UI Update Lag detected` - Frame drop warnings
+   - `⏱️ Slow Operation` - Operation time alerts
+
+### Testing Methodology
+
+1. **Streaming Tests**:
+   - Test with long-form AI responses (articles, code)
+   - Monitor console output for performance warnings
+   - Observe visual smoothness of text animation
+
+2. **Load Testing**:
+   - Rapid successive message sending
+   - Large text blocks processing
+   - Multiple concurrent operations
+
+3. 
**Comparative Analysis**: + - Before/after optimization measurements + - Different device performance profiles + - Various content types and sizes + +### Debug Configuration + +For development and testing purposes: + +```swift +// Example configuration adjustments (not implemented in production) +// UIUpdateOptimizer.shared.batchSize = 10 +// UIUpdateOptimizer.shared.flushInterval = 0.05 +``` + +## Implementation Details + +### UIUpdateOptimizer Algorithm + +1. **Add Update**: New content is appended to pending queue +2. **Threshold Check**: Evaluate if immediate flush is needed + - Batch size reached (≥5 updates) + - Time threshold exceeded (≥30ms since last flush) +3. **Scheduling**: If not immediate, schedule delayed flush +4. **Flush Execution**: Combine all pending updates and execute on main thread +5. **Cleanup**: Clear queue and reset timing + +### PerformanceMonitor Algorithm + +1. **Update Recording**: Track each UI update call +2. **Timing Analysis**: Calculate time difference between updates +3. **Frame Drop Detection**: Compare against 25ms threshold +4. **Statistics Calculation**: Compute FPS and drop rate every second +5. **Logging**: Output performance metrics to console + +## Integration Examples + +### In ViewModels + +```swift +func updateUI() { + PerformanceMonitor.shared.recordUIUpdate() + // UI update code here +} + +let result = PerformanceMonitor.shared.measureExecutionTime(operation: "Data Processing") { + return processLargeDataSet() +} +``` + +### In Streaming Scenarios + +```swift +await UIUpdateOptimizer.shared.addUpdate(newText) { batchedContent in + // Update UI with optimized batched content + updateTextView(with: batchedContent) +} + +// When stream ends +await UIUpdateOptimizer.shared.forceFlush { finalContent in + finalizeTextDisplay(with: finalContent) +} +``` + +## Troubleshooting + +### Common Performance Issues + +1. **High Frame Drop Rate**: + - Check for blocking operations on main thread + - Verify batch size configuration + - Monitor memory usage + +2. **Slow Operation Warnings**: + - Profile specific operations causing delays + - Consider background threading for heavy tasks + - Optimize data processing algorithms + +3. **Inconsistent Performance**: + - Check device thermal state + - Monitor memory pressure + - Verify background app activity + +### Diagnostic Tools + +- **Console Monitoring**: Watch for performance log messages +- **Xcode Instruments**: Use Time Profiler for detailed analysis +- **Memory Graph**: Check for memory leaks affecting performance +- **Energy Impact**: Monitor battery and thermal effects + +## Best Practices + +1. **Proactive Monitoring**: Always call `recordUIUpdate()` for critical UI operations +2. **Batch When Possible**: Use `UIUpdateOptimizer` for frequent updates +3. **Measure Critical Paths**: Wrap expensive operations with `measureExecutionTime` +4. **Test on Real Devices**: Performance varies significantly across device types +5. **Monitor in Production**: Keep performance logging enabled during development + +This performance optimization system ensures smooth user experience during AI text generation while providing developers with the tools needed to maintain and improve performance over time. 
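+
+## Appendix: Combined Usage Sketch
+
+The sketch below is not part of the shipped code; it shows one way the two utilities can be wired together inside a streaming callback. `updateMessageText(_:)` is a hypothetical UI hook standing in for whatever update method the host view model exposes:
+
+```swift
+func handleStreamChunk(_ chunk: String, isFinal: Bool) {
+    Task {
+        if isFinal {
+            // Stream ended: push out whatever is still queued
+            await UIUpdateOptimizer.shared.forceFlush { text in
+                PerformanceMonitor.shared.recordUIUpdate()
+                updateMessageText(text) // hypothetical UI hook
+            }
+        } else {
+            // Mid-stream: let the optimizer batch and throttle
+            await UIUpdateOptimizer.shared.addUpdate(chunk) { batched in
+                PerformanceMonitor.shared.recordUIUpdate()
+                updateMessageText(batched) // hypothetical UI hook
+            }
+        }
+    }
+}
+```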
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/UIUpdateOptimizer.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/UIUpdateOptimizer.swift
new file mode 100644
index 00000000..a877a170
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Services/UIUpdateOptimizer.swift
@@ -0,0 +1,136 @@
+//
+//  UIUpdateOptimizer.swift
+//  MNNLLMiOS
+//
+//  Created by 游薪渝(揽清) on 2025/7/7.
+//
+
+import Foundation
+import SwiftUI
+
+/**
+ * UIUpdateOptimizer - A utility for batching and throttling UI updates to improve performance
+ *
+ * This actor-based optimizer helps reduce the frequency of UI updates by batching multiple
+ * updates together and applying throttling mechanisms. It's particularly useful for scenarios
+ * like streaming text updates, real-time data feeds, or any situation where frequent UI
+ * updates might cause performance issues.
+ *
+ * Key Features:
+ * - Batches multiple updates into a single operation
+ * - Applies time-based throttling to limit update frequency
+ * - Thread-safe actor implementation
+ * - Automatic flush mechanism for pending updates
+ *
+ * Usage Example:
+ * ```swift
+ * // For streaming text updates
+ * await UIUpdateOptimizer.shared.addUpdate(newText) { batchedContent in
+ *     // Update UI with batched content
+ *     textView.text = batchedContent
+ * }
+ *
+ * // Force flush remaining updates when stream ends
+ * await UIUpdateOptimizer.shared.forceFlush { finalContent in
+ *     textView.text = finalContent
+ * }
+ * ```
+ *
+ * Configuration:
+ * - batchSize: Number of updates to batch before triggering immediate flush (default: 5)
+ * - flushInterval: Time interval in seconds between automatic flushes (default: 0.03s / 30ms)
+ */
+actor UIUpdateOptimizer {
+    static let shared = UIUpdateOptimizer()
+    
+    private var pendingUpdates: [String] = []
+    private var lastFlushTime: Date = Date()
+    private var flushTask: Task<Void, Never>?
+    
+    // Configuration constants
+    private let batchSize: Int = 5 // Batch size threshold for immediate flush
+    private let flushInterval: TimeInterval = 0.03 // 30ms throttling interval
+    
+    private init() {}
+    
+    /**
+     * Adds a content update to the pending queue
+     *
+     * Updates are either flushed immediately if batch size or time threshold is reached,
+     * or scheduled for delayed flushing to optimize performance.
+     *
+     * - Parameters:
+     *   - content: The content string to add to the update queue
+     *   - completion: Callback executed with the batched content when flushed
+     */
+    func addUpdate(_ content: String, completion: @escaping (String) -> Void) {
+        pendingUpdates.append(content)
+        
+        // Determine if immediate flush is needed based on batch size or time interval
+        let shouldFlushImmediately = pendingUpdates.count >= batchSize ||
+                                     Date().timeIntervalSince(lastFlushTime) >= flushInterval
+        
+        if shouldFlushImmediately {
+            flushUpdates(completion: completion)
+        } else {
+            // Schedule delayed flush to optimize performance
+            scheduleFlush(completion: completion)
+        }
+    }
+    
+    /**
+     * Schedules a delayed flush operation
+     *
+     * Cancels any existing scheduled flush and creates a new one to avoid
+     * excessive flush operations while maintaining responsiveness.
+     *
+     * - Parameter completion: Callback to execute when flush occurs
+     */
+    private func scheduleFlush(completion: @escaping (String) -> Void) {
+        // Cancel previous scheduled flush to avoid redundant operations
+        flushTask?.cancel()
+        
+        flushTask = Task {
+            try? 
await Task.sleep(nanoseconds: UInt64(flushInterval * 1_000_000_000)) + + if !Task.isCancelled && !pendingUpdates.isEmpty { + flushUpdates(completion: completion) + } + } + } + + /** + * Flushes all pending updates immediately + * + * Combines all pending updates into a single string and executes the completion + * callback on the main actor thread for UI updates. + * + * - Parameter completion: Callback executed with the combined content + */ + private func flushUpdates(completion: @escaping (String) -> Void) { + guard !pendingUpdates.isEmpty else { return } + + let batchedContent = pendingUpdates.joined() + pendingUpdates.removeAll() + lastFlushTime = Date() + + Task { @MainActor in + completion(batchedContent) + } + } + + /** + * Forces immediate flush of any remaining pending updates + * + * This method should be called when you need to ensure all pending updates + * are processed immediately, such as when a stream ends or the view is about + * to disappear. + * + * - Parameter completion: Callback executed with any remaining content + */ + func forceFlush(completion: @escaping (String) -> Void) { + if !pendingUpdates.isEmpty { + flushUpdates(completion: completion) + } + } +} \ No newline at end of file diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMChatViewModel.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/ViewModels/LLMChatViewModel.swift similarity index 76% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMChatViewModel.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/ViewModels/LLMChatViewModel.swift index 21633d98..8b2a5e26 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Models/LLMChatViewModel.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/ViewModels/LLMChatViewModel.swift @@ -11,7 +11,6 @@ import AVFoundation import ExyteChat import ExyteMediaPicker - final class LLMChatViewModel: ObservableObject { private var llm: LLMInferenceEngineWrapper? @@ -21,6 +20,7 @@ final class LLMChatViewModel: ObservableObject { @Published var messages: [Message] = [] @Published var isModelLoaded = false @Published var isProcessing: Bool = false + @Published var currentStreamingMessageId: String? = nil @Published var useMmap: Bool = false @@ -57,7 +57,7 @@ final class LLMChatViewModel: ObservableObject { let modelConfigManager: ModelConfigManager var isDiffusionModel: Bool { - return modelInfo.name.lowercased().contains("diffusion") + return modelInfo.modelName.lowercased().contains("diffusion") } init(modelInfo: ModelInfo, history: ChatHistory? 
= nil) {
@@ -88,7 +88,7 @@ final class LLMChatViewModel: ObservableObject {
             ), userType: .system)
         }
         
-        if modelInfo.name.lowercased().contains("diffusion") {
+        if modelInfo.modelName.lowercased().contains("diffusion") {
             diffusion = DiffusionSession(modelPath: modelPath, completion: { [weak self] success in
                 Task { @MainActor in
                     print("Diffusion Model \(success)")
@@ -150,7 +150,7 @@ final class LLMChatViewModel: ObservableObject {
     func sendToLLM(draft: DraftMessage) {
         self.send(draft: draft, userType: .user)
         if isModelLoaded {
-            if modelInfo.name.lowercased().contains("diffusion") {
+            if modelInfo.modelName.lowercased().contains("diffusion") {
                 self.getDiffusionResponse(draft: draft)
             } else {
                 self.getLLMRespsonse(draft: draft)
@@ -166,9 +166,7 @@ final class LLMChatViewModel: ObservableObject {
         
         Task {
-            let tempDir = FileManager.default.temporaryDirectory
-            let imageName = UUID().uuidString + ".jpg"
-            let tempImagePath = tempDir.appendingPathComponent(imageName).path
+            let tempImagePath = FileOperationManager.shared.generateTempImagePath().path
             
             var lastProcess:Int32 = 0
@@ -198,7 +196,21 @@ final class LLMChatViewModel: ObservableObject {
     func getLLMRespsonse(draft: DraftMessage) {
         Task {
             await llmState.setProcessing(true)
-            await MainActor.run { self.isProcessing = true }
+            await MainActor.run {
+                self.isProcessing = true
+                let emptyMessage = DraftMessage(
+                    text: "",
+                    thinkText: "",
+                    medias: [],
+                    recording: nil,
+                    replyMessage: nil,
+                    createdAt: Date()
+                )
+                self.send(draft: emptyMessage, userType: .assistant)
+                if let lastMessage = self.messages.last {
+                    self.currentStreamingMessageId = lastMessage.id
+                }
+            }
             
             var content = draft.text
             let medias = draft.medias
@@ -209,18 +221,10 @@ final class LLMChatViewModel: ObservableObject {
                     continue
                 }
                 
-                let isInTempDirectory = url.path.contains("/tmp/")
                 let fileName = url.lastPathComponent
-                if !isInTempDirectory {
-                    guard let fileUrl = AssetExtractor.copyFileToTmpDirectory(from: url, fileName: fileName) else {
-                        continue
-                    }
-                    let processedUrl = convertHEICImage(from: fileUrl)
-                    content = "<img>\(processedUrl?.path ?? "")</img>" + content
-                } else {
-                    let processedUrl = convertHEICImage(from: url)
-                    content = "<img>\(processedUrl?.path ?? "")</img>" + content
+                if let processedUrl = FileOperationManager.shared.processImageFile(from: url, fileName: fileName) {
+                    content = "<img>\(processedUrl.path)</img>" + content
                 }
             }
@@ -232,13 +236,36 @@ final class LLMChatViewModel: ObservableObject {
             
             let convertedContent = self.convertDeepSeekMutliChat(content: content)
             
-            await llmState.processContent(convertedContent, llm: self.llm) { [weak self] output in
+            await llmState.processContent(convertedContent, llm: self.llm, showPerformance: true) { [weak self] output in
+                guard let self = self else { return }
+                
+                if output.contains("<eop>") {
+                    // force flush
+                    Task {
+                        await UIUpdateOptimizer.shared.forceFlush { finalOutput in
+                            if !finalOutput.isEmpty {
+                                self.send(draft: DraftMessage(
+                                    text: finalOutput,
+                                    thinkText: "",
+                                    medias: [],
+                                    recording: nil,
+                                    replyMessage: nil,
+                                    createdAt: Date()
+                                ), userType: .assistant)
+                            }
+                        }
+                        
+                        await MainActor.run {
+                            self.isProcessing = false
+                            self.currentStreamingMessageId = nil
+                        }
+                        await self.llmState.setProcessing(false)
+                    }
+                    return
+                }
                 Task { @MainActor in
-                    if (output.contains("<eop>")) {
-                        self?.isProcessing = false
-                        await self?.llmState.setProcessing(false)
-                    } else {
-                        self?.send(draft: DraftMessage(
+                    await UIUpdateOptimizer.shared.addUpdate(output) { output in
+                        self.send(draft: DraftMessage(
                             text: output,
                             thinkText: "",
                             medias: [],
@@ -248,6 +275,7 @@ final class LLMChatViewModel: ObservableObject {
                         ), userType: .assistant)
                     }
                 }
+            }
         }
     }
@@ -259,7 +287,7 @@ final class LLMChatViewModel: ObservableObject {
     }
     
     private func convertDeepSeekMutliChat(content: String) -> String {
-        if self.modelInfo.name.lowercased().contains("deepseek") {
+        if self.modelInfo.modelName.lowercased().contains("deepseek") {
             /* format:
              <|begin_of_sentence|><|User|>{text}<|Assistant|>{text}<|end_of_sentence|>
              <|User|>{text}<|Assistant|>{text}<|end_of_sentence|>
             */
@@ -286,14 +314,11 @@ final class LLMChatViewModel: ObservableObject {
         }
     }
     
-    private func convertHEICImage(from url: URL) -> URL? 
{ - var fileUrl = url - if fileUrl.isHEICImage() { - if let convertedUrl = AssetExtractor.convertHEICToJPG(heicUrl: fileUrl) { - fileUrl = convertedUrl - } - } - return fileUrl + // MARK: - Public Methods for File Operations + + /// Cleans the model temporary folder using FileOperationManager + func cleanModelTmpFolder() { + FileOperationManager.shared.cleanModelTempFolder(modelPath: modelInfo.localPath) } func onStart() { @@ -311,14 +336,18 @@ final class LLMChatViewModel: ObservableObject { func onStop() { ChatHistoryManager.shared.saveChat( historyId: historyId, - modelId: modelInfo.modelId, - modelName: modelInfo.name, + modelId: modelInfo.id, + modelName: modelInfo.modelName, messages: messages ) interactor.disconnect() llm = nil - self.cleanTmpFolder() + + FileOperationManager.shared.cleanTempDirectories() + if !useMmap { + FileOperationManager.shared.cleanModelTempFolder(modelPath: modelInfo.localPath) + } } func loadMoreMessage(before message: Message) { @@ -326,40 +355,4 @@ final class LLMChatViewModel: ObservableObject { .sink { _ in } .store(in: &subscriptions) } - - - func cleanModelTmpFolder() { - let tmpFolderURL = URL(fileURLWithPath: self.modelInfo.localPath).appendingPathComponent("temp") - self.cleanFolder(tmpFolderURL: tmpFolderURL) - } - - private func cleanTmpFolder() { - let fileManager = FileManager.default - let tmpDirectoryURL = fileManager.temporaryDirectory - - self.cleanFolder(tmpFolderURL: tmpDirectoryURL) - - if !useMmap { - cleanModelTmpFolder() - } - } - - private func cleanFolder(tmpFolderURL: URL) { - let fileManager = FileManager.default - do { - let files = try fileManager.contentsOfDirectory(at: tmpFolderURL, includingPropertiesForKeys: nil) - for file in files { - if !file.absoluteString.lowercased().contains("networkdownload") { - do { - try fileManager.removeItem(at: file) - print("Deleted file: \(file.path)") - } catch { - print("Error deleting file: \(file.path), \(error.localizedDescription)") - } - } - } - } catch { - print("Error accessing tmp directory: \(error.localizedDescription)") - } - } } diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMChatView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMChatView.swift index ad9f744a..9fe84a0f 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMChatView.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMChatView.swift @@ -17,12 +17,14 @@ struct LLMChatView: View { private let title: String private let modelPath: String - private let recorderSettings = RecorderSettings(audioFormatID: kAudioFormatLinearPCM, sampleRate: 44100, numberOfChannels: 2, linearPCMBitDepth: 16) + private let recorderSettings = RecorderSettings(audioFormatID: kAudioFormatLinearPCM, + sampleRate: 44100, numberOfChannels: 2, + linearPCMBitDepth: 16) @State private var showSettings = false init(modelInfo: ModelInfo, history: ChatHistory? 
= nil) { - self.title = modelInfo.name + self.title = modelInfo.modelName self.modelPath = modelInfo.localPath let viewModel = LLMChatViewModel(modelInfo: modelInfo, history: history) _viewModel = StateObject(wrappedValue: viewModel) @@ -32,6 +34,15 @@ struct LLMChatView: View { ChatView(messages: viewModel.messages, chatType: .conversation) { draft in viewModel.sendToLLM(draft: draft) } + messageBuilder: { message, positionInGroup, positionInCommentsGroup, showContextMenuClosure, messageActionClosure, showAttachmentClosure in + LLMChatMessageView( + message: message, + positionInGroup: positionInGroup, + showContextMenuClosure: showContextMenuClosure, + messageActionClosure: messageActionClosure, + showAttachmentClosure: showAttachmentClosure + ) + } .setAvailableInput( self.title.lowercased().contains("vl") ? .textAndMedia : self.title.lowercased().contains("audio") ? .textAndAudio : @@ -109,4 +120,28 @@ struct LLMChatView: View { } .onDisappear(perform: viewModel.onStop) } + + // MARK: - LLM Chat Message Builder + @ViewBuilder + private func LLMChatMessageView( + message: Message, + positionInGroup: PositionInUserGroup, + showContextMenuClosure: @escaping () -> Void, + messageActionClosure: @escaping (Message, DefaultMessageMenuAction) -> Void, + showAttachmentClosure: @escaping (Attachment) -> Void + ) -> some View { + LLMMessageView( + message: message, + positionInGroup: positionInGroup, + isAssistantMessage: !message.user.isCurrentUser, + isStreamingMessage: viewModel.currentStreamingMessageId == message.id, + showContextMenuClosure: { + if !viewModel.isProcessing { + showContextMenuClosure() + } + }, + messageActionClosure: messageActionClosure, + showAttachmentClosure: showAttachmentClosure + ) + } } diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMMessageTextView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMMessageTextView.swift new file mode 100644 index 00000000..8682082c --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMMessageTextView.swift @@ -0,0 +1,338 @@ +// +// LLMMessageTextView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/7. +// + +import SwiftUI +import MarkdownUI + +/** + * LLMMessageTextView - A specialized text view designed for LLM chat messages with typewriter animation + * + * This SwiftUI component provides an enhanced text display specifically designed for AI chat applications. + * It supports both plain text and Markdown rendering with an optional typewriter animation effect + * that creates a dynamic, engaging user experience during AI response streaming. + * + * Key Features: + * - Typewriter animation for streaming AI responses + * - Markdown support with custom styling + * - Smart animation control based on message type and content length + * - Automatic animation management with proper cleanup + * - Performance-optimized character-by-character rendering + * + * Usage Examples: + * + * 1. Basic AI Message with Typewriter Effect: + * ```swift + * LLMMessageTextView( + * text: "Hello! This is an AI response with typewriter animation.", + * messageUseMarkdown: false, + * messageId: "msg_001", + * isAssistantMessage: true, + * isStreamingMessage: true + * ) + * ``` + * + * 2. Markdown Message with Custom Styling: + * ```swift + * LLMMessageTextView( + * text: "**Bold text** and *italic text* with `code blocks`", + * messageUseMarkdown: true, + * messageId: "msg_002", + * isAssistantMessage: true, + * isStreamingMessage: true + * ) + * ``` + * + * 3. 
User Message (No Animation): + * ```swift + * LLMMessageTextView( + * text: "This is a user message", + * messageUseMarkdown: false, + * messageId: "msg_003", + * isAssistantMessage: false, + * isStreamingMessage: false + * ) + * ``` + * + * Animation Configuration: + * - typingSpeed: 0.015 seconds per character (adjustable) + * - chunkSize: 1 character per animation frame + * - Minimum text length for animation: 5 characters + * - Auto-cleanup on view disappear or streaming completion + */ +struct LLMMessageTextView: View { + let text: String? + let messageUseMarkdown: Bool + let messageId: String + let isAssistantMessage: Bool + let isStreamingMessage: Bool // Whether this message is currently being streamed + + @State private var displayedText: String = "" + @State private var animationTimer: Timer? + + // Typewriter animation configuration + private let typingSpeed: TimeInterval = 0.015 // Time interval per character + private let chunkSize: Int = 1 // Number of characters to display per frame + + init(text: String?, + messageUseMarkdown: Bool = false, + messageId: String, + isAssistantMessage: Bool = false, + isStreamingMessage: Bool = false) { + self.text = text + self.messageUseMarkdown = messageUseMarkdown + self.messageId = messageId + self.isAssistantMessage = isAssistantMessage + self.isStreamingMessage = isStreamingMessage + } + + var body: some View { + Group { + if let text = text, !text.isEmpty { + if isAssistantMessage && isStreamingMessage && shouldUseTypewriter { + typewriterView(text) + } else { + staticView(text) + } + } + } + .onAppear { + if let text = text, isAssistantMessage && isStreamingMessage && shouldUseTypewriter { + startTypewriterAnimation(for: text) + } else if let text = text { + displayedText = text + } + } + .onDisappear { + stopAnimation() + } + .onChange(of: text) { oldText, newText in + handleTextChange(newText) + } + .onChange(of: isStreamingMessage) { oldIsStreaming, newIsStreaming in + if !newIsStreaming { + // Streaming ended, display complete text + if let text = text { + displayedText = text + } + stopAnimation() + } + } + } + + /** + * Determines whether typewriter animation should be used + * + * Animation is enabled only for assistant messages with more than 5 characters + * to avoid unnecessary animation for short responses. + */ + private var shouldUseTypewriter: Bool { + // Enable typewriter effect only for assistant messages with sufficient length + return isAssistantMessage && (text?.count ?? 
0) > 5 + } + + /** + * Renders text with typewriter animation effect + * + * - Parameter text: The complete text to be animated + * - Returns: A view displaying the animated text with optional Markdown support + */ + @ViewBuilder + private func typewriterView(_ text: String) -> some View { + if messageUseMarkdown { + Markdown(displayedText) + .markdownBlockStyle(\.blockquote) { configuration in + configuration.label + .padding() + .markdownTextStyle { + FontSize(13) + FontWeight(.light) + BackgroundColor(nil) + } + .overlay(alignment: .leading) { + Rectangle() + .fill(Color.gray) + .frame(width: 4) + } + .background(Color.gray.opacity(0.2)) + } + } else { + Text(displayedText) + } + } + + /** + * Renders static text without animation + * + * - Parameter text: The text to be displayed + * - Returns: A view displaying the complete text with optional Markdown support + */ + @ViewBuilder + private func staticView(_ text: String) -> some View { + if messageUseMarkdown { + Markdown(text) + .markdownBlockStyle(\.blockquote) { configuration in + configuration.label + .padding() + .markdownTextStyle { + FontSize(13) + FontWeight(.light) + BackgroundColor(nil) + } + .overlay(alignment: .leading) { + Rectangle() + .fill(Color.gray) + .frame(width: 4) + } + .background(Color.gray.opacity(0.2)) + } + } else { + Text(text) + } + } + + /** + * Handles text content changes during streaming + * + * This method intelligently manages animation continuation, restart, or direct display + * based on the relationship between old and new text content. + * + * - Parameter newText: The updated text content + */ + private func handleTextChange(_ newText: String?) { + guard let newText = newText else { + displayedText = "" + stopAnimation() + return + } + + if isAssistantMessage && isStreamingMessage && shouldUseTypewriter { + // Check if new text is an extension of current displayed text + if newText.hasPrefix(displayedText) && newText != displayedText { + // Continue typewriter animation + continueTypewriterAnimation(with: newText) + } else if newText != displayedText { + // Restart animation with new content + restartTypewriterAnimation(with: newText) + } + } else { + // Display text directly without animation + displayedText = newText + stopAnimation() + } + } + + /** + * Initiates typewriter animation for the given text + * + * - Parameter text: The text to animate + */ + private func startTypewriterAnimation(for text: String) { + displayedText = "" + continueTypewriterAnimation(with: text) + } + + /** + * Continues or resumes typewriter animation + * + * This method sets up a timer-based animation that progressively reveals + * characters at the configured typing speed. + * + * - Parameter text: The complete text to animate + */ + private func continueTypewriterAnimation(with text: String) { + guard displayedText.count < text.count else { return } + + stopAnimation() + + animationTimer = Timer.scheduledTimer(withTimeInterval: typingSpeed, repeats: true) { timer in + DispatchQueue.main.async { + self.appendNextCharacters(from: text) + } + } + } + + /** + * Restarts typewriter animation with new content + * + * - Parameter text: The new text to animate + */ + private func restartTypewriterAnimation(with text: String) { + stopAnimation() + displayedText = "" + startTypewriterAnimation(for: text) + } + + /** + * Appends the next character(s) to the displayed text + * + * This method is called by the animation timer to progressively reveal + * text characters. 
It handles proper string indexing and animation completion.
+     *
+     * - Parameter text: The source text to extract characters from
+     */
+    private func appendNextCharacters(from text: String) {
+        let currentLength = displayedText.count
+        guard currentLength < text.count else {
+            stopAnimation()
+            return
+        }
+        
+        let endIndex = min(currentLength + chunkSize, text.count)
+        let startIndex = text.index(text.startIndex, offsetBy: currentLength)
+        let targetIndex = text.index(text.startIndex, offsetBy: endIndex)
+        
+        let newChars = text[startIndex..<targetIndex]
+        displayedText += String(newChars)
+        
+        if endIndex >= text.count {
+            stopAnimation()
+        }
+    }
+    
+    /**
+     * Stops and cleans up the typewriter animation
+     *
+     * This method should be called when animation is no longer needed
+     * to prevent memory leaks and unnecessary timer execution.
+     */
+    private func stopAnimation() {
+        animationTimer?.invalidate()
+        animationTimer = nil
+    }
+}
+
+// MARK: - Preview Provider
+struct LLMMessageTextView_Previews: PreviewProvider {
+    static var previews: some View {
+        VStack(spacing: 20) {
+            LLMMessageTextView(
+                text: "This is a typewriter animation demo text. Hello, this demonstrates the streaming effect!",
+                messageUseMarkdown: false,
+                messageId: "test1",
+                isAssistantMessage: true,
+                isStreamingMessage: true
+            )
+            
+            LLMMessageTextView(
+                text: "**Bold text** and *italic text* with markdown support.",
+                messageUseMarkdown: true,
+                messageId: "test2",
+                isAssistantMessage: true,
+                isStreamingMessage: true
+            )
+            
+            LLMMessageTextView(
+                text: "Regular user message without animation.",
+                messageUseMarkdown: false,
+                messageId: "test3",
+                isAssistantMessage: false,
+                isStreamingMessage: false
+            )
+        }
+        .padding()
+    }
+}
\ No newline at end of file
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMMessageView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMMessageView.swift
new file mode 100644
index 00000000..59adc952
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/LLMMessageView.swift
@@ -0,0 +1,275 @@
+//
+//  LLMMessageView.swift
+//  MNNLLMiOS
+//
+//  Created by 游薪渝(揽清) on 2025/7/7.
+//
+
+import SwiftUI
+import Foundation
+import ExyteChat
+
+// MARK: - Custom Message View
+struct LLMMessageView: View {
+    let message: Message
+    let positionInGroup: PositionInUserGroup
+    let isAssistantMessage: Bool
+    let isStreamingMessage: Bool
+    let showContextMenuClosure: () -> Void
+    let messageActionClosure: (Message, DefaultMessageMenuAction) -> Void
+    let showAttachmentClosure: (Attachment) -> Void
+    
+    let theme = ChatTheme(
+        colors: .init(
+            messageMyBG: .customBlue.opacity(0.2),
+            messageFriendBG: .clear
+        ),
+        images: .init(
+            attach: Image(systemName: "photo"),
+            attachCamera: Image("attachCamera", bundle: .current)
+        )
+    )
+    
+    @State var avatarViewSize: CGSize = .zero
+    @State var timeSize: CGSize = .zero
+    
+    static let widthWithMedia: CGFloat = 204
+    static let horizontalNoAvatarPadding: CGFloat = 8
+    static let horizontalAvatarPadding: CGFloat = 8
+    static let horizontalTextPadding: CGFloat = 12
+    static let horizontalAttachmentPadding: CGFloat = 1
+    static let horizontalBubblePadding: CGFloat = 70
+    
+    var additionalMediaInset: CGFloat {
+        message.attachments.count > 1 ? LLMMessageView.horizontalAttachmentPadding * 2 : 0
+    }
+    
+    var showAvatar: Bool {
+        positionInGroup == .single
+        || positionInGroup == .last
+    }
+    
+    var topPadding: CGFloat {
+        positionInGroup == .single || positionInGroup == .first ? 
8 : 4 + } + + var bottomPadding: CGFloat { + 0 + } + + var body: some View { + HStack(alignment: .top, spacing: 0) { + if !message.user.isCurrentUser { + avatarView + } + + VStack(alignment: message.user.isCurrentUser ? .trailing : .leading, spacing: 2) { + bubbleView(message) + } + } + .padding(.top, topPadding) + .padding(.bottom, bottomPadding) + .padding(.trailing, message.user.isCurrentUser ? LLMMessageView.horizontalNoAvatarPadding : 0) + .padding(message.user.isCurrentUser ? .leading : .trailing, message.user.isCurrentUser ? LLMMessageView.horizontalBubblePadding : 0) + .frame(maxWidth: UIScreen.main.bounds.width, alignment: message.user.isCurrentUser ? .trailing : .leading) + .contentShape(Rectangle()) + .onLongPressGesture { + showContextMenuClosure() + } + } + + @ViewBuilder + func bubbleView(_ message: Message) -> some View { + VStack(alignment: .leading, spacing: 0) { + if !message.attachments.isEmpty { + attachmentsView(message) + } + + if !message.text.isEmpty { + textWithTimeView(message) + } + + if let recording = message.recording { + VStack(alignment: .trailing, spacing: 8) { + recordingView(recording) + messageTimeView() + .padding(.bottom, 8) + .padding(.trailing, 12) + } + } + } + .bubbleBackground(message, theme: theme) + } + + @ViewBuilder + var avatarView: some View { + Group { + if showAvatar { + AsyncImage(url: message.user.avatarURL) { image in + image + .resizable() + .aspectRatio(contentMode: .fill) + } placeholder: { + Circle() + .fill(Color.gray.opacity(0.3)) + } + .frame(width: 32, height: 32) + .clipShape(Circle()) + .contentShape(Circle()) + } else { + Color.clear.frame(width: 32, height: 32) + } + } + .padding(.horizontal, LLMMessageView.horizontalAvatarPadding) + .sizeGetter($avatarViewSize) + } + + @ViewBuilder + func attachmentsView(_ message: Message) -> some View { + ForEach(message.attachments, id: \.id) { attachment in + AsyncImage(url: attachment.thumbnail) { image in + image + .resizable() + .aspectRatio(contentMode: .fit) + } placeholder: { + Rectangle() + .fill(Color.gray.opacity(0.3)) + } + .frame(maxWidth: LLMMessageView.widthWithMedia, maxHeight: 200) + .cornerRadius(12) + .onTapGesture { + showAttachmentClosure(attachment) + } + } + .applyIf(message.attachments.count > 1) { + $0 + .padding(.top, LLMMessageView.horizontalAttachmentPadding) + .padding(.horizontal, LLMMessageView.horizontalAttachmentPadding) + } + .overlay(alignment: .bottomTrailing) { + if message.text.isEmpty { + messageTimeView(needsCapsule: true) + .padding(4) + } + } + .contentShape(Rectangle()) + } + + @ViewBuilder + func textWithTimeView(_ message: Message) -> some View { + // Message View with Type Writer Animation + let messageView = LLMMessageTextView( + text: message.text, + messageUseMarkdown: true, + messageId: message.id, + isAssistantMessage: isAssistantMessage, + isStreamingMessage: isStreamingMessage + ) + .fixedSize(horizontal: false, vertical: true) + .padding(.horizontal, LLMMessageView.horizontalTextPadding) + + HStack(alignment: .lastTextBaseline, spacing: 12) { + messageView + if !message.attachments.isEmpty { + Spacer() + } + } + .padding(.vertical, 8) + } + + @ViewBuilder + func recordingView(_ recording: Recording) -> some View { + HStack { + Image(systemName: "mic.fill") + .foregroundColor(.blue) + Text("Audio Message") + .font(.caption) + .foregroundColor(.secondary) + } + .padding(.horizontal, LLMMessageView.horizontalTextPadding) + .padding(.top, 8) + } + + func messageTimeView(needsCapsule: Bool = false) -> some View { + Group { + if needsCapsule 
{
+                Text(DateFormatter.timeFormatter.string(from: message.createdAt))
+                    .font(.caption)
+                    .foregroundColor(.white)
+                    .opacity(0.8)
+                    .padding(.top, 4)
+                    .padding(.bottom, 4)
+                    .padding(.horizontal, 8)
+                    .background {
+                        Capsule()
+                            .foregroundColor(.black.opacity(0.4))
+                    }
+            } else {
+                Text(DateFormatter.timeFormatter.string(from: message.createdAt))
+                    .font(.caption)
+                    .foregroundColor(message.user.isCurrentUser ? theme.colors.messageMyTimeText : theme.colors.messageFriendTimeText)
+            }
+        }
+        .sizeGetter($timeSize)
+    }
+}
+
+// MARK: - View Extensions
+extension View {
+    @ViewBuilder
+    func sizeGetter(_ size: Binding<CGSize>) -> some View {
+        self.background(
+            GeometryReader { geometry in
+                Color.clear
+                    .preference(key: SizePreferenceKey.self, value: geometry.size)
+            }
+        )
+        .onPreferenceChange(SizePreferenceKey.self) { newSize in
+            size.wrappedValue = newSize
+        }
+    }
+    
+    @ViewBuilder
+    func applyIf<Content: View>(_ condition: Bool, transform: (Self) -> Content) -> some View {
+        if condition {
+            transform(self)
+        } else {
+            self
+        }
+    }
+}
+
+// MARK: - Preference Key
+struct SizePreferenceKey: PreferenceKey {
+    static var defaultValue: CGSize = .zero
+    static func reduce(value: inout CGSize, nextValue: () -> CGSize) {}
+}
+
+// MARK: - Date Formatter Extension
+extension DateFormatter {
+    static let timeFormatter: DateFormatter = {
+        let formatter = DateFormatter()
+        formatter.timeStyle = .short
+        return formatter
+    }()
+}
+
+extension View {
+    @ViewBuilder
+    func bubbleBackground(_ message: Message, theme: ChatTheme, isReply: Bool = false) -> some View {
+        let radius: CGFloat = !message.attachments.isEmpty ? 12 : 20
+        let additionalMediaInset: CGFloat = message.attachments.count > 1 ? 2 : 0
+        self
+            .frame(width: message.attachments.isEmpty ? nil : LLMMessageView.widthWithMedia + additionalMediaInset)
+            .foregroundColor(message.user.isCurrentUser ? theme.colors.messageMyText : theme.colors.messageFriendText)
+            .background {
+                if isReply || !message.text.isEmpty || message.recording != nil {
+                    RoundedRectangle(cornerRadius: radius)
+                        .foregroundColor(message.user.isCurrentUser ? theme.colors.messageMyBG : theme.colors.messageFriendBG)
+                        .opacity(isReply ? 
0.5 : 1) + } + } + .cornerRadius(radius) + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/ModelSettingsView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/ModelSettingsView.swift index 2562316d..7433b53e 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/ModelSettingsView.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Chat/Views/ModelSettingsView.swift @@ -37,7 +37,7 @@ struct ModelSettingsView: View { var body: some View { NavigationView { Form { - Section(header: Text("Model Configuration")) { + Section { Toggle("Use mmap", isOn: $viewModel.useMmap) .onChange(of: viewModel.useMmap) { newValue in viewModel.modelConfigManager.updateUseMmap(newValue) @@ -47,11 +47,13 @@ struct ModelSettingsView: View { viewModel.cleanModelTmpFolder() showAlert = true } + } header: { + Text("Model Configuration") } // Diffusion Settings if viewModel.isDiffusionModel { - Section(header: Text("Diffusion Settings")) { + Section { Stepper(value: $iterations, in: 1...100) { HStack { Text("Iterations") @@ -84,9 +86,11 @@ struct ModelSettingsView: View { } } } + } header: { + Text("Diffusion Settings") } } else { - Section(header: Text("Sampling Strategy")) { + Section { Picker("Sampler Type", selection: $selectedSampler) { ForEach(SamplerType.allCases, id: \.self) { sampler in Text(sampler.displayName) @@ -218,6 +222,8 @@ struct ModelSettingsView: View { default: EmptyView() } + } header: { + Text("Sampling Strategy") } } } @@ -272,5 +278,3 @@ struct ModelSettingsView: View { viewModel.modelConfigManager.updateMixedSamplers(orderedSelection) } } - - diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/ChatHistoryItemView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/ChatHistoryItemView.swift deleted file mode 100644 index 2738a9f0..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/ChatHistoryItemView.swift +++ /dev/null @@ -1,43 +0,0 @@ -// -// ChatHistoryItemView.swift -// MNNLLMiOS -// -// Created by 游薪渝(揽清) on 2025/1/16. -// - -import SwiftUI - -struct ChatHistoryItemView: View { - let history: ChatHistory - - var body: some View { - HStack(spacing: 12) { - - ModelIconView(modelId: history.modelId) - .frame(width: 36, height: 36) - .clipShape(Circle()) - - VStack(alignment: .leading, spacing: 4) { - - if let firstMessage = history.messages.last { - Text(String(firstMessage.content.prefix(50)) + "...") - .lineLimit(2) - .font(.system(size: 14)) - } - - HStack { - VStack(alignment: .leading) { - Text(history.modelName) - .font(.system(size: 12)) - .foregroundColor(.gray) - - Text(history.updatedAt.formatAgo()) - .font(.system(size: 10)) - .foregroundColor(.gray) - } - } - } - } - .padding(.vertical, 8) - } -} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/ChatHistoryFileManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Services/ChatHistoryFileManager.swift similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/ChatHistoryFileManager.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Services/ChatHistoryFileManager.swift diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/SideMenuView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/SideMenuView.swift deleted file mode 100644 index 91d3754b..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/SideMenuView.swift +++ /dev/null @@ -1,88 +0,0 @@ -// -// SideMenuView.swift -// MNNLLMiOS -// -// Created by 游薪渝(揽清) on 2025/1/16. -// - -import SwiftUI - -struct SideMenuView: View { - @Binding var isOpen: Bool - @Binding var selectedHistory: ChatHistory? 
-
-    @Binding var histories: [ChatHistory]
-
-    @State private var showingAlert = false
-    @State private var historyToDelete: ChatHistory?
-
-    @State private var dragOffset: CGFloat = 0
-
-    var body: some View {
-        GeometryReader { geometry in
-            VStack {
-                HStack {
-                    Text(NSLocalizedString("ChatHistroyTitle", comment: "Chat Histroy Title"))
-                        .fontWeight(.medium)
-                        .font(.system(size: 20))
-                    Spacer()
-                }
-                .padding(.top, 80)
-                .padding(.leading)
-
-                List {
-                    ForEach(histories.sorted(by: { $0.updatedAt > $1.updatedAt })) { history in
-
-                        ChatHistoryItemView(history: history)
-                            .onTapGesture {
-                                selectedHistory = history
-                                isOpen = false
-                            }
-                            .onLongPressGesture {
-                                historyToDelete = history
-                                showingAlert = true
-                            }
-                            .listRowBackground(Color.sidemenuBg)
-                    }
-                }
-                .background(Color.sidemenuBg)
-                .listStyle(PlainListStyle())
-            }
-            .background(Color.sidemenuBg)
-            .frame(width: geometry.size.width * 0.8)
-            .offset(x: isOpen ? 0 : -geometry.size.width * 0.8)
-            .animation(.easeOut, value: isOpen)
-            .gesture(
-                DragGesture()
-                    .onChanged { value in
-                        if value.translation.width < 0 {
-                            dragOffset = value.translation.width
-                        }
-                    }
-                    .onEnded { value in
-                        if value.translation.width < -geometry.size.width * 0.25 {
-                            isOpen = false
-                        }
-                        dragOffset = 0
-                    }
-            )
-            .alert("Delete History", isPresented: $showingAlert) {
-                Button("Cancel", role: .cancel) {}
-                Button("Delete", role: .destructive) {
-                    if let history = historyToDelete {
-                        deleteHistory(history)
-                    }
-                }
-            } message: {
-                Text("Are you sure you want to delete this history?")
-            }
-        }
-    }
-
-    private func deleteHistory(_ history: ChatHistory) {
-        // Update persistent storage
-        ChatHistoryManager.shared.deleteHistory(history)
-        // Remove from the in-memory history list
-        histories.removeAll { $0.id == history.id }
-    }
-}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Views/ChatHistoryItemView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Views/ChatHistoryItemView.swift
new file mode 100644
index 00000000..1ec42cc3
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Views/ChatHistoryItemView.swift
@@ -0,0 +1,45 @@
+//
+//  ChatHistoryItemView.swift
+//  MNNLLMiOS
+//
+//  Created by 游薪渝(揽清) on 2025/1/16. 
+// + +import SwiftUI + +struct ChatHistoryItemView: View { + let history: ChatHistory + + var body: some View { + VStack(alignment: .leading, spacing: 8) { + + if let firstMessage = history.messages.last { + Text(String(firstMessage.content.prefix(200))) + .lineLimit(1) + .font(.system(size: 15, weight: .medium)) + .foregroundColor(.primary) + } + + HStack(alignment: .bottom) { + + ModelIconView(modelId: history.modelId) + .frame(width: 20, height: 20) + .clipShape(Circle()) + .padding(.trailing, 0) + + Text(history.modelName) + .lineLimit(1) + .font(.system(size: 12, weight: .semibold)) + .foregroundColor(.black.opacity(0.5)) + + Spacer() + + Text(history.updatedAt.formatAgo()) + .font(.system(size: 12, weight: .regular)) + .foregroundColor(.black.opacity(0.5)) + } + } + .padding(.vertical, 10) + .padding(.horizontal, 0) + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/ModelIconView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Views/ModelIconView.swift similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/ModelIconView.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Views/ModelIconView.swift diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Views/SideMenuView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Views/SideMenuView.swift new file mode 100644 index 00000000..aace874a --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/ChatHistory/Views/SideMenuView.swift @@ -0,0 +1,136 @@ +// +// SideMenuView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/1/16. +// + +import SwiftUI + +struct SideMenuView: View { + @Binding var isOpen: Bool + @Binding var selectedHistory: ChatHistory? + @Binding var histories: [ChatHistory] + @Binding var navigateToMainSettings: Bool + + @State private var showingAlert = false + @State private var historyToDelete: ChatHistory? + @State private var navigateToSettings = false + + @State private var dragOffset: CGFloat = 0 + + var body: some View { + ZStack { + GeometryReader { geometry in + VStack { + HStack { + Text(NSLocalizedString("ChatHistroyTitle", comment: "Chat Histroy Title")) + .fontWeight(.medium) + .font(.system(size: 20)) + Spacer() + } + .padding(.top, 80) + .padding(.leading, 12) + + List { + ForEach(histories.sorted(by: { $0.updatedAt > $1.updatedAt })) { history in + + ChatHistoryItemView(history: history) + .onTapGesture { + selectedHistory = history + isOpen = false + } + .onLongPressGesture { + historyToDelete = history + showingAlert = true + } + .listRowBackground(Color.sidemenuBg) + .listRowSeparator(.hidden) + } + } + .background(Color.sidemenuBg) + .listStyle(PlainListStyle()) + + Spacer() + + HStack { + Button(action: { + isOpen = false + DispatchQueue.main.asyncAfter(deadline: .now() + 0.3) { + navigateToMainSettings = true + } + }) { + HStack { + Image(systemName: "gear") + .resizable() + .aspectRatio(contentMode: .fit) + .frame(width: 20, height: 20) + } + .foregroundColor(.primary) + .padding(.leading) + } + Spacer() + } + .padding(EdgeInsets(top: 10, leading: 12, bottom: 30, trailing: 0)) + } + .background(Color.sidemenuBg) + .frame(width: geometry.size.width * 0.8) + .offset(x: isOpen ? 
0 : -geometry.size.width * 0.8) + .animation(.easeOut, value: isOpen) + .gesture( + DragGesture() + .onChanged { value in + if value.translation.width < 0 { + dragOffset = value.translation.width + } + } + .onEnded { value in + if value.translation.width < -geometry.size.width * 0.25 { + isOpen = false + } + dragOffset = 0 + } + ) + .alert("Delete History", isPresented: $showingAlert) { + Button("Cancel", role: .cancel) {} + Button(LocalizedStringKey("button.delete"), role: .destructive) { + if let history = historyToDelete { + deleteHistory(history) + } + } + } message: { + Text("Are you sure you want to delete this history?") + } + } + } + } + + private func deleteHistory(_ history: ChatHistory) { + ChatHistoryManager.shared.deleteHistory(history) + histories.removeAll { $0.id == history.id } + } +} + +struct SettingsFullScreenView: View { + @Binding var isPresented: Bool + + var body: some View { + NavigationView { + SettingsView() + .navigationTitle("Settings") + .navigationBarTitleDisplayMode(.inline) + .toolbar { + ToolbarItem(placement: .navigationBarLeading) { + Button(action: { + isPresented = false + }) { + Image(systemName: "xmark") + .foregroundColor(.primary) + .fontWeight(.medium) + } + } + } + } + .navigationViewStyle(StackNavigationViewStyle()) + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/DiffusionSession.h b/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/DiffusionSession.h similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/DiffusionSession.h rename to apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/DiffusionSession.h diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/DiffusionSession.mm b/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/DiffusionSession.mm similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/DiffusionSession.mm rename to apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/DiffusionSession.mm diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/LLMInferenceEngineWrapper.h b/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/LLMInferenceEngineWrapper.h new file mode 100644 index 00000000..1b874f7e --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/LLMInferenceEngineWrapper.h @@ -0,0 +1,209 @@ +// +// LLMInferenceEngineWrapper.h +// mnn-llm +// +// Created by wangzhaode on 2023/12/14. 
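+//
+//  Public Objective-C interface to the MNN LLM engine: model loading,
+//  streaming inference, chat-history management, and benchmarking.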
+//
+
+#ifndef LLMInferenceEngineWrapper_h
+#define LLMInferenceEngineWrapper_h
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+typedef void (^CompletionHandler)(BOOL success);
+typedef void (^OutputHandler)(NSString * _Nonnull output);
+
+// MARK: - Benchmark Related Types
+
+/**
+ * Progress type enumeration for structured benchmark reporting
+ */
+typedef NS_ENUM(NSInteger, BenchmarkProgressType) {
+    BenchmarkProgressTypeUnknown = 0,
+    BenchmarkProgressTypeInitializing = 1,
+    BenchmarkProgressTypeWarmingUp = 2,
+    BenchmarkProgressTypeRunningTest = 3,
+    BenchmarkProgressTypeProcessingResults = 4,
+    BenchmarkProgressTypeCompleted = 5,
+    BenchmarkProgressTypeStopping = 6
+};
+
+/**
+ * Structured progress information for benchmark
+ */
+@interface BenchmarkProgressInfo : NSObject
+
+@property (nonatomic, assign) NSInteger progress; // 0-100
+@property (nonatomic, strong) NSString *statusMessage; // Status description
+@property (nonatomic, assign) BenchmarkProgressType progressType;
+@property (nonatomic, assign) NSInteger currentIteration;
+@property (nonatomic, assign) NSInteger totalIterations;
+@property (nonatomic, assign) NSInteger nPrompt;
+@property (nonatomic, assign) NSInteger nGenerate;
+@property (nonatomic, assign) float runTimeSeconds;
+@property (nonatomic, assign) float prefillTimeSeconds;
+@property (nonatomic, assign) float decodeTimeSeconds;
+@property (nonatomic, assign) float prefillSpeed;
+@property (nonatomic, assign) float decodeSpeed;
+
+@end
+
+/**
+ * Benchmark result structure
+ */
+@interface BenchmarkResult : NSObject
+
+@property (nonatomic, assign) BOOL success;
+@property (nonatomic, strong, nullable) NSString *errorMessage;
+@property (nonatomic, strong) NSArray<NSNumber *> *prefillTimesUs;
+@property (nonatomic, strong) NSArray<NSNumber *> *decodeTimesUs;
+@property (nonatomic, strong) NSArray<NSNumber *> *sampleTimesUs;
+@property (nonatomic, assign) NSInteger promptTokens;
+@property (nonatomic, assign) NSInteger generateTokens;
+@property (nonatomic, assign) NSInteger repeatCount;
+@property (nonatomic, assign) BOOL kvCacheEnabled;
+
+@end
+
+// Benchmark callback blocks
+typedef void (^BenchmarkProgressCallback)(BenchmarkProgressInfo *progressInfo);
+typedef void (^BenchmarkErrorCallback)(NSString *error);
+typedef void (^BenchmarkIterationCompleteCallback)(NSString *detailedStats);
+typedef void (^BenchmarkCompleteCallback)(BenchmarkResult *result);
+
+/**
+ * LLMInferenceEngineWrapper - A high-level Objective-C wrapper for MNN LLM inference engine
+ *
+ * This class provides a convenient interface for integrating MNN's Large Language Model
+ * inference capabilities into iOS applications with enhanced error handling, performance
+ * optimization, and thread safety.
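+ *
+ * Illustrative Swift call site (a minimal sketch, not taken from the app;
+ * `modelDir` is an assumed local model directory path):
+ * ```swift
+ * let engine = LLMInferenceEngineWrapper(modelPath: modelDir) { success in
+ *     guard success else { return } // completion is delivered on the main queue
+ * }
+ * engine.processInput("Hello, how are you?", withOutput: { chunk in
+ *     print(chunk, terminator: "") // streaming chunks also arrive on the main queue
+ * }, showPerformance: false)
+ * ```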
+ */ +@interface LLMInferenceEngineWrapper : NSObject + +/** + * Initialize the LLM inference engine with a model path + * + * @param modelPath The file system path to the model directory + * @param completion Completion handler called with success/failure status + * @return Initialized instance of LLMInferenceEngineWrapper + */ +- (instancetype)initWithModelPath:(NSString *)modelPath completion:(CompletionHandler)completion; + +/** + * Process user input and generate streaming LLM response + * + * @param input The user's input text to process + * @param output Callback block that receives streaming output chunks + */ +- (void)processInput:(NSString *)input withOutput:(OutputHandler)output; + +/** + * Process user input and generate streaming LLM response with optional performance output + * + * @param input The user's input text to process + * @param output Callback block that receives streaming output chunks + * @param showPerformance Whether to output performance statistics after response completion + */ +- (void)processInput:(NSString *)input withOutput:(OutputHandler)output showPerformance:(BOOL)showPerformance; + +/** + * Add chat prompts from an array of dictionaries to the conversation history + * + * @param array NSArray containing NSDictionary objects with chat messages + */ +- (void)addPromptsFromArray:(NSArray *)array; + +/** + * Set the configuration for the LLM engine using a JSON string + * + * @param jsonStr JSON string containing configuration parameters + */ +- (void)setConfigWithJSONString:(NSString *)jsonStr; + +/** + * Check if model is ready for inference + * + * @return YES if model is loaded and ready + */ +- (BOOL)isModelReady; + +/** + * Get current processing status + * + * @return YES if currently processing an inference request + */ +- (BOOL)isProcessing; + +/** + * Cancel ongoing inference (if supported) + */ +- (void)cancelInference; + +/** + * Get chat history count + * + * @return Number of messages in chat history + */ +- (NSUInteger)getChatHistoryCount; + +/** + * Clear chat history + */ +- (void)clearChatHistory; + +// MARK: - Benchmark Methods + +/** + * Run official benchmark following llm_bench.cpp approach + * + * @param backend Backend type (0 for CPU) + * @param threads Number of threads + * @param useMmap Whether to use memory mapping + * @param power Power setting + * @param precision Precision setting (2 for low precision) + * @param memory Memory setting (2 for low memory) + * @param dynamicOption Dynamic optimization option + * @param nPrompt Number of prompt tokens + * @param nGenerate Number of tokens to generate + * @param nRepeat Number of repetitions + * @param kvCache Whether to use KV cache + * @param progressCallback Progress update callback + * @param errorCallback Error callback + * @param iterationCompleteCallback Iteration completion callback + * @param completeCallback Final completion callback + */ +- (void)runOfficialBenchmarkWithBackend:(NSInteger)backend + threads:(NSInteger)threads + useMmap:(BOOL)useMmap + power:(NSInteger)power + precision:(NSInteger)precision + memory:(NSInteger)memory + dynamicOption:(NSInteger)dynamicOption + nPrompt:(NSInteger)nPrompt + nGenerate:(NSInteger)nGenerate + nRepeat:(NSInteger)nRepeat + kvCache:(BOOL)kvCache + progressCallback:(BenchmarkProgressCallback _Nullable)progressCallback + errorCallback:(BenchmarkErrorCallback _Nullable)errorCallback + iterationCompleteCallback:(BenchmarkIterationCompleteCallback _Nullable)iterationCompleteCallback + completeCallback:(BenchmarkCompleteCallback 
_Nullable)completeCallback; + +/** + * Stop running benchmark + */ +- (void)stopBenchmark; + +/** + * Check if benchmark is currently running + * + * @return YES if benchmark is running + */ +- (BOOL)isBenchmarkRunning; + +@end + +NS_ASSUME_NONNULL_END + +#endif /* LLMInferenceEngineWrapper_h */ diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/LLMInferenceEngineWrapper.mm b/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/LLMInferenceEngineWrapper.mm new file mode 100644 index 00000000..19db6ec8 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/LLMInferenceEngineWrapper.mm @@ -0,0 +1,1392 @@ +// +// LLMInferenceEngineWrapper.m +// mnn-llm +// Modified by 游薪渝(揽清) on 2025/7/7. +// Created by wangzhaode on 2023/12/14. +// + +/** + * LLMInferenceEngineWrapper - A high-level Objective-C wrapper for MNN LLM inference engine + * + * This class provides a convenient interface for integrating MNN's Large Language Model + * inference capabilities into iOS applications. It handles model loading, configuration, + * text processing, and streaming output with proper memory management and error handling. + * + * Key Features: + * - Asynchronous model loading with completion callbacks + * - Streaming text generation with real-time output + * - Configurable inference parameters through JSON + * - Memory-mapped model loading for efficiency + * - Chat history management and conversation context + * - Benchmarking capabilities for performance testing + * + * Usage Examples: + * + * 1. Basic Model Loading and Inference: + * ```objc + * LLMInferenceEngineWrapper *engine = [[LLMInferenceEngineWrapper alloc] + * initWithModelPath:@"/path/to/model" + * completion:^(BOOL success) { + * if (success) { + * NSLog(@"Model loaded successfully"); + * } + * }]; + * + * [engine processInput:@"Hello, how are you?" + * withOutput:^(NSString *output) { + * NSLog(@"AI Response: %@", output); + * }]; + * ``` + * + * 2. Configuration with Custom Parameters: + * ```objc + * NSString *config = @"{\"temperature\":0.7,\"max_tokens\":100}"; + * [engine setConfigWithJSONString:config]; + * ``` + * + * 3. 
Chat History Management: + * ```objc + * NSArray *chatHistory = @[ + * @{@"user": @"What is AI?"}, + * @{@"assistant": @"AI stands for Artificial Intelligence..."} + * ]; + * [engine addPromptsFromArray:chatHistory]; + * ``` + * + * Architecture: + * - Built on top of MNN's C++ LLM inference engine + * - Uses smart pointers for automatic memory management + * - Implements custom stream buffer for real-time text output + * - Supports both bundled and external model loading + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#import +#import "LLMInferenceEngineWrapper.h" + +using namespace MNN::Transformer; + +using ChatMessage = std::pair; + +// MARK: - Benchmark Progress Info Implementation + +@implementation BenchmarkProgressInfo + +- (instancetype)init { + self = [super init]; + if (self) { + _progress = 0; + _statusMessage = @""; + _progressType = BenchmarkProgressTypeUnknown; + _currentIteration = 0; + _totalIterations = 0; + _nPrompt = 0; + _nGenerate = 0; + _runTimeSeconds = 0.0f; + _prefillTimeSeconds = 0.0f; + _decodeTimeSeconds = 0.0f; + _prefillSpeed = 0.0f; + _decodeSpeed = 0.0f; + } + return self; +} + +@end + +// MARK: - Benchmark Result Implementation + +@implementation BenchmarkResult + +- (instancetype)init { + self = [super init]; + if (self) { + _success = NO; + _errorMessage = nil; + _prefillTimesUs = @[]; + _decodeTimesUs = @[]; + _sampleTimesUs = @[]; + _promptTokens = 0; + _generateTokens = 0; + _repeatCount = 0; + _kvCacheEnabled = NO; + } + return self; +} + +@end + + +/** + * C++ Benchmark result structure following Android implementation + */ +struct BenchmarkResultCpp { + bool success; + std::string error_message; + std::vector prefill_times_us; + std::vector decode_times_us; + std::vector sample_times_us; + int prompt_tokens; + int generate_tokens; + int repeat_count; + bool kv_cache_enabled; +}; + +/** + * C++ Benchmark progress info structure following Android implementation + */ +struct BenchmarkProgressInfoCpp { + int progress; + std::string statusMessage; + int progressType; + int currentIteration; + int totalIterations; + int nPrompt; + int nGenerate; + float runTimeSeconds; + float prefillTimeSeconds; + float decodeTimeSeconds; + float prefillSpeed; + float decodeSpeed; + + BenchmarkProgressInfoCpp() : progress(0), statusMessage(""), progressType(0), + currentIteration(0), totalIterations(0), nPrompt(0), nGenerate(0), + runTimeSeconds(0.0f), prefillTimeSeconds(0.0f), decodeTimeSeconds(0.0f), + prefillSpeed(0.0f), decodeSpeed(0.0f) {} +}; + +// MARK: - C++ Benchmark Implementation + +/** + * C++ Benchmark callback structure following Android implementation + */ +struct BenchmarkCallback { + std::function onProgress; + std::function onError; + std::function onIterationComplete; +}; + + +/** + * Enhanced LlmStreamBuffer with improved performance and error handling + */ +class OptimizedLlmStreamBuffer : public std::streambuf { +public: + using CallBack = std::function; + + OptimizedLlmStreamBuffer(CallBack callback) : callback_(callback) { + buffer_.reserve(1024); // Pre-allocate buffer for better performance + } + + ~OptimizedLlmStreamBuffer() { + flushBuffer(); + } + +protected: + virtual std::streamsize xsputn(const char* s, std::streamsize n) override { + if (!callback_ || n <= 0) { + return n; + } + + try { + buffer_.append(s, n); + + const size_t BUFFER_THRESHOLD = 64; + bool shouldFlush = buffer_.size() >= BUFFER_THRESHOLD; + + if (!shouldFlush && n > 0) { + 
shouldFlush = checkForFlushTriggers(s, n);
+            }
+
+            if (shouldFlush) {
+                flushBuffer();
+            }
+
+            return n;
+        }
+        catch (const std::exception& e) {
+            NSLog(@"Error in stream buffer: %s", e.what());
+            return -1;
+        }
+    }
+
+private:
+    void flushBuffer() {
+        if (callback_ && !buffer_.empty()) {
+            callback_(buffer_.c_str(), buffer_.size());
+            buffer_.clear();
+        }
+    }
+
+    bool checkForFlushTriggers(const char* s, std::streamsize n) {
+        // Check ASCII punctuation
+        char lastChar = s[n-1];
+        if (lastChar == '\n' ||
+            lastChar == '\r' ||
+            lastChar == '\t' ||
+            lastChar == '.' ||
+            lastChar == ',' ||
+            lastChar == ';' ||
+            lastChar == ':' ||
+            lastChar == '!' ||
+            lastChar == '?') {
+            return true;
+        }
+
+        // Check Unicode punctuation
+        return checkUnicodePunctuation();
+    }
+
+    bool checkUnicodePunctuation() {
+        if (buffer_.size() >= 3) {
+            const char* bufferEnd = buffer_.c_str() + buffer_.size() - 3;
+
+            // Chinese punctuation marks (3-byte UTF-8)
+            static const std::vector<std::string> chinesePunctuation = {
+                "\xE3\x80\x82", // 。
+                "\xEF\xBC\x8C", // ,
+                "\xEF\xBC\x9B", // ;
+                "\xEF\xBC\x9A", // :
+                "\xEF\xBC\x81", // !
+                "\xEF\xBC\x9F", // ?
+                "\xE2\x80\xA6", // …
+            };
+
+            for (const auto& punct : chinesePunctuation) {
+                if (memcmp(bufferEnd, punct.c_str(), 3) == 0) {
+                    return true;
+                }
+            }
+
+            // En dash and em dash are also 3-byte UTF-8 sequences
+            if (memcmp(bufferEnd, "\xE2\x80\x93", 3) == 0 ||  // –
+                memcmp(bufferEnd, "\xE2\x80\x94", 3) == 0) {  // —
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    CallBack callback_ = nullptr;
+    std::string buffer_; // Buffer for accumulating output
+};
+
+@implementation LLMInferenceEngineWrapper {
+    std::shared_ptr<Llm> _llm;
+    std::vector<ChatMessage> _history;
+    std::mutex _historyMutex;
+    std::atomic<bool> _isProcessing;
+    std::atomic<bool> _isBenchmarkRunning;
+    std::atomic<bool> _shouldStopBenchmark;
+    NSString *_modelPath;
+}
+
+/**
+ * Initializes the LLM inference engine with a model path
+ *
+ * This method asynchronously loads the LLM model from the specified path
+ * and calls the completion handler on the main queue when finished.
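+ *
+ * Note: loading runs on a high-priority global dispatch queue, so this
+ * initializer returns immediately; treat the instance as ready only after
+ * the completion handler reports success.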
+ * + * @param modelPath The file system path to the model directory + * @param completion Completion handler called with success/failure status + * @return Initialized instance of LLMInferenceEngineWrapper + */ +- (instancetype)initWithModelPath:(NSString *)modelPath completion:(CompletionHandler)completion { + self = [super init]; + if (self) { + _modelPath = [modelPath copy]; + _isProcessing = false; + _isBenchmarkRunning = false; + _shouldStopBenchmark = false; + + dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0), ^{ + BOOL success = [self loadModelFromPath:modelPath]; + + dispatch_async(dispatch_get_main_queue(), ^{ + if (completion) { + completion(success); + } + }); + }); + } + return self; +} + +/** + * Utility function to remove a directory and all its contents + * + * @param path The directory path to remove + * @return true if successful, false otherwise + */ +bool remove_directory_safely(const std::string& path) { + try { + if (std::filesystem::exists(path)) { + std::filesystem::remove_all(path); + } + return true; + } catch (const std::filesystem::filesystem_error& e) { + NSLog(@"Error removing directory %s: %s", path.c_str(), e.what()); + return false; + } +} + +/** + * Validates model path and configuration + * + * @param modelPath The path to validate + * @return YES if path is valid and contains required files + */ +- (BOOL)validateModelPath:(NSString *)modelPath { + if (!modelPath || modelPath.length == 0) { + NSLog(@"Error: Model path is nil or empty"); + return NO; + } + + NSFileManager *fileManager = [NSFileManager defaultManager]; + BOOL isDirectory; + + if (![fileManager fileExistsAtPath:modelPath isDirectory:&isDirectory] || !isDirectory) { + NSLog(@"Error: Model path does not exist or is not a directory: %@", modelPath); + return NO; + } + + NSString *configPath = [modelPath stringByAppendingPathComponent:@"config.json"]; + if (![fileManager fileExistsAtPath:configPath]) { + NSLog(@"Error: config.json not found at path: %@", configPath); + return NO; + } + + return YES; +} + +/** + * Loads the LLM model from the application bundle + * + * This method is used for testing with models bundled within the app. + * It sets up the model with default configuration and temporary directory. 
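+ *
+ * The engine is configured with a small JSON snippet along these lines
+ * (illustrative; the actual string is built at runtime from
+ * NSTemporaryDirectory()):
+ * ```json
+ * {"tmp_path": "/path/to/tmp", "use_mmap": true}
+ * ```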
+ * + * @return YES if model loading succeeds, NO otherwise + */ +- (BOOL)loadModel { + @try { + if (_llm) { + NSLog(@"Warning: Model already loaded"); + return YES; + } + + NSString *bundleDirectory = [[NSBundle mainBundle] bundlePath]; + std::string model_dir = [bundleDirectory UTF8String]; + std::string config_path = model_dir + "/config.json"; + + _llm.reset(Llm::createLLM(config_path)); + if (!_llm) { + NSLog(@"Error: Failed to create LLM from bundle"); + return NO; + } + + NSString *tempDirectory = NSTemporaryDirectory(); + std::string configStr = "{\"tmp_path\":\"" + std::string([tempDirectory UTF8String]) + "\", \"use_mmap\":true}"; + _llm->set_config(configStr); + _llm->load(); + + NSLog(@"Model loaded successfully from bundle"); + return YES; + } + @catch (NSException *exception) { + NSLog(@"Exception during model loading: %@", exception.reason); + return NO; + } +} + +/** + * Loads the LLM model from a specified file system path + * + * This method handles the complete model loading process including: + * - Path validation and error checking + * - Reading model configuration from config.json + * - Setting up temporary directories for model operations + * - Configuring memory mapping settings + * - Loading the model into memory with proper error handling + * + * @param modelPath The file system path to the model directory + * @return YES if model loading succeeds, NO otherwise + */ +- (BOOL)loadModelFromPath:(NSString *)modelPath { + @try { + if (_llm) { + NSLog(@"Warning: Model already loaded"); + return YES; + } + + if (![self validateModelPath:modelPath]) { + return NO; + } + + std::string config_path = std::string([modelPath UTF8String]) + "/config.json"; + + // Read and parse configuration with error handling + NSError *error = nil; + NSData *configData = [NSData dataWithContentsOfFile:[NSString stringWithUTF8String:config_path.c_str()]]; + if (!configData) { + NSLog(@"Error: Failed to read config file at %s", config_path.c_str()); + return NO; + } + + NSDictionary *configDict = [NSJSONSerialization JSONObjectWithData:configData options:0 error:&error]; + if (error) { + NSLog(@"Error parsing config JSON: %@", error.localizedDescription); + return NO; + } + + // Get memory mapping setting with default fallback + BOOL useMmap = configDict[@"use_mmap"] == nil ? YES : [configDict[@"use_mmap"] boolValue]; + + // Create LLM instance with error checking + _llm.reset(Llm::createLLM(config_path)); + if (!_llm) { + NSLog(@"Error: Failed to create LLM instance from config: %s", config_path.c_str()); + return NO; + } + + // Setup temporary directory with improved error handling + std::string model_path_str([modelPath UTF8String]); + std::string temp_directory_path = model_path_str + "/temp"; + + // Clean up existing temp directory + if (!remove_directory_safely(temp_directory_path)) { + NSLog(@"Warning: Failed to remove existing temp directory, continuing..."); + } + + // Create new temp directory + if (mkdir(temp_directory_path.c_str(), 0755) != 0 && errno != EEXIST) { + NSLog(@"Error: Failed to create temp directory: %s, errno: %d", temp_directory_path.c_str(), errno); + return NO; + } + + // Configure LLM with proper error handling + bool useMmapCpp = (useMmap == YES); + std::string configStr = "{\"tmp_path\":\"" + temp_directory_path + "\", \"use_mmap\":" + (useMmapCpp ? 
"true" : "false") + "}"; + + _llm->set_config(configStr); + _llm->load(); + + NSLog(@"Model loaded successfully from path: %@", modelPath); + return YES; + } + @catch (NSException *exception) { + NSLog(@"Exception during model loading: %@", exception.reason); + _llm.reset(); + return NO; + } +} + +/** + * Sets the configuration for the LLM engine using a JSON string + * + * This method allows runtime configuration of various LLM parameters + * such as temperature, max tokens, sampling methods, etc. + * + * @param jsonStr JSON string containing configuration parameters + */ +- (void)setConfigWithJSONString:(NSString *)jsonStr { + if (!_llm) { + NSLog(@"Error: LLM not initialized, cannot set configuration"); + return; + } + + if (!jsonStr || jsonStr.length == 0) { + NSLog(@"Error: JSON string is nil or empty"); + return; + } + + @try { + // Validate JSON format + NSError *error = nil; + NSData *jsonData = [jsonStr dataUsingEncoding:NSUTF8StringEncoding]; + [NSJSONSerialization JSONObjectWithData:jsonData options:0 error:&error]; + + if (error) { + NSLog(@"Error: Invalid JSON configuration: %@", error.localizedDescription); + return; + } + + const char *cString = [jsonStr UTF8String]; + std::string stdString(cString); + _llm->set_config(stdString); + + NSLog(@"Configuration updated successfully"); + } + @catch (NSException *exception) { + NSLog(@"Exception while setting configuration: %@", exception.reason); + } +} + +/** + * Processes user input and generates streaming LLM response with enhanced error handling + * + * This method handles the main inference process by: + * - Validating input parameters and model state + * - Setting up streaming output callback with error handling + * - Adding user input to chat history thread-safely + * - Executing LLM inference with streaming output + * - Handling special commands like benchmarking + * + * @param input The user's input text to process + * @param output Callback block that receives streaming output chunks + */ +- (void)processInput:(NSString *)input withOutput:(OutputHandler)output { + [self processInput:input withOutput:output showPerformance:NO]; +} + +/** + * Processes user input and generates streaming LLM response with optional performance output + * + * @param input The user's input text to process + * @param output Callback block that receives streaming output chunks + * @param showPerformance Whether to output performance statistics after response completion + */ +- (void)processInput:(NSString *)input withOutput:(OutputHandler)output showPerformance:(BOOL)showPerformance { + if (!_llm) { + if (output) { + output(@"Error: Model not loaded. 
Please initialize the model first."); + } + return; + } + + if (!input || input.length == 0) { + if (output) { + output(@"Error: Input text is empty."); + } + return; + } + + if (_isProcessing.load()) { + if (output) { + output(@"Error: Another inference is already in progress."); + } + return; + } + + _isProcessing = true; + + // Use high priority queue for better responsiveness + dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0), ^{ + @try { + auto inference_start_time = std::chrono::high_resolution_clock::now(); + + OptimizedLlmStreamBuffer::CallBack callback = [output, self](const char* str, size_t len) { + if (output && str && len > 0) { + @autoreleasepool { + NSString *nsOutput = [[NSString alloc] initWithBytes:str + length:len + encoding:NSUTF8StringEncoding]; + if (nsOutput) { + dispatch_async(dispatch_get_main_queue(), ^{ + output(nsOutput); + }); + } + } + } + }; + + OptimizedLlmStreamBuffer streambuf(callback); + std::ostream os(&streambuf); + + // Thread-safe history management + { + std::lock_guard lock(self->_historyMutex); + self->_history.emplace_back(ChatMessage("user", [input UTF8String])); + } + + std::string inputStr = [input UTF8String]; + if (inputStr == "benchmark") { + [self performBenchmarkWithOutput:&os]; + } else { + // Get initial context state for performance measurement + auto context = self->_llm->getContext(); + int initial_prompt_len = context->prompt_len; + int initial_decode_len = context->gen_seq_len; + int64_t initial_prefill_time = context->prefill_us; + int64_t initial_decode_time = context->decode_us; + + // Execute inference + self->_llm->response(self->_history, &os, "", 999999); + + // Calculate performance metrics if requested + if (showPerformance) { + auto inference_end_time = std::chrono::high_resolution_clock::now(); + auto total_inference_time = std::chrono::duration_cast(inference_end_time - inference_start_time); + + // Get final context state + int final_prompt_len = context->prompt_len; + int final_decode_len = context->gen_seq_len; + int64_t final_prefill_time = context->prefill_us; + int64_t final_decode_time = context->decode_us; + + // Calculate differences for this inference + int current_prompt_len = final_prompt_len - initial_prompt_len; + int current_decode_len = final_decode_len - initial_decode_len; + int64_t current_prefill_time = final_prefill_time - initial_prefill_time; + int64_t current_decode_time = final_decode_time - initial_decode_time; + + float prefill_s = current_prefill_time / 1e6; + float decode_s = current_decode_time / 1e6; + + // Format performance results + std::ostringstream performance_output; + performance_output << "\n\n> Performance Results:\n" + << "> Total inference time: " << total_inference_time.count() << " ms\n" + << "Prompt tokens: " << current_prompt_len << "\n" + << "Generated tokens: " << current_decode_len << "\n" + << "Prefill time: " << std::fixed << std::setprecision(2) << prefill_s << " s\n" + << "Decode time: " << std::fixed << std::setprecision(2) << decode_s << " s\n" + << "Prefill speed: " << std::fixed << std::setprecision(2) + << (prefill_s > 0 ? current_prompt_len / prefill_s : 0) << " tok/s\n" + << "Decode speed: " << std::fixed << std::setprecision(2) + << (decode_s > 0 ? 
current_decode_len / decode_s : 0) << " tok/s\n\n"; + + // Output performance results + std::string perf_str = performance_output.str(); + if (output) { + dispatch_async(dispatch_get_main_queue(), ^{ + NSString *perfOutput = [NSString stringWithUTF8String:perf_str.c_str()]; + if (perfOutput) { + output(perfOutput); + } + }); + } + } + } + } + @catch (NSException *exception) { + NSLog(@"Exception during inference: %@", exception.reason); + if (output) { + dispatch_async(dispatch_get_main_queue(), ^{ + output([NSString stringWithFormat:@"Error: Inference failed - %@", exception.reason]); + }); + } + } + @finally { + self->_isProcessing = false; + } + }); +} + +/** + * Performs benchmark testing with enhanced error handling and reporting + * + * @param os Output stream for benchmark results + */ +- (void)performBenchmarkWithOutput:(std::ostream *)os { + @try { + std::string model_dir = [[[NSBundle mainBundle] bundlePath] UTF8String]; + std::string prompt_file = model_dir + "/bench.txt"; + + std::ifstream prompt_fs(prompt_file); + if (!prompt_fs.is_open()) { + *os << "Error: Could not open benchmark file at " << prompt_file << std::endl; + return; + } + + std::vector prompts; + std::string prompt; + + while (std::getline(prompt_fs, prompt)) { + if (prompt.empty() || prompt.substr(0, 1) == "#") { + continue; + } + + // Process escape sequences + std::string::size_type pos = 0; + while ((pos = prompt.find("\\n", pos)) != std::string::npos) { + prompt.replace(pos, 2, "\n"); + pos += 1; + } + prompts.push_back(prompt); + } + + if (prompts.empty()) { + *os << "Error: No valid prompts found in benchmark file" << std::endl; + return; + } + + // Performance metrics + int prompt_len = 0; + int decode_len = 0; + int64_t prefill_time = 0; + int64_t decode_time = 0; + + auto context = _llm->getContext(); + auto start_time = std::chrono::high_resolution_clock::now(); + + for (const auto& p : prompts) { + _llm->response(p, os, "\n"); + prompt_len += context->prompt_len; + decode_len += context->gen_seq_len; + prefill_time += context->prefill_us; + decode_time += context->decode_us; + } + + auto end_time = std::chrono::high_resolution_clock::now(); + auto total_time = std::chrono::duration_cast(end_time - start_time); + + float prefill_s = prefill_time / 1e6; + float decode_s = decode_time / 1e6; + + *os << "\n#################################\n" + << "Benchmark Results:\n" + << "Total prompts processed: " << prompts.size() << "\n" + << "Total time: " << total_time.count() << " ms\n" + << "Prompt tokens: " << prompt_len << "\n" + << "Decode tokens: " << decode_len << "\n" + << "Prefill time: " << std::fixed << std::setprecision(2) << prefill_s << " s\n" + << "Decode time: " << std::fixed << std::setprecision(2) << decode_s << " s\n" + << "Prefill speed: " << std::fixed << std::setprecision(2) + << (prefill_s > 0 ? prompt_len / prefill_s : 0) << " tok/s\n" + << "Decode speed: " << std::fixed << std::setprecision(2) + << (decode_s > 0 ? 
decode_len / decode_s : 0) << " tok/s\n" + << "#################################\n"; + *os << ""; + } + @catch (NSException *exception) { + *os << "Error during benchmark: " << [exception.reason UTF8String] << std::endl; + } +} + +/** + * Enhanced deallocation with proper cleanup + */ +- (void)dealloc { + NSLog(@"LLMInferenceEngineWrapper deallocating..."); + + // Stop any running benchmark + _shouldStopBenchmark = true; + + // Wait for any ongoing processing to complete + while (_isProcessing.load() || _isBenchmarkRunning.load()) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + { + std::lock_guard lock(_historyMutex); + _history.clear(); + } + + _llm.reset(); + NSLog(@"LLMInferenceEngineWrapper deallocation complete"); +} + + +/** + * Enhanced chat history initialization with thread safety + * + * @param chatHistory Vector of strings representing alternating user/assistant messages + */ +- (void)init:(const std::vector&)chatHistory { + std::lock_guard lock(_historyMutex); + _history.clear(); + _history.emplace_back("system", "You are a helpful assistant."); + + for (size_t i = 0; i < chatHistory.size(); ++i) { + _history.emplace_back(i % 2 == 0 ? "user" : "assistant", chatHistory[i]); + } + NSLog(@"Chat history initialized with %zu messages", chatHistory.size()); +} + +/** + * Enhanced method for adding chat prompts from array with validation + * + * @param array NSArray containing NSDictionary objects with chat messages + */ +- (void)addPromptsFromArray:(NSArray *)array { + if (!array || array.count == 0) { + NSLog(@"Warning: Empty or nil chat history array provided"); + return; + } + + std::lock_guard lock(_historyMutex); + _history.clear(); + + for (NSDictionary *dict in array) { + if ([dict isKindOfClass:[NSDictionary class]]) { + [self addPromptsFromDictionary:dict]; + } else { + NSLog(@"Warning: Invalid dictionary in chat history array"); + } + } + NSLog(@"Added prompts from array with %lu items", (unsigned long)array.count); +} + +/** + * Enhanced method for adding prompts from dictionary with validation + * + * @param dictionary NSDictionary containing role-message key-value pairs + */ +- (void)addPromptsFromDictionary:(NSDictionary *)dictionary { + if (!dictionary || dictionary.count == 0) { + return; + } + + for (NSString *key in dictionary) { + NSString *value = dictionary[key]; + + if (![key isKindOfClass:[NSString class]] || ![value isKindOfClass:[NSString class]]) { + NSLog(@"Warning: Invalid key-value pair in chat dictionary"); + continue; + } + + std::string keyString = [key UTF8String]; + std::string valueString = [value UTF8String]; + _history.emplace_back(ChatMessage(keyString, valueString)); + } +} + +/** + * Check if model is ready for inference + * + * @return YES if model is loaded and ready + */ +- (BOOL)isModelReady { + return _llm != nullptr && !_isProcessing.load(); +} + +/** + * Get current processing status + * + * @return YES if currently processing an inference request + */ +- (BOOL)isProcessing { + return _isProcessing.load(); +} + +/** + * Cancel ongoing inference (if supported) + */ +- (void)cancelInference { + if (_isProcessing.load()) { + NSLog(@"Inference cancellation requested"); + // Note: Actual cancellation depends on MNN LLM implementation + // This is a placeholder for future enhancement + } +} + +/** + * Get chat history count + * + * @return Number of messages in chat history + */ +- (NSUInteger)getChatHistoryCount { + std::lock_guard lock(_historyMutex); + return _history.size(); +} + +/** + * Clear chat history + */ +- 
(void)clearChatHistory { + std::lock_guard lock(_historyMutex); + _history.clear(); + NSLog(@"Chat history cleared"); +} + +// MARK: - Benchmark Implementation Following Android llm_session.cpp + +/** + * Initialize benchmark result structure + */ +- (BenchmarkResultCpp)initializeBenchmarkResult:(int)nPrompt nGenerate:(int)nGenerate nRepeat:(int)nRepeat kvCache:(bool)kvCache { + BenchmarkResultCpp result; + result.prompt_tokens = nPrompt; + result.generate_tokens = nGenerate; + result.repeat_count = nRepeat; + result.kv_cache_enabled = kvCache; + result.success = false; + return result; +} + +/** + * Initialize LLM for benchmark and verify it's ready + */ +- (BOOL)initializeLlmForBenchmark:(BenchmarkResultCpp&)result callback:(const BenchmarkCallback&)callback { + if (!_llm) { + result.error_message = "LLM object is not initialized"; + if (callback.onError) callback.onError(result.error_message); + return NO; + } + + // Verify LLM context is valid before proceeding + auto context = _llm->getContext(); + if (!context) { + result.error_message = "LLM context is not valid - model may not be properly loaded"; + if (callback.onError) callback.onError(result.error_message); + return NO; + } + + // Clear chat history for clean benchmark + [self clearChatHistory]; + + // Re-verify context after reset + context = _llm->getContext(); + if (!context) { + result.error_message = "LLM context became invalid after reset"; + if (callback.onError) callback.onError(result.error_message); + return NO; + } + + return YES; +} + +/** + * Report benchmark progress + */ +- (void)reportBenchmarkProgress:(int)iteration nRepeat:(int)nRepeat nPrompt:(int)nPrompt nGenerate:(int)nGenerate callback:(const BenchmarkCallback&)callback { + if (callback.onProgress) { + BenchmarkProgressInfoCpp progressInfo; + + if (iteration == 0) { + progressInfo.progress = 0; + progressInfo.statusMessage = "Warming up..."; + progressInfo.progressType = 2; // BenchmarkProgressTypeWarmingUp + } else { + progressInfo.progress = (iteration * 100) / nRepeat; + progressInfo.statusMessage = "Running test " + std::to_string(iteration) + "/" + std::to_string(nRepeat) + + " (prompt=" + std::to_string(nPrompt) + ", generate=" + std::to_string(nGenerate) + ")"; + progressInfo.progressType = 3; // BenchmarkProgressTypeRunningTest + } + + // Set structured data + progressInfo.currentIteration = iteration; + progressInfo.totalIterations = nRepeat; + progressInfo.nPrompt = nPrompt; + progressInfo.nGenerate = nGenerate; + + callback.onProgress(progressInfo); + } +} + +/** + * Run KV cache test iteration + */ +- (BOOL)runKvCacheTest:(int)iteration nPrompt:(int)nPrompt nGenerate:(int)nGenerate + startTime:(std::chrono::high_resolution_clock::time_point)start_time + result:(BenchmarkResultCpp&)result callback:(const BenchmarkCallback&)callback { + + const int tok = 16; // Same token ID as used in Android llm_session.cpp + std::vector tokens(nPrompt, tok); + + // Validate token vector + if (tokens.empty() || nPrompt <= 0) { + result.error_message = "Invalid token configuration for kv-cache test"; + if (callback.onError) callback.onError(result.error_message); + return NO; + } + + _llm->response(tokens, nullptr, nullptr, nGenerate); + + // Re-get context after response to ensure it's still valid + auto context = _llm->getContext(); + if (!context) { + result.error_message = "Context became invalid after response in kv-cache test " + std::to_string(iteration); + if (callback.onError) callback.onError(result.error_message); + return NO; + } + + if (iteration > 
0) { // Exclude the first performance value + auto end_time = std::chrono::high_resolution_clock::now(); + [self processBenchmarkResults:context->prefill_us decodeTime:context->decode_us + startTime:start_time endTime:end_time iteration:iteration + nPrompt:nPrompt nGenerate:nGenerate result:result + callback:callback isKvCache:true]; + } + return YES; +} + +/** + * Run llama-bench test iteration (without kv cache) + */ +- (BOOL)runLlamaBenchTest:(int)iteration nPrompt:(int)nPrompt nGenerate:(int)nGenerate + startTime:(std::chrono::high_resolution_clock::time_point)start_time + result:(BenchmarkResultCpp&)result callback:(const BenchmarkCallback&)callback { + + const int tok = 500; + int64_t prefill_us = 0; + int64_t decode_us = 0; + std::vector tokens(nPrompt, tok); + std::vector tokens1(1, tok); + + // Validate token vectors + if ((nPrompt > 0 && tokens.empty()) || tokens1.empty()) { + result.error_message = "Invalid token configuration for llama-bench test " + std::to_string(iteration); + if (callback.onError) callback.onError(result.error_message); + return NO; + } + + NSLog(@"runLlamaBenchTest nPrompt:%d, nGenerate:%d", nPrompt, nGenerate); + + if (nPrompt > 0) { + NSLog(@"runLlamaBenchTest prefill begin"); + _llm->response(tokens, nullptr, nullptr, 1); + NSLog(@"runLlamaBenchTest prefill end"); + + auto context = _llm->getContext(); + if (!context) { + result.error_message = "Context became invalid after prefill response in llama-bench test " + std::to_string(iteration); + if (callback.onError) callback.onError(result.error_message); + return NO; + } + prefill_us = context->prefill_us; + } + + if (nGenerate > 0) { + NSLog(@"runLlamaBenchTest generate begin"); + _llm->response(tokens1, nullptr, nullptr, nGenerate); + NSLog(@"runLlamaBenchTest generate end"); + + auto context = _llm->getContext(); + if (!context) { + result.error_message = "Context became invalid after decode response in llama-bench test " + std::to_string(iteration); + if (callback.onError) callback.onError(result.error_message); + return NO; + } + decode_us = context->decode_us; + } + + if (iteration > 0) { // Exclude the first performance value + auto end_time = std::chrono::high_resolution_clock::now(); + + [self processBenchmarkResults:prefill_us decodeTime:decode_us + startTime:start_time endTime:end_time iteration:iteration + nPrompt:nPrompt nGenerate:nGenerate result:result + callback:callback isKvCache:false]; + + result.sample_times_us.push_back(prefill_us + decode_us); + result.decode_times_us.push_back(decode_us); + result.prefill_times_us.push_back(prefill_us); + } + return YES; +} + +/** + * Process and report benchmark results + */ +- (void)processBenchmarkResults:(int64_t)prefillTime decodeTime:(int64_t)decodeTime + startTime:(std::chrono::high_resolution_clock::time_point)start_time + endTime:(std::chrono::high_resolution_clock::time_point)end_time + iteration:(int)iteration nPrompt:(int)nPrompt nGenerate:(int)nGenerate + result:(BenchmarkResultCpp&)result callback:(const BenchmarkCallback&)callback + isKvCache:(bool)isKvCache { + + auto runTime = std::chrono::duration_cast(end_time - start_time).count(); + + if (isKvCache) { + result.prefill_times_us.push_back(prefillTime); + result.decode_times_us.push_back(decodeTime); + } + + // Convert times to seconds + float runTimeSeconds = runTime / 1000000.0f; + float prefillTimeSeconds = prefillTime / 1000000.0f; + float decodeTimeSeconds = decodeTime / 1000000.0f; + + // Calculate speeds (tokens per second) + float prefillSpeed = (prefillTime > 0 && nPrompt 
> 0) ? ((float)nPrompt / prefillTimeSeconds) : 0.0f; + float decodeSpeed = (decodeTime > 0 && nGenerate > 0) ? ((float)nGenerate / decodeTimeSeconds) : 0.0f; + + // Report detailed results with structured data + BenchmarkProgressInfoCpp detailedInfo; + detailedInfo.progress = (iteration * 100) / result.repeat_count; + detailedInfo.progressType = 3; // BenchmarkProgressTypeRunningTest + detailedInfo.currentIteration = iteration; + detailedInfo.totalIterations = result.repeat_count; + detailedInfo.nPrompt = nPrompt; + detailedInfo.nGenerate = nGenerate; + detailedInfo.runTimeSeconds = runTimeSeconds; + detailedInfo.prefillTimeSeconds = prefillTimeSeconds; + detailedInfo.decodeTimeSeconds = decodeTimeSeconds; + detailedInfo.prefillSpeed = prefillSpeed; + detailedInfo.decodeSpeed = decodeSpeed; + + // Format detailed message + char detailedMsg[1024]; + snprintf(detailedMsg, sizeof(detailedMsg), + "BenchmarkService: Native Progress [%dp+%dg] (%d%%): Running test %d/%d (prompt=%d, generate=%d) runTime:%.3fs, prefillTime:%.3fs, decodeTime:%.3fs, prefillSpeed:%.2f tok/s, decodeSpeed:%.2f tok/s", + nPrompt, nGenerate, detailedInfo.progress, iteration, result.repeat_count, nPrompt, nGenerate, + runTimeSeconds, prefillTimeSeconds, decodeTimeSeconds, prefillSpeed, decodeSpeed); + + detailedInfo.statusMessage = std::string(detailedMsg); + + NSLog(@"%s", detailedMsg); + + if (callback.onProgress) { + callback.onProgress(detailedInfo); + } + + if (callback.onIterationComplete) { + callback.onIterationComplete(std::string(detailedMsg)); + } +} + +/** + * Core benchmark implementation + */ +- (BenchmarkResultCpp)runBenchmarkCore:(int)backend threads:(int)threads useMmap:(bool)useMmap power:(int)power + precision:(int)precision memory:(int)memory dynamicOption:(int)dynamicOption + nPrompt:(int)nPrompt nGenerate:(int)nGenerate nRepeat:(int)nRepeat + kvCache:(bool)kvCache callback:(const BenchmarkCallback&)callback { + + NSLog(@"BENCHMARK: runBenchmark() STARTED!"); + NSLog(@"BENCHMARK: Parameters - nPrompt=%d, nGenerate=%d, nRepeat=%d, kvCache=%s", + nPrompt, nGenerate, nRepeat, kvCache ? 
"true" : "false"); + + // Initialize result structure + NSLog(@"BENCHMARK: Initializing benchmark result structure"); + BenchmarkResultCpp result = [self initializeBenchmarkResult:nPrompt nGenerate:nGenerate nRepeat:nRepeat kvCache:kvCache]; + + // Initialize LLM for benchmark + NSLog(@"BENCHMARK: About to initialize LLM for benchmark"); + if (![self initializeLlmForBenchmark:result callback:callback]) { + NSLog(@"BENCHMARK: initializeLlmForBenchmark FAILED!"); + return result; + } + NSLog(@"BENCHMARK: initializeLlmForBenchmark SUCCESS - entering benchmark loop"); + + // Run benchmark iterations + NSLog(@"BENCHMARK: Starting benchmark loop for %d iterations", nRepeat + 1); + for (int i = 0; i < nRepeat + 1; ++i) { + if (_shouldStopBenchmark.load()) { + result.error_message = "Benchmark stopped by user"; + if (callback.onError) callback.onError(result.error_message); + return result; + } + + NSLog(@"BENCHMARK: Starting iteration %d/%d", i, nRepeat); + auto start_time = std::chrono::high_resolution_clock::now(); + + // Report progress + NSLog(@"BENCHMARK: Reporting progress for iteration %d", i); + [self reportBenchmarkProgress:i nRepeat:nRepeat nPrompt:nPrompt nGenerate:nGenerate callback:callback]; + + // Run the actual test + BOOL success; + if (kvCache) { + success = [self runKvCacheTest:i nPrompt:nPrompt nGenerate:nGenerate startTime:start_time result:result callback:callback]; + } else { + success = [self runLlamaBenchTest:i nPrompt:nPrompt nGenerate:nGenerate startTime:start_time result:result callback:callback]; + } + + if (!success) { + return result; + } + } + + // Report completion + if (callback.onProgress) { + BenchmarkProgressInfoCpp completionInfo; + completionInfo.progress = 100; + completionInfo.statusMessage = "Benchmark completed!"; + completionInfo.progressType = 5; // BenchmarkProgressTypeCompleted + callback.onProgress(completionInfo); + } + + result.success = true; + return result; +} + +/** + * Convert C++ BenchmarkProgressInfoCpp to Objective-C BenchmarkProgressInfo + */ +- (BenchmarkProgressInfo *)convertProgressInfo:(const BenchmarkProgressInfoCpp&)cppInfo { + BenchmarkProgressInfo *objcInfo = [[BenchmarkProgressInfo alloc] init]; + objcInfo.progress = cppInfo.progress; + objcInfo.statusMessage = [NSString stringWithUTF8String:cppInfo.statusMessage.c_str()]; + objcInfo.progressType = (BenchmarkProgressType)cppInfo.progressType; + objcInfo.currentIteration = cppInfo.currentIteration; + objcInfo.totalIterations = cppInfo.totalIterations; + objcInfo.nPrompt = cppInfo.nPrompt; + objcInfo.nGenerate = cppInfo.nGenerate; + objcInfo.runTimeSeconds = cppInfo.runTimeSeconds; + objcInfo.prefillTimeSeconds = cppInfo.prefillTimeSeconds; + objcInfo.decodeTimeSeconds = cppInfo.decodeTimeSeconds; + objcInfo.prefillSpeed = cppInfo.prefillSpeed; + objcInfo.decodeSpeed = cppInfo.decodeSpeed; + return objcInfo; +} + +/** + * Convert C++ BenchmarkResultCpp to Objective-C BenchmarkResult + */ +- (BenchmarkResult *)convertBenchmarkResult:(const BenchmarkResultCpp&)cppResult { + BenchmarkResult *objcResult = [[BenchmarkResult alloc] init]; + objcResult.success = cppResult.success; + if (!cppResult.error_message.empty()) { + objcResult.errorMessage = [NSString stringWithUTF8String:cppResult.error_message.c_str()]; + } + + // Convert timing arrays + NSMutableArray *prefillTimes = [[NSMutableArray alloc] init]; + for (int64_t time : cppResult.prefill_times_us) { + [prefillTimes addObject:@(time)]; + } + objcResult.prefillTimesUs = [prefillTimes copy]; + + NSMutableArray *decodeTimes = 
[[NSMutableArray alloc] init]; + for (int64_t time : cppResult.decode_times_us) { + [decodeTimes addObject:@(time)]; + } + objcResult.decodeTimesUs = [decodeTimes copy]; + + NSMutableArray *sampleTimes = [[NSMutableArray alloc] init]; + for (int64_t time : cppResult.sample_times_us) { + [sampleTimes addObject:@(time)]; + } + objcResult.sampleTimesUs = [sampleTimes copy]; + + objcResult.promptTokens = cppResult.prompt_tokens; + objcResult.generateTokens = cppResult.generate_tokens; + objcResult.repeatCount = cppResult.repeat_count; + objcResult.kvCacheEnabled = cppResult.kv_cache_enabled; + + return objcResult; +} + +// MARK: - Public Benchmark Methods + +/** + * Run official benchmark following llm_bench.cpp approach + */ +- (void)runOfficialBenchmarkWithBackend:(NSInteger)backend + threads:(NSInteger)threads + useMmap:(BOOL)useMmap + power:(NSInteger)power + precision:(NSInteger)precision + memory:(NSInteger)memory + dynamicOption:(NSInteger)dynamicOption + nPrompt:(NSInteger)nPrompt + nGenerate:(NSInteger)nGenerate + nRepeat:(NSInteger)nRepeat + kvCache:(BOOL)kvCache + progressCallback:(BenchmarkProgressCallback _Nullable)progressCallback + errorCallback:(BenchmarkErrorCallback _Nullable)errorCallback + iterationCompleteCallback:(BenchmarkIterationCompleteCallback _Nullable)iterationCompleteCallback + completeCallback:(BenchmarkCompleteCallback _Nullable)completeCallback { + + if (_isBenchmarkRunning.load()) { + if (errorCallback) { + errorCallback(@"Benchmark is already running"); + } + return; + } + + if (!_llm) { + if (errorCallback) { + errorCallback(@"Model is not initialized"); + } + return; + } + + _isBenchmarkRunning = true; + _shouldStopBenchmark = false; + + // Run benchmark in background thread + dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0), ^{ + @try { + // Create C++ callback structure + BenchmarkCallback cppCallback; + + cppCallback.onProgress = [progressCallback, self](const BenchmarkProgressInfoCpp& progressInfo) { + if (progressCallback) { + BenchmarkProgressInfo *objcProgressInfo = [self convertProgressInfo:progressInfo]; + dispatch_async(dispatch_get_main_queue(), ^{ + progressCallback(objcProgressInfo); + }); + } + }; + + cppCallback.onError = [errorCallback](const std::string& error) { + if (errorCallback) { + NSString *errorStr = [NSString stringWithUTF8String:error.c_str()]; + dispatch_async(dispatch_get_main_queue(), ^{ + errorCallback(errorStr); + }); + } + }; + + cppCallback.onIterationComplete = [iterationCompleteCallback](const std::string& detailed_stats) { + if (iterationCompleteCallback) { + NSString *statsStr = [NSString stringWithUTF8String:detailed_stats.c_str()]; + dispatch_async(dispatch_get_main_queue(), ^{ + iterationCompleteCallback(statsStr); + }); + } + }; + + // Run the actual benchmark + BenchmarkResultCpp cppResult = [self runBenchmarkCore:(int)backend + threads:(int)threads + useMmap:(bool)useMmap + power:(int)power + precision:(int)precision + memory:(int)memory + dynamicOption:(int)dynamicOption + nPrompt:(int)nPrompt + nGenerate:(int)nGenerate + nRepeat:(int)nRepeat + kvCache:(bool)kvCache + callback:cppCallback]; + + // Convert result and call completion callback + BenchmarkResult *objcResult = [self convertBenchmarkResult:cppResult]; + + dispatch_async(dispatch_get_main_queue(), ^{ + if (completeCallback) { + completeCallback(objcResult); + } + }); + + } + @catch (NSException *exception) { + NSLog(@"Exception during benchmark: %@", exception.reason); + if (errorCallback) { + 
dispatch_async(dispatch_get_main_queue(), ^{ + errorCallback([NSString stringWithFormat:@"Benchmark failed: %@", exception.reason]); + }); + } + } + @finally { + self->_isBenchmarkRunning = false; + } + }); +} + +/** + * Stop running benchmark + */ +- (void)stopBenchmark { + _shouldStopBenchmark = true; + NSLog(@"Benchmark stop requested"); +} + +/** + * Check if benchmark is currently running + */ +- (BOOL)isBenchmarkRunning { + return _isBenchmarkRunning.load(); +} + +@end diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/LLMInferenceEngineWrapper.h b/apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/LLMInferenceEngineWrapper.h deleted file mode 100644 index 6141fed6..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/LLMInferenceEngineWrapper.h +++ /dev/null @@ -1,33 +0,0 @@ -// -// LLMInferenceEngineWrapper.h -// mnn-llm -// -// Created by wangzhaode on 2023/12/14. -// - -#ifndef LLMInferenceEngineWrapper_h -#define LLMInferenceEngineWrapper_h - - -// LLMInferenceEngineWrapper.h -#import - -NS_ASSUME_NONNULL_BEGIN - -typedef void (^CompletionHandler)(BOOL success); -typedef void (^OutputHandler)(NSString * _Nonnull output); - -@interface LLMInferenceEngineWrapper : NSObject - -- (instancetype)initWithModelPath:(NSString *)modelPath completion:(CompletionHandler)completion; -- (void)processInput:(NSString *)input withOutput:(OutputHandler)output; - -- (void)addPromptsFromArray:(NSArray *)array; - -- (void)setConfigWithJSONString:(NSString *)jsonStr; - -@end - -NS_ASSUME_NONNULL_END - -#endif /* LLMInferenceEngineWrapper_h */ diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/LLMInferenceEngineWrapper.mm b/apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/LLMInferenceEngineWrapper.mm deleted file mode 100644 index 1cdf7618..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/LLMWrapper/LLMInferenceEngineWrapper.mm +++ /dev/null @@ -1,274 +0,0 @@ -// -// LLMInferenceEngineWrapper.m -// mnn-llm -// -// Created by wangzhaode on 2023/12/14. 
-// - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#import -#import "LLMInferenceEngineWrapper.h" - -using namespace MNN::Transformer; - -using ChatMessage = std::pair; -static std::vector history{}; - -@implementation LLMInferenceEngineWrapper { - std::shared_ptr llm; -} - -- (instancetype)initWithModelPath:(NSString *)modelPath completion:(CompletionHandler)completion { - self = [super init]; - if (self) { - dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - BOOL success = [self loadModelFromPath:modelPath]; - // MARK: Test Local Model - // BOOL success = [self loadModel]; - - dispatch_async(dispatch_get_main_queue(), ^{ - completion(success); - }); - }); - } - return self; -} - - -bool remove_directory(const std::string& path) { - try { - std::filesystem::remove_all(path); // 删除目录及其内容 - return true; - } catch (const std::filesystem::filesystem_error& e) { - std::cerr << "Error removing directory: " << e.what() << std::endl; - return false; - } -} - -- (BOOL)loadModel { - if (!llm) { - NSString *bundleDirectory = [[NSBundle mainBundle] bundlePath]; - std::string model_dir = [bundleDirectory UTF8String]; - std::string config_path = model_dir + "/config.json"; - llm.reset(Llm::createLLM(config_path)); - NSString *tempDirectory = NSTemporaryDirectory(); - llm->set_config("{\"tmp_path\":\"" + std::string([tempDirectory UTF8String]) + "\", \"use_mmap\":true}"); - llm->load(); - } - return YES; -} - -- (BOOL)loadModelFromPath:(NSString *)modelPath { - if (!llm) { - std::string config_path = std::string([modelPath UTF8String]) + "/config.json"; - - // Read the config file to get use_mmap value - NSError *error = nil; - NSData *configData = [NSData dataWithContentsOfFile:[NSString stringWithUTF8String:config_path.c_str()]]; - NSDictionary *configDict = [NSJSONSerialization JSONObjectWithData:configData options:0 error:&error]; - // If use_mmap key doesn't exist, default to YES - BOOL useMmap = configDict[@"use_mmap"] == nil ? YES : [configDict[@"use_mmap"] boolValue]; - - llm.reset(Llm::createLLM(config_path)); - if (!llm) { - return NO; - } - - // Create temp directory inside the modelPath folder - std::string model_path_str([modelPath UTF8String]); - std::string temp_directory_path = model_path_str + "/temp"; - - struct stat info; - if (stat(temp_directory_path.c_str(), &info) == 0) { - // Directory exists, so remove it - if (!remove_directory(temp_directory_path)) { - std::cerr << "Failed to remove existing temp directory: " << temp_directory_path << std::endl; - return NO; - } - std::cerr << "Existing temp directory removed: " << temp_directory_path << std::endl; - } - - // Now create the temp directory - if (mkdir(temp_directory_path.c_str(), 0777) != 0) { - std::cerr << "Failed to create temp directory: " << temp_directory_path << std::endl; - return NO; - } - std::cerr << "Temp directory created: " << temp_directory_path << std::endl; - - // NSLog(@"useMmap value: %@", useMmap ? @"YES" : @"NO"); - - // Explicitly convert BOOL to bool and ensure proper string conversion - bool useMmapCpp = (useMmap == YES); - std::string configStr = "{\"tmp_path\":\"" + temp_directory_path + "\", \"use_mmap\":" + (useMmapCpp ? "true" : "false") + "}"; - // Debug print to check the final config string - // NSLog(@"Config string: %s", configStr.c_str()); - - llm->set_config(configStr); - - llm->load(); - } - else { - std::cerr << "Warmming:: LLM have already been created!" 
<< std::endl; - } - return YES; -} - -- (void)setConfigWithJSONString:(NSString *)jsonStr { - - if (!llm) { - return; - } - - if (jsonStr) { - const char *cString = [jsonStr UTF8String]; - std::string stdString(cString); - - llm->set_config(stdString); - } else { - NSLog(@"Error: JSON string is nil or invalid."); - } -} - -// llm stream buffer with callback -class LlmStreamBuffer : public std::streambuf { -public: - using CallBack = std::function; - LlmStreamBuffer(CallBack callback) : callback_(callback) {} - -protected: - virtual std::streamsize xsputn(const char* s, std::streamsize n) override { - if (callback_) { - callback_(s, n); - } - return n; - } -private: - CallBack callback_ = nullptr; -}; - -- (void)processInput:(NSString *)input withOutput:(OutputHandler)output { - if (llm == nil) { - output(@"Error: Model not loaded"); - return; - } - - dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0), ^{ - - LlmStreamBuffer::CallBack callback = [output](const char* str, size_t len) { - if (output) { - NSString *nsOutput = [[NSString alloc] initWithBytes:str - length:len - encoding:NSUTF8StringEncoding]; - if (nsOutput) { - output(nsOutput); - } - } - }; - - LlmStreamBuffer streambuf(callback); - std::ostream os(&streambuf); - - history.emplace_back(ChatMessage("user", [input UTF8String])); - - if (std::string([input UTF8String]) == "benchmark") { - [self performBenchmarkWithOutput:&os]; - } else { - llm->response(history, &os, "", 999999); - } - - }); -} - -// New method to handle benchmarking -- (void)performBenchmarkWithOutput:(std::ostream *)os { - std::string model_dir = [[[NSBundle mainBundle] bundlePath] UTF8String]; - std::string prompt_file = model_dir + "/bench.txt"; - std::ifstream prompt_fs(prompt_file); - std::vector prompts; - std::string prompt; - while (std::getline(prompt_fs, prompt)) { - if (prompt.substr(0, 1) == "#") { - continue; - } - std::string::size_type pos = 0; - while ((pos = prompt.find("\\n", pos)) != std::string::npos) { - prompt.replace(pos, 2, "\n"); - pos += 1; - } - prompts.push_back(prompt); - } - - int prompt_len = 0; - int decode_len = 0; - int64_t prefill_time = 0; - int64_t decode_time = 0; - - auto context = llm->getContext(); - for (const auto& p : prompts) { - llm->response(p, os, "\n"); - prompt_len += context->prompt_len; - decode_len += context->gen_seq_len; - prefill_time += context->prefill_us; - decode_time += context->decode_us; - } - - float prefill_s = prefill_time / 1e6; - float decode_s = decode_time / 1e6; - - *os << "\n#################################\n" - << "prompt tokens num = " << prompt_len << "\n" - << "decode tokens num = " << decode_len << "\n" - << "prefill time = " << std::fixed << std::setprecision(2) << prefill_s << " s\n" - << "decode time = " << std::fixed << std::setprecision(2) << decode_s << " s\n" - << "prefill speed = " << std::fixed << std::setprecision(2) << (prefill_s > 0 ? prompt_len / prefill_s : 0) << " tok/s\n" - << "decode speed = " << std::fixed << std::setprecision(2) << (decode_s > 0 ? decode_len / decode_s : 0) << " tok/s\n" - << "##################################\n"; - *os << ""; -} - -- (void)dealloc { - std::cerr << "llm dealloc reset" << std::endl; - history.clear(); - llm.reset(); - llm = nil; -} - -- (void)init:(const std::vector&)chatHistory { - history.clear(); - history.emplace_back("system", "You are a helpful assistant."); - - for (size_t i = 0; i < chatHistory.size(); ++i) { - history.emplace_back(i % 2 == 0 ? 
"user" : "assistant", chatHistory[i]); - } -} - -- (void)addPromptsFromArray:(NSArray *)array { - - history.clear(); - - for (NSDictionary *dict in array) { - [self addPromptsFromDictionary:dict]; - } -} - -- (void)addPromptsFromDictionary:(NSDictionary *)dictionary { - for (NSString *key in dictionary) { - NSString *value = dictionary[key]; - - std::string keyString = [key UTF8String]; - std::string valueString = [value UTF8String]; - - history.emplace_back(ChatMessage(keyString, valueString)); - } -} - -@end diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MNNLLMiOSApp.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MNNLLMiOSApp.swift index 6b763174..60358b42 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/MNNLLMiOSApp.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MNNLLMiOSApp.swift @@ -12,11 +12,15 @@ struct MNNLLMiOSApp: App { init() { UIView.appearance().overrideUserInterfaceStyle = .light + + let savedLanguage = LanguageManager.shared.currentLanguage + UserDefaults.standard.set([savedLanguage], forKey: "AppleLanguages") + UserDefaults.standard.synchronize() } var body: some Scene { WindowGroup { - ModelListView() + MainTabView() } } } diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Helpers/BenchmarkResultsHelper.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Helpers/BenchmarkResultsHelper.swift new file mode 100644 index 00000000..90226403 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Helpers/BenchmarkResultsHelper.swift @@ -0,0 +1,144 @@ +// +// BenchmarkResultsHelper.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation +import Darwin + +/** + * Helper class for processing and formatting benchmark test results. + * Provides statistical analysis, formatting utilities, and device information + * for benchmark result display and sharing. + */ +class BenchmarkResultsHelper { + static let shared = BenchmarkResultsHelper() + + private init() {} + + // MARK: - Results Processing & Statistics + + /// Processes test results to generate comprehensive benchmark statistics + /// - Parameter testResults: Array of completed test instances + /// - Returns: Processed statistics including speed metrics and configuration details + func processTestResults(_ testResults: [TestInstance]) -> BenchmarkStatistics { + guard !testResults.isEmpty else { + return BenchmarkStatistics.empty + } + + let firstTest = testResults[0] + let configText = "Backend: CPU, Threads: \(firstTest.threads), Memory: Low, Precision: Low" + + var prefillStats: SpeedStatistics? + var decodeStats: SpeedStatistics? 
+ var totalTokensProcessed = 0 + + // Calculate prefill (prompt processing) statistics + let allPrefillSpeeds = testResults.flatMap { test in + test.getTokensPerSecond(tokens: test.nPrompt, timesUs: test.prefillUs) + } + + if !allPrefillSpeeds.isEmpty { + let avgPrefill = allPrefillSpeeds.reduce(0, +) / Double(allPrefillSpeeds.count) + let stdevPrefill = calculateStandardDeviation(values: allPrefillSpeeds, mean: avgPrefill) + prefillStats = SpeedStatistics(average: avgPrefill, stdev: stdevPrefill, label: "Prompt Processing") + } + + // Calculate decode (token generation) statistics + let allDecodeSpeeds = testResults.flatMap { test in + test.getTokensPerSecond(tokens: test.nGenerate, timesUs: test.decodeUs) + } + + if !allDecodeSpeeds.isEmpty { + let avgDecode = allDecodeSpeeds.reduce(0, +) / Double(allDecodeSpeeds.count) + let stdevDecode = calculateStandardDeviation(values: allDecodeSpeeds, mean: avgDecode) + decodeStats = SpeedStatistics(average: avgDecode, stdev: stdevDecode, label: "Token Generation") + } + + // Calculate total tokens processed across all tests + totalTokensProcessed = testResults.reduce(0) { sum, test in + return sum + (test.nPrompt * test.prefillUs.count) + (test.nGenerate * test.decodeUs.count) + } + + return BenchmarkStatistics( + configText: configText, + prefillStats: prefillStats, + decodeStats: decodeStats, + totalTokensProcessed: totalTokensProcessed, + totalTests: testResults.count + ) + } + + /// Calculates standard deviation for a set of values + /// - Parameters: + /// - values: Array of numeric values + /// - mean: Pre-calculated mean of the values + /// - Returns: Standard deviation value + private func calculateStandardDeviation(values: [Double], mean: Double) -> Double { + guard values.count > 1 else { return 0.0 } + + let variance = values.reduce(0) { sum, value in + let diff = value - mean + return sum + (diff * diff) + } / Double(values.count - 1) + + return sqrt(variance) + } + + // MARK: - Formatting & Display + + /// Formats speed statistics with average and standard deviation + /// - Parameter stats: Speed statistics to format + /// - Returns: Formatted string like "42.5 ± 3.2 tok/s" + func formatSpeedStatisticsLine(_ stats: SpeedStatistics) -> String { + return String(format: "%.1f ± %.1f tok/s", stats.average, stats.stdev) + } + + /// Returns the label-only portion of speed statistics + /// - Parameter stats: Speed statistics object + /// - Returns: Human-readable label (e.g., "Prompt Processing") + func formatSpeedLabelOnly(_ stats: SpeedStatistics) -> String { + return stats.label + } + + /// Formats model parameter summary for display + /// - Parameters: + /// - totalTokens: Total number of tokens processed + /// - totalTests: Total number of tests completed + /// - Returns: Formatted summary string + func formatModelParams(totalTokens: Int, totalTests: Int) -> String { + return "Total Tokens: \(totalTokens), Tests: \(totalTests)" + } + + /// Formats memory usage with percentage and absolute values + /// - Parameters: + /// - maxMemoryKb: Peak memory usage in kilobytes + /// - totalKb: Total system memory in kilobytes + /// - Returns: Tuple containing formatted value and percentage label + func formatMemoryUsage(maxMemoryKb: Int64, totalKb: Int64) -> (valueText: String, labelText: String) { + let maxMemoryMB = Double(maxMemoryKb) / 1024.0 + let totalMemoryGB = Double(totalKb) / (1024.0 * 1024.0) + let percentage = (Double(maxMemoryKb) / Double(totalKb)) * 100.0 + + let valueText = String(format: "%.1f MB", maxMemoryMB) + let labelText = 
String(format: "%.1f%% of %.1f GB", percentage, totalMemoryGB) + + return (valueText, labelText) + } + + // MARK: - Device & System Information + + /// Gets comprehensive device information including model and iOS version + /// - Returns: Formatted device info string (e.g., "iPhone 14 Pro, iOS 17.0") + func getDeviceInfo() -> String { + return DeviceInfoHelper.shared.getDeviceInfo() + } + + /// Gets total system memory in kilobytes + /// - Returns: System memory size in KB + func getTotalSystemMemoryKb() -> Int64 { + return Int64(ProcessInfo.processInfo.physicalMemory) / 1024 + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkErrorCode.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkErrorCode.swift new file mode 100644 index 00000000..8762c212 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkErrorCode.swift @@ -0,0 +1,22 @@ +// +// BenchmarkErrorCode.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Enumeration of possible error codes that can occur during benchmark execution. + * Provides specific error identification for different failure scenarios. + */ +enum BenchmarkErrorCode: Int { + case benchmarkFailedUnknown = 30 + case testInstanceFailed = 40 + case modelNotInitialized = 50 + case benchmarkRunning = 99 + case benchmarkStopped = 100 + case nativeError = 0 + case modelError = 2 +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkProgress.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkProgress.swift new file mode 100644 index 00000000..9f882aa2 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkProgress.swift @@ -0,0 +1,53 @@ +// +// BenchmarkProgress.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Structure containing detailed progress information for benchmark execution. + * Provides real-time metrics including timing data and performance statistics. 
+ */ +struct BenchmarkProgress { + let progress: Int // 0-100 + let statusMessage: String + let progressType: ProgressType + let currentIteration: Int + let totalIterations: Int + let nPrompt: Int + let nGenerate: Int + let runTimeSeconds: Float + let prefillTimeSeconds: Float + let decodeTimeSeconds: Float + let prefillSpeed: Float + let decodeSpeed: Float + + init(progress: Int, + statusMessage: String, + progressType: ProgressType = .unknown, + currentIteration: Int = 0, + totalIterations: Int = 0, + nPrompt: Int = 0, + nGenerate: Int = 0, + runTimeSeconds: Float = 0.0, + prefillTimeSeconds: Float = 0.0, + decodeTimeSeconds: Float = 0.0, + prefillSpeed: Float = 0.0, + decodeSpeed: Float = 0.0) { + self.progress = progress + self.statusMessage = statusMessage + self.progressType = progressType + self.currentIteration = currentIteration + self.totalIterations = totalIterations + self.nPrompt = nPrompt + self.nGenerate = nGenerate + self.runTimeSeconds = runTimeSeconds + self.prefillTimeSeconds = prefillTimeSeconds + self.decodeTimeSeconds = decodeTimeSeconds + self.prefillSpeed = prefillSpeed + self.decodeSpeed = decodeSpeed + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkResult.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkResult.swift new file mode 100644 index 00000000..c1bb6823 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkResult.swift @@ -0,0 +1,24 @@ +// +// BenchmarkResult.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Structure containing the results of a completed benchmark test. + * Encapsulates test instance data along with success status and error information. + */ +struct BenchmarkResult { + let testInstance: TestInstance + let success: Bool + let errorMessage: String? + + init(testInstance: TestInstance, success: Bool, errorMessage: String? = nil) { + self.testInstance = testInstance + self.success = success + self.errorMessage = errorMessage + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkResults.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkResults.swift new file mode 100644 index 00000000..361e943f --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkResults.swift @@ -0,0 +1,26 @@ +// +// BenchmarkResults.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Structure containing comprehensive benchmark results for display and sharing. + * Aggregates test results, memory usage, and metadata for result presentation. + */ +struct BenchmarkResults { + let modelDisplayName: String + let maxMemoryKb: Int64 + let testResults: [TestInstance] + let timestamp: String + + init(modelDisplayName: String, maxMemoryKb: Int64, testResults: [TestInstance], timestamp: String) { + self.modelDisplayName = modelDisplayName + self.maxMemoryKb = maxMemoryKb + self.testResults = testResults + self.timestamp = timestamp + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkStatistics.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkStatistics.swift new file mode 100644 index 00000000..766639e6 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/BenchmarkStatistics.swift @@ -0,0 +1,28 @@ +// +// BenchmarkStatistics.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. 
+// + +import Foundation + +/** + * Structure containing comprehensive statistical analysis of benchmark results. + * Aggregates performance metrics, configuration details, and test summary information. + */ +struct BenchmarkStatistics { + let configText: String + let prefillStats: SpeedStatistics? + let decodeStats: SpeedStatistics? + let totalTokensProcessed: Int + let totalTests: Int + + static let empty = BenchmarkStatistics( + configText: "", + prefillStats: nil, + decodeStats: nil, + totalTokensProcessed: 0, + totalTests: 0 + ) +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/DeviceInfoHelper.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/DeviceInfoHelper.swift new file mode 100644 index 00000000..a8bdebb2 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/DeviceInfoHelper.swift @@ -0,0 +1,142 @@ +// +// DeviceInfoHelper.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation +import UIKit + +/** + * Helper class for retrieving device information including model identification + * and system details. Provides device-specific information for benchmark results. + */ +class DeviceInfoHelper { + static let shared = DeviceInfoHelper() + + private init() {} + + /// Gets the device model identifier (e.g., "iPhone14,7") + func getDeviceIdentifier() -> String { + var systemInfo = utsname() + uname(&systemInfo) + + let machineMirror = Mirror(reflecting: systemInfo.machine) + let identifier = machineMirror.children.reduce("") { identifier, element in + guard let value = element.value as? Int8, value != 0 else { return identifier } + return identifier + String(UnicodeScalar(UInt8(value))) + } + + return identifier + } + + /// Gets the user-friendly device name (e.g., "iPhone 13 mini") + func getDeviceModelName() -> String { + let identifier = getDeviceIdentifier() + return mapIdentifierToModelName(identifier) + } + + /// Gets detailed device information including model and system version + func getDeviceInfo() -> String { + let device = UIDevice.current + let systemVersion = device.systemVersion + let modelName = getDeviceModelName() + return "\(modelName), iOS \(systemVersion)" + } + + private func mapIdentifierToModelName(_ identifier: String) -> String { + // iPhone mappings + let iPhoneMappings: [String: String] = [ + // iPhone 13 series + "iPhone14,4": "iPhone 13 mini", + "iPhone14,5": "iPhone 13", + "iPhone14,2": "iPhone 13 Pro", + "iPhone14,3": "iPhone 13 Pro Max", + + // iPhone 14 series + "iPhone14,7": "iPhone 14", + "iPhone14,8": "iPhone 14 Plus", + "iPhone15,2": "iPhone 14 Pro", + "iPhone15,3": "iPhone 14 Pro Max", + + // iPhone 15 series + "iPhone15,4": "iPhone 15", + "iPhone15,5": "iPhone 15 Plus", + "iPhone16,1": "iPhone 15 Pro", + "iPhone16,2": "iPhone 15 Pro Max", + + // iPhone 16 series + "iPhone17,1": "iPhone 16", + "iPhone17,2": "iPhone 16 Plus", + "iPhone17,3": "iPhone 16 Pro", + "iPhone17,4": "iPhone 16 Pro Max", + + // iPhone SE series + "iPhone12,8": "iPhone SE (2nd generation)", + "iPhone14,6": "iPhone SE (3rd generation)", + + // Older iPhones + "iPhone13,1": "iPhone 12 mini", + "iPhone13,2": "iPhone 12", + "iPhone13,3": "iPhone 12 Pro", + "iPhone13,4": "iPhone 12 Pro Max", + "iPhone12,1": "iPhone 11", + "iPhone12,3": "iPhone 11 Pro", + "iPhone12,5": "iPhone 11 Pro Max", + ] + + // iPad mappings + let iPadMappings: [String: String] = [ + // iPad Pro 12.9-inch + "iPad13,8": "iPad Pro (12.9-inch) (5th generation)", + "iPad13,9": "iPad Pro (12.9-inch) (5th generation)", + 
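+            // Several consecutive identifiers intentionally share one marketing
+            // name: Apple assigns distinct identifiers to hardware variants
+            // (e.g. Wi-Fi vs. cellular) of the same iPad model.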
"iPad13,10": "iPad Pro (12.9-inch) (5th generation)", + "iPad13,11": "iPad Pro (12.9-inch) (5th generation)", + "iPad14,5": "iPad Pro (12.9-inch) (6th generation)", + "iPad14,6": "iPad Pro (12.9-inch) (6th generation)", + + // iPad Pro 11-inch + "iPad13,4": "iPad Pro (11-inch) (3rd generation)", + "iPad13,5": "iPad Pro (11-inch) (3rd generation)", + "iPad13,6": "iPad Pro (11-inch) (3rd generation)", + "iPad13,7": "iPad Pro (11-inch) (3rd generation)", + "iPad14,3": "iPad Pro (11-inch) (4th generation)", + "iPad14,4": "iPad Pro (11-inch) (4th generation)", + + // iPad Air + "iPad13,1": "iPad Air (4th generation)", + "iPad13,2": "iPad Air (4th generation)", + "iPad13,16": "iPad Air (5th generation)", + "iPad13,17": "iPad Air (5th generation)", + + // iPad mini + "iPad14,1": "iPad mini (6th generation)", + "iPad14,2": "iPad mini (6th generation)", + + // iPad (regular) + "iPad12,1": "iPad (9th generation)", + "iPad12,2": "iPad (9th generation)", + "iPad13,18": "iPad (10th generation)", + "iPad13,19": "iPad (10th generation)", + ] + + // Try iPhone mappings first + if let modelName = iPhoneMappings[identifier] { + return modelName + } + + // Try iPad mappings + if let modelName = iPadMappings[identifier] { + return modelName + } + + // Check for simulator + if identifier == "x86_64" || identifier == "i386" { + return "Simulator" + } + + // Return raw identifier if no mapping found + return identifier + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ModelItem.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ModelItem.swift new file mode 100644 index 00000000..646fe47b --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ModelItem.swift @@ -0,0 +1,34 @@ +// +// ModelItem.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Structure representing a model item with download state information. + * Used for tracking model availability and download progress in the benchmark interface. + */ +struct ModelItem: Identifiable, Equatable { + let id = UUID() + let modelId: String + let displayName: String + let isLocal: Bool + let localPath: String? + let size: Int64? + let downloadState: DownloadState + + enum DownloadState: Equatable { + case notStarted + case downloading(progress: Double) + case completed + case failed(error: String) + case paused + } + + static func == (lhs: ModelItem, rhs: ModelItem) -> Bool { + return lhs.modelId == rhs.modelId + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ModelListManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ModelListManager.swift new file mode 100644 index 00000000..1dd57f34 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ModelListManager.swift @@ -0,0 +1,31 @@ +// +// ModelListManager.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Manager class for integrating with ModelListViewModel to provide + * downloaded models for benchmark testing. 
+ */ +class ModelListManager { + static let shared = ModelListManager() + + private let modelListViewModel = ModelListViewModel() + + private init() {} + + /// Loads available models, filtering for downloaded models suitable for benchmarking + /// - Returns: Array of downloaded ModelInfo objects + /// - Throws: Error if model loading fails + func loadModels() async throws -> [ModelInfo] { + // Ensure models are loaded from the view model + await modelListViewModel.fetchModels() + + // Return only downloaded models that are available for benchmark + return modelListViewModel.models.filter { $0.isDownloaded } + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ProgressType.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ProgressType.swift new file mode 100644 index 00000000..ed0588c4 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/ProgressType.swift @@ -0,0 +1,34 @@ +// +// ProgressType.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Enumeration representing different stages of benchmark execution progress. + * Used to track and display the current state of benchmark operations. + */ +enum ProgressType: Int, CaseIterable { + case unknown = 0 + case initializing + case warmingUp + case runningTest + case processingResults + case completed + case stopping + + var description: String { + switch self { + case .unknown: return "Unknown" + case .initializing: return "Initializing benchmark..." + case .warmingUp: return "Warming up..." + case .runningTest: return "Running test" + case .processingResults: return "Processing results..." + case .completed: return "All tests completed" + case .stopping: return "Stopping benchmark..." + } + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/RuntimeParameters.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/RuntimeParameters.swift new file mode 100644 index 00000000..6d8aeaad --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/RuntimeParameters.swift @@ -0,0 +1,32 @@ +// +// RuntimeParameters.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Configuration parameters for benchmark runtime environment. + * Defines hardware and execution settings for benchmark tests. + */ +struct RuntimeParameters { + let backends: [Int] + let threads: [Int] + let useMmap: Bool + let power: [Int] + let precision: [Int] + let memory: [Int] + let dynamicOption: [Int] + + static let `default` = RuntimeParameters( + backends: [0], // CPU + threads: [4], + useMmap: false, + power: [0], + precision: [2], // Low precision + memory: [2], // Low memory + dynamicOption: [0] + ) +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/SpeedStatistics.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/SpeedStatistics.swift new file mode 100644 index 00000000..d875a4f1 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/SpeedStatistics.swift @@ -0,0 +1,24 @@ +// +// SpeedStatistics.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation + +/** + * Structure containing statistical analysis of benchmark speed metrics. + * Provides average, standard deviation, and descriptive label for performance data. 
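+ *
+ * Example (illustrative values):
+ *
+ * ```swift
+ * let stats = SpeedStatistics(average: 42.5, stdev: 3.2, label: "Token Generation")
+ * // BenchmarkResultsHelper.shared.formatSpeedStatisticsLine(stats)
+ * // -> "42.5 ± 3.2 tok/s"
+ * ```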
+ */
+struct SpeedStatistics {
+    let average: Double
+    let stdev: Double
+    let label: String
+
+    init(average: Double, stdev: Double, label: String) {
+        self.average = average
+        self.stdev = stdev
+        self.label = label
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/TestInstance.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/TestInstance.swift
new file mode 100644
index 00000000..11603615
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/TestInstance.swift
@@ -0,0 +1,73 @@
+//
+//  TestInstance.swift
+//  MNNLLMiOS
+//
+//  Created by 游薪渝(揽清) on 2025/7/10.
+//
+
+import Foundation
+import Combine
+
+/**
+ * Observable class representing a single benchmark test instance.
+ * Contains test configuration parameters and stores timing results.
+ */
+class TestInstance: ObservableObject, Identifiable {
+    let id = UUID()
+    let modelConfigFile: String
+    let modelType: String
+    let modelSize: Int64
+    let threads: Int
+    let useMmap: Bool
+    let nPrompt: Int
+    let nGenerate: Int
+    let backend: Int
+    let precision: Int
+    let power: Int
+    let memory: Int
+    let dynamicOption: Int
+
+    @Published var prefillUs: [Int64] = []
+    @Published var decodeUs: [Int64] = []
+    @Published var samplesUs: [Int64] = []
+
+    init(modelConfigFile: String,
+         modelType: String,
+         modelSize: Int64 = 0,
+         threads: Int,
+         useMmap: Bool,
+         nPrompt: Int,
+         nGenerate: Int,
+         backend: Int,
+         precision: Int,
+         power: Int,
+         memory: Int,
+         dynamicOption: Int) {
+        self.modelConfigFile = modelConfigFile
+        self.modelType = modelType
+        self.modelSize = modelSize
+        self.threads = threads
+        self.useMmap = useMmap
+        self.nPrompt = nPrompt
+        self.nGenerate = nGenerate
+        self.backend = backend
+        self.precision = precision
+        self.power = power
+        self.memory = memory
+        self.dynamicOption = dynamicOption
+    }
+
+    /// Calculates tokens per second from timing data
+    /// - Parameters:
+    ///   - tokens: Number of tokens processed
+    ///   - timesUs: Array of timing measurements in microseconds
+    /// - Returns: Array of tokens per second calculations
+    func getTokensPerSecond(tokens: Int, timesUs: [Int64]) -> [Double] {
+        return timesUs.compactMap { timeUs in
+            // Drop invalid (non-positive) timings instead of emitting a 0 tok/s
+            // sample that would skew the averages computed downstream.
+            guard timeUs > 0 else { return nil }
+            return Double(tokens) * 1_000_000.0 / Double(timeUs)
+        }
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/TestParameters.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/TestParameters.swift
new file mode 100644
index 00000000..c7686e01
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Models/TestParameters.swift
@@ -0,0 +1,34 @@
+//
+//  TestParameters.swift
+//  MNNLLMiOS
+//
+//  Created by 游薪渝(揽清) on 2025/7/10.
+//
+
+import Foundation
+
+/**
+ * Configuration parameters for benchmark test execution.
+ * Defines test scenarios including prompt sizes, generation lengths, and repetition counts.
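+ *
+ * With the defaults below, each (prompt, generate) pair in nPrompGen is run
+ * nRepeat times in llama-bench style: pp256+tg64 and pp512+tg128, three
+ * repetitions each.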
+ */ +struct TestParameters { + let nPrompt: [Int] + let nGenerate: [Int] + let nPrompGen: [(Int, Int)] + let nRepeat: [Int] + let kvCache: String + let loadTime: String + + static let `default` = TestParameters( + nPrompt: [256, 512], + nGenerate: [64, 128], + nPrompGen: [(256, 64), (512, 128)], + nRepeat: [3], // Reduced for mobile + kvCache: "false", // llama-bench style test by default + loadTime: "false" + ) +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Services/BenchmarkService.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Services/BenchmarkService.swift new file mode 100644 index 00000000..c2e7fe2f --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Services/BenchmarkService.swift @@ -0,0 +1,414 @@ +// +// BenchmarkService.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation +import Combine + +/** + * Protocol defining callback methods for benchmark execution events. + * Provides progress updates, completion notifications, and error handling. + */ +protocol BenchmarkCallback: AnyObject { + func onProgress(_ progress: BenchmarkProgress) + func onComplete(_ result: BenchmarkResult) + func onBenchmarkError(_ errorCode: Int, _ message: String) +} + +/** + * Singleton service class responsible for managing benchmark operations. + * Coordinates with LLMInferenceEngineWrapper to execute performance tests + * and provides real-time progress updates through callback mechanisms. + */ +class BenchmarkService: ObservableObject { + + // MARK: - Singleton & Properties + + static let shared = BenchmarkService() + + @Published private(set) var isRunning = false + private var shouldStop = false + private var currentTask: Task? + + // Real LLM inference engine - using actual MNN LLM wrapper + private var llmEngine: LLMInferenceEngineWrapper? + private var currentModelId: String? 
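+
+    // Typical call sequence (a sketch; the model identifier and path are
+    // placeholders):
+    //
+    //     let ready = await BenchmarkService.shared.initializeModel(
+    //         modelId: "my-model", modelPath: "/path/to/model")
+    //     guard ready else { return }
+    //     BenchmarkService.shared.runBenchmark(modelId: "my-model", callback: self)
+    //
+    // Progress, completion, and errors are then delivered through the
+    // BenchmarkCallback conformer on the main actor.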
+ + private init() {} + + // MARK: - Public Interface + + /// Initiates benchmark execution with specified parameters and callback handler + /// - Parameters: + /// - modelId: Identifier for the model to benchmark + /// - callback: Callback handler for progress and completion events + /// - runtimeParams: Runtime configuration parameters + /// - testParams: Test scenario parameters + func runBenchmark( + modelId: String, + callback: BenchmarkCallback, + runtimeParams: RuntimeParameters = .default, + testParams: TestParameters = .default + ) { + guard !isRunning else { + callback.onBenchmarkError(BenchmarkErrorCode.benchmarkRunning.rawValue, "Benchmark is already running") + return + } + + guard let engine = llmEngine, engine.isModelReady() else { + callback.onBenchmarkError(BenchmarkErrorCode.modelNotInitialized.rawValue, "Model is not initialized or not ready") + return + } + + isRunning = true + shouldStop = false + + currentTask = Task { + await performBenchmark( + engine: engine, + modelId: modelId, + callback: callback, + runtimeParams: runtimeParams, + testParams: testParams + ) + } + } + + /// Stops the currently running benchmark operation + func stopBenchmark() { + shouldStop = true + llmEngine?.stopBenchmark() + currentTask?.cancel() + isRunning = false + } + + /// Checks if the model is properly initialized and ready for benchmarking + /// - Returns: True if model is ready, false otherwise + func isModelInitialized() -> Bool { + return llmEngine != nil && llmEngine!.isModelReady() + } + + /// Initializes a model for benchmark testing + /// - Parameters: + /// - modelId: Identifier for the model + /// - modelPath: File system path to the model + /// - Returns: True if initialization succeeded, false otherwise + func initializeModel(modelId: String, modelPath: String) async -> Bool { + return await withCheckedContinuation { continuation in + // Release existing engine if any + llmEngine = nil + currentModelId = nil + + // Create new LLM inference engine + llmEngine = LLMInferenceEngineWrapper(modelPath: modelPath) { success in + if success { + self.currentModelId = modelId + print("BenchmarkService: Model \(modelId) initialized successfully") + } else { + self.llmEngine = nil + print("BenchmarkService: Failed to initialize model \(modelId)") + } + continuation.resume(returning: success) + } + } + } + + /// Retrieves information about the currently loaded model + /// - Returns: Model information string, or nil if no model is loaded + func getModelInfo() -> String? 
{ + guard let modelId = currentModelId else { return nil } + return "Model: \(modelId), Engine: MNN LLM" + } + + /// Releases the current model and frees associated resources + func releaseModel() { + llmEngine = nil + currentModelId = nil + } + + // MARK: - Benchmark Execution + + /// Performs the actual benchmark execution with progress tracking + /// - Parameters: + /// - engine: LLM inference engine instance + /// - modelId: Model identifier + /// - callback: Progress and completion callback handler + /// - runtimeParams: Runtime configuration + /// - testParams: Test parameters + private func performBenchmark( + engine: LLMInferenceEngineWrapper, + modelId: String, + callback: BenchmarkCallback, + runtimeParams: RuntimeParameters, + testParams: TestParameters + ) async { + do { + let instances = generateTestInstances(runtimeParams: runtimeParams, testParams: testParams) + + var completedInstances = 0 + let totalInstances = instances.count + + for instance in instances { + if shouldStop { + await MainActor.run { + callback.onBenchmarkError(BenchmarkErrorCode.benchmarkStopped.rawValue, "Benchmark stopped by user") + self.isRunning = false + } + return + } + + // Create TestInstance for current configuration + let testInstance = TestInstance( + modelConfigFile: instance.configPath, + modelType: modelId, + modelSize: 0, // Will be calculated if needed + threads: instance.threads, + useMmap: instance.useMmap, + nPrompt: instance.nPrompt, + nGenerate: instance.nGenerate, + backend: instance.backend, + precision: instance.precision, + power: instance.power, + memory: instance.memory, + dynamicOption: instance.dynamicOption + ) + + // Update overall progress + let progress = (completedInstances * 100) / totalInstances + let statusMsg = "Running test \(completedInstances + 1)/\(totalInstances): pp\(instance.nPrompt)+tg\(instance.nGenerate)" + + await MainActor.run { + callback.onProgress(BenchmarkProgress( + progress: progress, + statusMessage: statusMsg, + progressType: .runningTest, + currentIteration: completedInstances + 1, + totalIterations: totalInstances, + nPrompt: instance.nPrompt, + nGenerate: instance.nGenerate + )) + } + + // Execute benchmark using LLMInferenceEngineWrapper + let result = await runOfficialBenchmark( + engine: engine, + instance: instance, + testInstance: testInstance, + progressCallback: { progress in + await MainActor.run { + callback.onProgress(progress) + } + } + ) + + if result.success { + completedInstances += 1 + + // Only call onComplete for the last test instance + if completedInstances == totalInstances { + await MainActor.run { + callback.onComplete(result) + } + } + } else { + await MainActor.run { + callback.onBenchmarkError(BenchmarkErrorCode.testInstanceFailed.rawValue, result.errorMessage ?? 
"Test failed") + self.isRunning = false + } + return + } + } + + await MainActor.run { + self.isRunning = false + } + + } catch { + await MainActor.run { + callback.onBenchmarkError(BenchmarkErrorCode.nativeError.rawValue, error.localizedDescription) + self.isRunning = false + } + } + } + + /// Executes a single benchmark test using the official MNN LLM benchmark interface + /// - Parameters: + /// - engine: LLM inference engine + /// - instance: Test configuration + /// - testInstance: Test instance to populate with results + /// - progressCallback: Callback for progress updates + /// - Returns: Benchmark result with success status and timing data + private func runOfficialBenchmark( + engine: LLMInferenceEngineWrapper, + instance: TestConfig, + testInstance: TestInstance, + progressCallback: @escaping (BenchmarkProgress) async -> Void + ) async -> BenchmarkResult { + + return await withCheckedContinuation { continuation in + var hasResumed = false + + engine.runOfficialBenchmark( + withBackend: instance.backend, + threads: instance.threads, + useMmap: instance.useMmap, + power: instance.power, + precision: instance.precision, + memory: instance.memory, + dynamicOption: instance.dynamicOption, + nPrompt: instance.nPrompt, + nGenerate: instance.nGenerate, + nRepeat: instance.nRepeat, + kvCache: instance.kvCache, + progressCallback: { [self] progressInfo in + // Convert Objective-C BenchmarkProgressInfo to Swift BenchmarkProgress + let swiftProgress = BenchmarkProgress( + progress: Int(progressInfo.progress), + statusMessage: progressInfo.statusMessage, + progressType: convertProgressType(progressInfo.progressType), + currentIteration: Int(progressInfo.currentIteration), + totalIterations: Int(progressInfo.totalIterations), + nPrompt: Int(progressInfo.nPrompt), + nGenerate: Int(progressInfo.nGenerate), + runTimeSeconds: progressInfo.runTimeSeconds, + prefillTimeSeconds: progressInfo.prefillTimeSeconds, + decodeTimeSeconds: progressInfo.decodeTimeSeconds, + prefillSpeed: progressInfo.prefillSpeed, + decodeSpeed: progressInfo.decodeSpeed + ) + + Task { + await progressCallback(swiftProgress) + } + }, + errorCallback: { errorMessage in + if !hasResumed { + hasResumed = true + let result = BenchmarkResult( + testInstance: testInstance, + success: false, + errorMessage: errorMessage + ) + continuation.resume(returning: result) + } + }, + iterationCompleteCallback: { detailedStats in + // Log detailed stats if needed + print("Benchmark iteration complete: \(detailedStats)") + }, + completeCallback: { benchmarkResult in + if !hasResumed { + hasResumed = true + + // Update test instance with timing results + testInstance.prefillUs = benchmarkResult.prefillTimesUs.map { $0.int64Value } + testInstance.decodeUs = benchmarkResult.decodeTimesUs.map { $0.int64Value } + testInstance.samplesUs = benchmarkResult.sampleTimesUs.map { $0.int64Value } + + let result = BenchmarkResult( + testInstance: testInstance, + success: benchmarkResult.success, + errorMessage: benchmarkResult.errorMessage + ) + continuation.resume(returning: result) + } + } + ) + } + } + + // MARK: - Helper Methods & Configuration + + /// Converts Objective-C progress type to Swift enum + /// - Parameter objcType: Objective-C progress type + /// - Returns: Corresponding Swift ProgressType + private func convertProgressType(_ objcType: BenchmarkProgressType) -> ProgressType { + switch objcType { + case .unknown: + return .unknown + case .initializing: + return .initializing + case .warmingUp: + return .warmingUp + case .runningTest: + 
return .runningTest + case .processingResults: + return .processingResults + case .completed: + return .completed + case .stopping: + return .stopping + @unknown default: + return .unknown + } + } + + /// Generates test instances by combining runtime and test parameters + /// - Parameters: + /// - runtimeParams: Runtime configuration parameters + /// - testParams: Test scenario parameters + /// - Returns: Array of test configurations for execution + private func generateTestInstances( + runtimeParams: RuntimeParameters, + testParams: TestParameters + ) -> [TestConfig] { + var instances: [TestConfig] = [] + + for backend in runtimeParams.backends { + for thread in runtimeParams.threads { + for power in runtimeParams.power { + for precision in runtimeParams.precision { + for memory in runtimeParams.memory { + for dynamicOption in runtimeParams.dynamicOption { + for repeatCount in testParams.nRepeat { + for (nPrompt, nGenerate) in testParams.nPrompGen { + instances.append(TestConfig( + configPath: "", // Will be set based on model + backend: backend, + threads: thread, + useMmap: runtimeParams.useMmap, + power: power, + precision: precision, + memory: memory, + dynamicOption: dynamicOption, + nPrompt: nPrompt, + nGenerate: nGenerate, + nRepeat: repeatCount, + kvCache: testParams.kvCache == "true" + )) + } + } + } + } + } + } + } + } + + return instances + } +} + +// MARK: - Test Configuration + +/** + * Structure containing configuration parameters for a single benchmark test. + * Combines runtime settings and test parameters into a complete test specification. + */ +struct TestConfig { + let configPath: String + let backend: Int + let threads: Int + let useMmap: Bool + let power: Int + let precision: Int + let memory: Int + let dynamicOption: Int + let nPrompt: Int + let nGenerate: Int + let nRepeat: Int + let kvCache: Bool +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/ViewModels/BenchmarkViewModel.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/ViewModels/BenchmarkViewModel.swift new file mode 100644 index 00000000..a41a7941 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/ViewModels/BenchmarkViewModel.swift @@ -0,0 +1,444 @@ +// +// BenchmarkViewModel.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation +import SwiftUI +import Combine + +/** + * ViewModel for managing benchmark operations including model selection, test execution, + * progress tracking, and result management. Handles communication with BenchmarkService + * and provides UI state management for the benchmark interface. + */ +@MainActor +class BenchmarkViewModel: ObservableObject { + + // MARK: - Published Properties + + @Published var isLoading = false + @Published var isRunning = false + @Published var showProgressBar = false + @Published var showResults = false + @Published var showError = false + + @Published var selectedModel: ModelInfo? + @Published var availableModels: [ModelInfo] = [] + @Published var currentProgress: BenchmarkProgress? + @Published var benchmarkResults: BenchmarkResults? 
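+
+    // These @Published properties are the view model's UI contract: any SwiftUI
+    // view observing this object re-renders when one of them changes.
+    // Minimal observer sketch (hypothetical view):
+    //
+    //     struct BenchmarkStatusLabel: View {
+    //         @ObservedObject var viewModel: BenchmarkViewModel
+    //         var body: some View { Text(viewModel.statusMessage) }
+    //     }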
+ @Published var errorMessage: String = "" + @Published var statusMessage: String = "" + + @Published var startButtonText = String(localized: "Start Test") + @Published var isStartButtonEnabled = true + + // MARK: - Private Properties + + private let benchmarkService = BenchmarkService.shared + private let resultsHelper = BenchmarkResultsHelper.shared + private var cancellables = Set() + + // Model list manager for getting local models + private let modelListManager = ModelListManager.shared + + // MARK: - Initialization & Setup + + init() { + setupBindings() + loadAvailableModels() + } + + /// Sets up reactive bindings between service and view model + private func setupBindings() { + benchmarkService.$isRunning + .receive(on: DispatchQueue.main) + .assign(to: \.isRunning, on: self) + .store(in: &cancellables) + + // Update button text based on running state + benchmarkService.$isRunning + .receive(on: DispatchQueue.main) + .map { isRunning in + isRunning ? String(localized: "Stop Test") : String(localized: "Start Test") + } + .assign(to: \.startButtonText, on: self) + .store(in: &cancellables) + } + + /// Loads available models from ModelListManager, filtering for downloaded models only + private func loadAvailableModels() { + Task { + isLoading = true + + do { + // Get all models from ModelListManager + let allModels = try await modelListManager.loadModels() + + // Filter only downloaded models that are available locally + availableModels = allModels.filter { model in + model.isDownloaded && model.localPath != nil + } + + print("BenchmarkViewModel: Loaded \(availableModels.count) available local models") + + } catch { + showErrorMessage("Failed to load models: \(error.localizedDescription)") + } + + isLoading = false + } + } + + // MARK: - Public Action Handlers + + /// Handles start/stop benchmark button taps + func onStartBenchmarkTapped() { + if !isRunning { + startBenchmark() + } else { + showStopConfirmationAlert() + } + } + + /// Handles benchmark stop confirmation + func onStopBenchmarkTapped() { + stopBenchmark() + } + + /// Handles model selection from dropdown + func onModelSelected(_ model: ModelInfo) { + selectedModel = model + } + + /// Handles result deletion and cleanup + func onDeleteResultTapped() { + benchmarkResults = nil + showResults = false + hideStatus() + + // Release model to free memory + benchmarkService.releaseModel() + } + + /// Placeholder for future result submission functionality + func onSubmitResultTapped() { + // Implementation for submitting results (if needed) + // This could involve sharing or uploading results + } + + // MARK: - Benchmark Execution + + /// Initiates benchmark test with selected model and configured parameters + private func startBenchmark() { + guard let model = selectedModel else { + showErrorMessage("Please select a model first") + return + } + + guard model.isDownloaded else { + showErrorMessage("Selected model is not downloaded or path is invalid") + return + } + + onBenchmarkStarted() + + Task { + // Initialize model if needed + let initialized = await benchmarkService.initializeModel( + modelId: model.id, + modelPath: model.localPath + ) + + guard initialized else { + showErrorMessage("Failed to initialize model") + resetUIState() + return + } + + // Start memory monitoring + MemoryMonitor.shared.start() + + // Start benchmark with optimized parameters for mobile devices + benchmarkService.runBenchmark( + modelId: model.id, + callback: self, + runtimeParams: createRuntimeParameters(), + testParams: createTestParameters() + ) + 
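+
+            // From here the service drives the run: progress arrives via
+            // onProgress(_:), the final result via onComplete(_:), and failures
+            // via onBenchmarkError(_:_:), all implemented in the
+            // BenchmarkCallback extension below.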
} + } + + /// Creates runtime parameters optimized for iOS devices + private func createRuntimeParameters() -> RuntimeParameters { + return RuntimeParameters( + backends: [0], // CPU backend + threads: [4], // 4 threads for most iOS devices + useMmap: false, // Memory mapping disabled for iOS + power: [0], // Normal power mode + precision: [2], // Low precision for better performance + memory: [2], // Low memory usage + dynamicOption: [0] // No dynamic optimization + ) + } + + /// Creates test parameters suitable for mobile benchmarking + private func createTestParameters() -> TestParameters { + return TestParameters( + nPrompt: [256, 512], // Smaller prompt sizes for mobile + nGenerate: [64, 128], // Smaller generation sizes + nPrompGen: [(256, 64), (512, 128)], // Combined test cases + nRepeat: [3], // Fewer repetitions for faster testing + kvCache: "false", // Disable KV cache by default + loadTime: "false" + ) + } + + /// Stops the currently running benchmark + private func stopBenchmark() { + updateStatus("Stopping benchmark...") + benchmarkService.stopBenchmark() + MemoryMonitor.shared.stop() + } + + // MARK: - UI State Management + + /// Updates UI state when benchmark starts + private func onBenchmarkStarted() { + isStartButtonEnabled = true + showProgressBar = true + showResults = false + updateStatus("Initializing benchmark...") + } + + /// Resets UI to initial state + private func resetUIState() { + isStartButtonEnabled = true + showProgressBar = false + hideStatus() + showResults = false + MemoryMonitor.shared.stop() + } + + /// Updates status message display + private func updateStatus(_ message: String) { + statusMessage = message + } + + /// Hides status message + private func hideStatus() { + statusMessage = "" + } + + /// Shows error message alert + private func showErrorMessage(_ message: String) { + errorMessage = message + showError = true + } + + /// Placeholder for stop confirmation alert (handled in View) + private func showStopConfirmationAlert() { + // This will be handled in the View with an alert + } + + /// Formats progress messages with appropriate status text based on progress type + private func formatProgressMessage(_ progress: BenchmarkProgress) -> BenchmarkProgress { + let formattedMessage: String + + switch progress.progressType { + case .initializing: + formattedMessage = "Initializing benchmark..." + case .warmingUp: + formattedMessage = "Warming up..." + case .runningTest: + formattedMessage = "Running test \(progress.currentIteration)/\(progress.totalIterations)" + case .processingResults: + formattedMessage = "Processing results..." + case .completed: + formattedMessage = "All tests completed" + case .stopping: + formattedMessage = "Stopping benchmark..." 
+ default: + formattedMessage = progress.statusMessage + } + + return BenchmarkProgress( + progress: progress.progress, + statusMessage: formattedMessage, + progressType: progress.progressType, + currentIteration: progress.currentIteration, + totalIterations: progress.totalIterations, + nPrompt: progress.nPrompt, + nGenerate: progress.nGenerate, + runTimeSeconds: progress.runTimeSeconds, + prefillTimeSeconds: progress.prefillTimeSeconds, + decodeTimeSeconds: progress.decodeTimeSeconds, + prefillSpeed: progress.prefillSpeed, + decodeSpeed: progress.decodeSpeed + ) + } +} + +// MARK: - BenchmarkCallback Implementation + +extension BenchmarkViewModel: BenchmarkCallback { + + /// Handles progress updates from benchmark service + func onProgress(_ progress: BenchmarkProgress) { + let formattedProgress = formatProgressMessage(progress) + currentProgress = formattedProgress + updateStatus(formattedProgress.statusMessage) + } + + /// Handles benchmark completion with results processing + func onComplete(_ result: BenchmarkResult) { + guard let model = selectedModel else { return } + + updateStatus("Processing results...") + + // Create comprehensive benchmark results + let results = BenchmarkResults( + modelDisplayName: model.modelName, + maxMemoryKb: MemoryMonitor.shared.getMaxMemoryKb(), + testResults: [result.testInstance], + timestamp: DateFormatter.benchmarkTimestamp.string(from: Date()) + ) + + benchmarkResults = results + showResults = true + + // Only stop memory monitoring if benchmark is no longer running (all tests completed) + if !isRunning { + // Stop memory monitoring + MemoryMonitor.shared.stop() + } + + // Always hide status after processing results + hideStatus() + + print("BenchmarkViewModel: Benchmark completed successfully for model: \(model.modelName)") + } + + /// Handles benchmark errors with user-friendly error messages + func onBenchmarkError(_ errorCode: Int, _ message: String) { + let errorCodeName = BenchmarkErrorCode(rawValue: errorCode)?.description ?? "Unknown" + showErrorMessage("Benchmark failed (\(errorCodeName)): \(message)") + resetUIState() + print("BenchmarkViewModel: Benchmark error (\(errorCode)): \(message)") + } +} + +// MARK: - Memory Monitoring + +/** + * Singleton class for monitoring memory usage during benchmark execution. + * Tracks current and peak memory consumption using system APIs. + */ +class MemoryMonitor: ObservableObject { + + static let shared = MemoryMonitor() + + @Published private(set) var currentMemoryKb: Int64 = 0 + private var maxMemoryKb: Int64 = 0 + private var isMonitoring = false + private var monitoringTask: Task? + + private init() {} + + /// Starts continuous memory monitoring + func start() { + guard !isMonitoring else { return } + + isMonitoring = true + maxMemoryKb = 0 + + monitoringTask = Task { + while isMonitoring && !Task.isCancelled { + await updateMemoryUsage() + try? 
await Task.sleep(nanoseconds: 500_000_000) // 0.5 seconds
+            }
+        }
+    }
+
+    /// Stops memory monitoring
+    func stop() {
+        isMonitoring = false
+        monitoringTask?.cancel()
+        monitoringTask = nil
+    }
+
+    /// Resets memory tracking counters
+    func reset() {
+        maxMemoryKb = 0
+        currentMemoryKb = 0
+    }
+
+    /// Returns the maximum memory usage recorded during monitoring
+    func getMaxMemoryKb() -> Int64 {
+        return maxMemoryKb
+    }
+
+    /// Updates current memory usage and tracks maximum
+    @MainActor
+    private func updateMemoryUsage() {
+        let memoryUsage = getCurrentMemoryUsage()
+        currentMemoryKb = memoryUsage
+        maxMemoryKb = max(maxMemoryKb, memoryUsage)
+    }
+
+    /// Gets current memory usage from system using mach task info
+    private func getCurrentMemoryUsage() -> Int64 {
+        var info = mach_task_basic_info()
+        var count = mach_msg_type_number_t(MemoryLayout<mach_task_basic_info>.size) / 4
+
+        let kerr: kern_return_t = withUnsafeMutablePointer(to: &info) {
+            $0.withMemoryRebound(to: integer_t.self, capacity: 1) {
+                task_info(mach_task_self_,
+                          task_flavor_t(MACH_TASK_BASIC_INFO),
+                          $0,
+                          &count)
+            }
+        }
+
+        if kerr == KERN_SUCCESS {
+            return Int64(info.resident_size) / 1024 // Convert to KB
+        } else {
+            return 0
+        }
+    }
+}
+
+// MARK: - Extensions
+
+/// Extension providing user-friendly descriptions for benchmark error codes
+extension BenchmarkErrorCode {
+    var description: String {
+        switch self {
+        case .benchmarkFailedUnknown:
+            return "Unknown Error"
+        case .testInstanceFailed:
+            return "Test Failed"
+        case .modelNotInitialized:
+            return "Model Not Ready"
+        case .benchmarkRunning:
+            return "Already Running"
+        case .benchmarkStopped:
+            return "Stopped"
+        case .nativeError:
+            return "Native Error"
+        case .modelError:
+            return "Model Error"
+        }
+    }
+}
+
+/// Extension providing formatted timestamp for benchmark results
+extension DateFormatter {
+    static let benchmarkTimestamp: DateFormatter = {
+        let formatter = DateFormatter()
+        formatter.dateFormat = "yyyy/M/dd HH:mm:ss"
+        return formatter
+    }()
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/MetricCard.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/MetricCard.swift
new file mode 100644
index 00000000..c2ad6200
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/MetricCard.swift
@@ -0,0 +1,64 @@
+//
+//  MetricCard.swift
+//  MNNLLMiOS
+//
+//  Created by 游薪渝(揽清) on 2025/7/21.
+//
+
+import SwiftUI
+
+/**
+ * Reusable metric display card component.
+ * Shows performance metrics with icon, title, and value in a compact format.
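+ *
+ * The icon parameter is an SF Symbols name, rendered with Image(systemName:):
+ *
+ * ```swift
+ * MetricCard(title: "Runtime", value: "2.456s", icon: "clock")
+ * ```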
+ */ +struct MetricCard: View { + let title: String + let value: String + let icon: String + + var body: some View { + VStack(alignment: .leading, spacing: 6) { + HStack(spacing: 6) { + Image(systemName: icon) + .font(.caption) + .foregroundColor(.benchmarkAccent) + + Text(title) + .font(.caption) + .foregroundColor(.benchmarkSecondary) + .lineLimit(1) + } + + Text(value) + .font(.system(size: 14, weight: .semibold)) + .foregroundColor(.primary) + .lineLimit(1) + } + .frame(maxWidth: .infinity, alignment: .leading) + .padding(.horizontal, 12) + .padding(.vertical, 8) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(Color.benchmarkAccent.opacity(0.05)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.benchmarkAccent.opacity(0.1), lineWidth: 1) + ) + ) + } +} + +#Preview { + HStack(spacing: 12) { + MetricCard(title: "Runtime", value: "2.456s", icon: "clock") + MetricCard(title: "Speed", value: "109.8 t/s", icon: "speedometer") + MetricCard(title: "Memory", value: "1.2 GB", icon: "memorychip") + } + .padding() +} \ No newline at end of file diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ModelSelectionCard.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ModelSelectionCard.swift new file mode 100644 index 00000000..9fbc8147 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ModelSelectionCard.swift @@ -0,0 +1,241 @@ +// +// ModelSelectionCard.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/21. +// + +import SwiftUI + +/** + * Reusable model selection card component for benchmark interface. + * Provides dropdown menu for model selection and start/stop controls. + */ +struct ModelSelectionCard: View { + @ObservedObject var viewModel: BenchmarkViewModel + @Binding var showStopConfirmation: Bool + + var body: some View { + VStack(alignment: .leading, spacing: 16) { + HStack { + Text("Select Model") + .font(.title3) + .fontWeight(.semibold) + .foregroundColor(.primary) + + Spacer() + } + + if viewModel.isLoading { + HStack { + ProgressView() + .scaleEffect(0.8) + Text("Loading models...") + .font(.subheadline) + .foregroundColor(.secondary) + } + .frame(maxWidth: .infinity, alignment: .leading) + } else { + modelDropdownMenu + } + + startStopButton + + statusMessages + } + .padding(20) + .background( + RoundedRectangle(cornerRadius: 16) + .fill(Color.benchmarkCardBg) + .overlay( + RoundedRectangle(cornerRadius: 16) + .stroke(Color.benchmarkSuccess.opacity(0.3), lineWidth: 1) + ) + ) + } + + // MARK: - Private Views + + private var modelDropdownMenu: some View { + Menu { + if viewModel.availableModels.isEmpty { + Button("No models available") { + // Placeholder - no action + } + .disabled(true) + } else { + ForEach(viewModel.availableModels, id: \.id) { model in + Button(action: { + viewModel.onModelSelected(model) + }) { + HStack { + VStack(alignment: .leading, spacing: 2) { + Text(model.modelName) + .font(.system(size: 14, weight: .medium)) + Text("Local") + .font(.caption) + .foregroundColor(.secondary) + } + } + } + } + } + } label: { + HStack(spacing: 16) { + VStack(alignment: .leading, spacing: 6) { + Text(viewModel.selectedModel?.modelName ?? String(localized: "Choose your AI model")) + .font(.system(size: 16, weight: .medium)) + .foregroundColor(viewModel.isRunning ? .secondary : (viewModel.selectedModel != nil ? 
.primary : .benchmarkSecondary)) + .lineLimit(1) + + if let model = viewModel.selectedModel { + HStack(spacing: 8) { + HStack(spacing: 4) { + Circle() + .fill(Color.benchmarkSuccess) + .frame(width: 6, height: 6) + Text("Ready") + .font(.caption) + .foregroundColor(.benchmarkSuccess) + } + + if let size = model.cachedSize { + Text("• \(formatBytes(size))") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + } + } else { + Text("Tap to select a model for testing") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + } + + Spacer() + + Image(systemName: "chevron.down") + .font(.system(size: 14, weight: .medium)) + .foregroundColor(viewModel.isRunning ? .secondary : .benchmarkSecondary) + .rotationEffect(.degrees(0)) + } + .padding(20) + .background( + RoundedRectangle(cornerRadius: 16) + .fill(Color.benchmarkCardBg) + .overlay( + RoundedRectangle(cornerRadius: 16) + .stroke( + viewModel.isRunning ? + Color.gray.opacity(0.1) : + (viewModel.selectedModel != nil ? + Color.benchmarkAccent.opacity(0.3) : + Color.gray.opacity(0.2)), + lineWidth: 1 + ) + )) + } + .disabled(viewModel.isRunning) + } + + private var startStopButton: some View { + Button(action: { + if viewModel.startButtonText.contains("Stop") { + showStopConfirmation = true + } else { + viewModel.onStartBenchmarkTapped() + } + }) { + HStack(spacing: 12) { + ZStack { + Circle() + .fill(Color.white.opacity(0.2)) + .frame(width: 32, height: 32) + + if viewModel.isRunning && viewModel.startButtonText.contains("Stop") { + ProgressView() + .progressViewStyle(CircularProgressViewStyle(tint: .white)) + .scaleEffect(0.7) + } else { + Image(systemName: viewModel.startButtonText.contains("Stop") ? "stop.fill" : "play.fill") + .font(.system(size: 16, weight: .bold)) + .foregroundColor(.white) + } + } + + Text(viewModel.startButtonText) + .font(.system(size: 18, weight: .semibold)) + .foregroundColor(.white) + + Spacer() + + if !viewModel.startButtonText.contains("Stop") { + Image(systemName: "arrow.right") + .font(.system(size: 16, weight: .semibold)) + .foregroundColor(.white.opacity(0.8)) + } + } + .frame(maxWidth: .infinity) + .padding(.horizontal, 24) + .padding(.vertical, 18) + .background( + RoundedRectangle(cornerRadius: 16) + .fill( + viewModel.isStartButtonEnabled ? + (viewModel.startButtonText.contains("Stop") ? + LinearGradient( + colors: [Color.benchmarkError, Color.benchmarkError.opacity(0.8)], + startPoint: .leading, + endPoint: .trailing + ) : + LinearGradient( + colors: [Color.benchmarkGradientStart, Color.benchmarkGradientEnd], + startPoint: .leading, + endPoint: .trailing + )) : + LinearGradient( + colors: [Color.gray, Color.gray.opacity(0.8)], + startPoint: .leading, + endPoint: .trailing + ) + ) + ) + } + .disabled(!viewModel.isStartButtonEnabled || viewModel.selectedModel == nil) + .animation(.easeInOut(duration: 0.2), value: viewModel.startButtonText) + .animation(.easeInOut(duration: 0.2), value: viewModel.isStartButtonEnabled) + } + + private var statusMessages: some View { + Group { + if viewModel.selectedModel == nil { + Text("Start benchmark after selecting your model") + .font(.caption) + .foregroundColor(.orange) + .padding(.horizontal, 16) + } else if viewModel.availableModels.isEmpty { + Text("No local models found. 
Please download a model first.") + .font(.caption) + .foregroundColor(.orange) + .padding(.horizontal, 16) + } + } + } + + // MARK: - Helper Functions + + private func formatBytes(_ bytes: Int64) -> String { + let formatter = ByteCountFormatter() + formatter.allowedUnits = [.useGB, .useMB] + formatter.countStyle = .file + return formatter.string(fromByteCount: bytes) + } +} + +#Preview { + ModelSelectionCard( + viewModel: BenchmarkViewModel(), + showStopConfirmation: .constant(false) + ) + .padding() +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/PerformanceMetricView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/PerformanceMetricView.swift new file mode 100644 index 00000000..7b355c83 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/PerformanceMetricView.swift @@ -0,0 +1,117 @@ +// +// EnhancedPerformanceMetricView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/21. +// + +import SwiftUI + +/** + * Enhanced performance metric display component. + * Shows detailed performance metrics with gradient backgrounds, icons, and custom colors. + */ +struct PerformanceMetricView: View { + let icon: String + let title: String + let value: String + let subtitle: String + let color: Color + + var body: some View { + VStack(alignment: .center, spacing: 12) { + ZStack { + Circle() + .fill( + LinearGradient( + colors: [color.opacity(0.2), color.opacity(0.1)], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + ) + .frame(width: 50, height: 50) + + Image(systemName: icon) + .font(.system(size: 25, weight: .semibold)) + .foregroundColor(color) + } + + VStack(alignment: .center, spacing: 2) { + Text(title) + .font(.subheadline) + .fontWeight(.medium) + .foregroundColor(.primary) + + Text(subtitle) + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + + Text(value) + .font(.title2) + .fontWeight(.bold) + .foregroundColor(color) + .multilineTextAlignment(.center) + .lineLimit(nil) + .fixedSize(horizontal: false, vertical: true) + } + .frame(maxWidth: .infinity, alignment: .center) + .padding(16) + .background( + RoundedRectangle(cornerRadius: 12) + .fill( + LinearGradient( + colors: [Color.benchmarkCardBg, color.opacity(0.02)], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + ) + .overlay( + RoundedRectangle(cornerRadius: 12) + .stroke(color.opacity(0.2), lineWidth: 1) + ) + ) + } +} + + +#Preview { + VStack(spacing: 16) { + HStack(spacing: 12) { + PerformanceMetricView( + icon: "speedometer", + title: "Prefill Speed", + value: "1024.5 t/s", + subtitle: "Tokens per second", + color: .benchmarkGradientStart + ) + + PerformanceMetricView( + icon: "gauge", + title: "Decode Speed", + value: "109.8 t/s", + subtitle: "Generation rate", + color: .benchmarkGradientEnd + ) + } + + HStack(spacing: 12) { + PerformanceMetricView( + icon: "memorychip", + title: "Memory Usage", + value: "1.2 GB", + subtitle: "Peak memory", + color: .benchmarkWarning + ) + + PerformanceMetricView( + icon: "clock", + title: "Total Time", + value: "2.456s", + subtitle: "Complete duration", + color: .benchmarkSuccess + ) + } + } + .padding() +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ProgressCard.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ProgressCard.swift new file mode 100644 index 00000000..6f8cc1d6 --- /dev/null +++ 
b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ProgressCard.swift @@ -0,0 +1,187 @@ +// +// ProgressCard.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/21. +// + +import SwiftUI + +/** + * Reusable progress tracking card component for benchmark interface. + * Displays test progress with detailed metrics and visual indicators. + */ +struct ProgressCard: View { + let progress: BenchmarkProgress? + + var body: some View { + VStack(alignment: .leading, spacing: 20) { + if let progress = progress { + VStack(alignment: .leading, spacing: 16) { + progressHeader(progress) + progressBar(progress) + + if progress.progressType == .runningTest && progress.totalIterations > 0 { + testDetails(progress) + } + } + } else { + fallbackProgress + } + } + .padding(20) + .background( + RoundedRectangle(cornerRadius: 16) + .fill(Color.benchmarkCardBg) + .overlay( + RoundedRectangle(cornerRadius: 16) + .stroke(Color.benchmarkSuccess.opacity(0.3), lineWidth: 1) + ) + ) + } + + // MARK: - Private Views + + private func progressHeader(_ progress: BenchmarkProgress) -> some View { + HStack { + HStack(spacing: 12) { + ZStack { + Circle() + .fill( + LinearGradient( + colors: [Color.benchmarkAccent.opacity(0.2), Color.benchmarkGradientEnd.opacity(0.1)], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + ) + .frame(width: 40, height: 40) + + Image(systemName: "chart.line.uptrend.xyaxis") + .font(.system(size: 18, weight: .semibold)) + .foregroundColor(.benchmarkAccent) + } + + VStack(alignment: .leading, spacing: 2) { + Text("Test Progress") + .font(.title3) + .fontWeight(.semibold) + .foregroundColor(.primary) + + Text("Running performance tests") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + } + + Spacer() + + VStack(alignment: .trailing, spacing: 2) { + Text("\(progress.progress)%") + .font(.title2) + .fontWeight(.bold) + .foregroundColor(.benchmarkAccent) + + Text("Complete") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + } + } + + private func progressBar(_ progress: BenchmarkProgress) -> some View { + VStack(spacing: 8) { + ZStack(alignment: .leading) { + RoundedRectangle(cornerRadius: 8) + .fill(Color.gray.opacity(0.2)) + .frame(height: 8) + + RoundedRectangle(cornerRadius: 8) + .fill( + LinearGradient( + colors: [Color.benchmarkGradientStart, Color.benchmarkGradientEnd], + startPoint: .leading, + endPoint: .trailing + ) + ) + .frame(width: CGFloat(progress.progress) / 100 * UIScreen.main.bounds.width * 0.8, height: 8) + .animation(.easeInOut(duration: 0.3), value: progress.progress) + } + } + } + + private func testDetails(_ progress: BenchmarkProgress) -> some View { + VStack(alignment: .leading, spacing: 12) { + // Test iteration info + HStack { + Image(systemName: "repeat") + .font(.caption) + .foregroundColor(.benchmarkAccent) + + Text("Test \(progress.currentIteration) of \(progress.totalIterations)") + .font(.subheadline) + .fontWeight(.medium) + .foregroundColor(.primary) + + Spacer() + + Text("PP: \(progress.nPrompt) • TG: \(progress.nGenerate)") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + .padding(.horizontal, 8) + .padding(.vertical, 4) + .background( + RoundedRectangle(cornerRadius: 6) + .fill(Color.benchmarkAccent.opacity(0.1)) + ) + } + + // Real-time performance metrics + if progress.runTimeSeconds > 0 { + VStack(spacing: 12) { + // Timing metrics + HStack(spacing: 12) { + MetricCard(title: "Runtime", value: String(format: "%.3fs", progress.runTimeSeconds), icon: "clock") + MetricCard(title: "Prefill", 
value: String(format: "%.3fs", progress.prefillTimeSeconds), icon: "arrow.up.circle") + MetricCard(title: "Decode", value: String(format: "%.3fs", progress.decodeTimeSeconds), icon: "arrow.down.circle") + } + + // Speed metrics + HStack(spacing: 12) { + MetricCard(title: "Prefill Speed", value: String(format: "%.2f t/s", progress.prefillSpeed), icon: "speedometer") + MetricCard(title: "Decode Speed", value: String(format: "%.2f t/s", progress.decodeSpeed), icon: "gauge") + Spacer() + } + } + } + } + } + + private var fallbackProgress: some View { + VStack(alignment: .leading, spacing: 8) { + Text("Progress") + .font(.headline) + ProgressView() + .progressViewStyle(LinearProgressViewStyle()) + } + } +} + +#Preview { + ProgressCard( + progress: BenchmarkProgress( + progress: 65, + statusMessage: "Running benchmark...", + progressType: .runningTest, + currentIteration: 3, + totalIterations: 5, + nPrompt: 128, + nGenerate: 256, + runTimeSeconds: 2.456, + prefillTimeSeconds: 0.123, + decodeTimeSeconds: 2.333, + prefillSpeed: 1024.5, + decodeSpeed: 109.8 + ) + ) + .padding() +} \ No newline at end of file diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ResultsCard.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ResultsCard.swift new file mode 100644 index 00000000..37f01980 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/ResultsCard.swift @@ -0,0 +1,311 @@ +// +// ResultsCard.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/21. +// + +import SwiftUI + +/** + * Reusable results display card component for benchmark interface. + * Shows comprehensive benchmark results with performance metrics and statistics. + */ +struct ResultsCard: View { + let results: BenchmarkResults + + var body: some View { + VStack(alignment: .leading, spacing: 20) { + resultsHeader + infoHeader + performanceMetrics + detailedStats + } + .padding(20) + .background( + RoundedRectangle(cornerRadius: 16) + .fill(Color.benchmarkCardBg) + .overlay( + RoundedRectangle(cornerRadius: 16) + .stroke(Color.benchmarkSuccess.opacity(0.3), lineWidth: 1) + ) + ) + } + + // MARK: - Private Views + + private var infoHeader: some View { + + let statistics = BenchmarkResultsHelper.shared.processTestResults(results.testResults) + + return VStack(alignment: .leading, spacing: 8) { + Text(results.modelDisplayName) + .font(.headline) + Text(BenchmarkResultsHelper.shared.getDeviceInfo()) + .font(.subheadline) + .foregroundColor(.secondary) + + Text("Benchmark Config") + .font(.headline) + Text(statistics.configText) + .font(.subheadline) + .lineLimit(nil) + .fixedSize(horizontal: false, vertical: true) + .foregroundColor(.secondary) + } + } + + private var resultsHeader: some View { + HStack { + HStack(spacing: 12) { + ZStack { + Circle() + .fill( + LinearGradient( + colors: [Color.benchmarkSuccess.opacity(0.2), Color.benchmarkSuccess.opacity(0.1)], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + ) + .frame(width: 40, height: 40) + + Image(systemName: "chart.bar.fill") + .font(.system(size: 18, weight: .semibold)) + .foregroundColor(.benchmarkSuccess) + } + + VStack(alignment: .leading, spacing: 2) { + Text("Benchmark Results") + .font(.title3) + .fontWeight(.semibold) + .foregroundColor(.primary) + + Text("Performance analysis complete") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + } + + Spacer() + + Button(action: { + shareResults() + }) { + VStack(alignment: .center, spacing: 2) { + 
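+ // Share button: icon stacked above a caption; tapping calls shareResults(), which snapshots this card into an image and hands it, with a text summary, to the system share sheet.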
Image(systemName: "square.and.arrow.up") + .font(.title2) + .foregroundColor(.benchmarkSuccess) + + Text("Share") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + } + .buttonStyle(PlainButtonStyle()) + } + } + + + private var performanceMetrics: some View { + let statistics = BenchmarkResultsHelper.shared.processTestResults(results.testResults) + + return VStack(spacing: 16) { + HStack(spacing: 12) { + if let prefillStats = statistics.prefillStats { + PerformanceMetricView( + icon: "speedometer", + title: "Prefill Speed", + value: BenchmarkResultsHelper.shared.formatSpeedStatisticsLine(prefillStats), + subtitle: "Tokens per second", + color: .benchmarkGradientStart + ) + } else { + PerformanceMetricView( + icon: "speedometer", + title: "Prefill Speed", + value: "N/A", + subtitle: "Tokens per second", + color: .benchmarkGradientStart + ) + } + + if let decodeStats = statistics.decodeStats { + PerformanceMetricView( + icon: "gauge", + title: "Decode Speed", + value: BenchmarkResultsHelper.shared.formatSpeedStatisticsLine(decodeStats), + subtitle: "Generation rate", + color: .benchmarkGradientEnd + ) + } else { + PerformanceMetricView( + icon: "gauge", + title: "Decode Speed", + value: "N/A", + subtitle: "Generation rate", + color: .benchmarkGradientEnd + ) + } + } + + HStack(spacing: 12) { + let totalMemoryKb = BenchmarkResultsHelper.shared.getTotalSystemMemoryKb() + let memoryInfo = BenchmarkResultsHelper.shared.formatMemoryUsage( + maxMemoryKb: results.maxMemoryKb, + totalKb: totalMemoryKb + ) + + PerformanceMetricView( + icon: "memorychip", + title: "Memory Usage", + value: memoryInfo.valueText, + subtitle: "Peak memory", + color: .benchmarkWarning + ) + + PerformanceMetricView( + icon: "clock", + title: "Total Tokens", + value: "\(statistics.totalTokensProcessed)", + subtitle: "Tokens processed", + color: .benchmarkSuccess + ) + } + } + } + + private var detailedStats: some View { + return VStack(alignment: .leading, spacing: 12) { + VStack(spacing: 8) { + HStack { + Text("Completed") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + Spacer() + Text(results.timestamp) + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + + HStack { + Text("Powered By MNN") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + Spacer() + Text(verbatim: "https://github.com/alibaba/MNN") + .font(.caption) + .foregroundColor(.benchmarkSecondary) + } + } + .padding(.vertical, 8) + } + } + + // MARK: - Helper Functions + + /// Formats byte count into human-readable string + private func formatBytes(_ bytes: Int64) -> String { + let formatter = ByteCountFormatter() + formatter.allowedUnits = [.useKB, .useMB, .useGB] + formatter.countStyle = .file + return formatter.string(fromByteCount: bytes) + } + + /// Initiates sharing of benchmark results through system share sheet + private func shareResults() { + let viewToRender = self.body.frame(width: 390) // Adjust width as needed + if let image = viewToRender.snapshot() { + presentShareSheet(activityItems: [image, formatResultsForSharing()]) + } else { + presentShareSheet(activityItems: [formatResultsForSharing()]) + } + } + + private func presentShareSheet(activityItems: [Any]) { + let activityViewController = UIActivityViewController(activityItems: activityItems, applicationActivities: nil) + + if let windowScene = UIApplication.shared.connectedScenes.first as? 
UIWindowScene, + let window = windowScene.windows.first, + let rootViewController = window.rootViewController { + + if let popover = activityViewController.popoverPresentationController { + popover.sourceView = window + popover.sourceRect = CGRect(x: window.bounds.midX, y: window.bounds.midY, width: 0, height: 0) + popover.permittedArrowDirections = [] + } + + rootViewController.present(activityViewController, animated: true) + } + } + + /// Formats benchmark results into shareable text format with performance metrics and hashtags + private func formatResultsForSharing() -> String { + let statistics = BenchmarkResultsHelper.shared.processTestResults(results.testResults) + let deviceInfo = BenchmarkResultsHelper.shared.getDeviceInfo() + + var shareText = """ + 📱 MNN LLM Benchmark Results + + 🤖 Model: \(results.modelDisplayName) + 📱 \(deviceInfo) + 📅 Completed: \(results.timestamp) + + 📊 Configuration: + \(statistics.configText) + + ⚡️ Performance Results: + """ + + if let prefillStats = statistics.prefillStats { + shareText += "\n🔄 Prompt Processing: \(BenchmarkResultsHelper.shared.formatSpeedStatisticsLine(prefillStats))" + } + + if let decodeStats = statistics.decodeStats { + shareText += "\n⚡️ Token Generation: \(BenchmarkResultsHelper.shared.formatSpeedStatisticsLine(decodeStats))" + } + + let totalMemoryKb = BenchmarkResultsHelper.shared.getTotalSystemMemoryKb() + let memoryInfo = BenchmarkResultsHelper.shared.formatMemoryUsage( + maxMemoryKb: results.maxMemoryKb, + totalKb: totalMemoryKb + ) + shareText += "\n💾 Peak Memory: \(memoryInfo.valueText) (\(memoryInfo.labelText))" + + shareText += "\n\n📈 Summary:" + shareText += "\n• Total Tokens Processed: \(statistics.totalTokensProcessed)" + shareText += "\n• Number of Tests: \(statistics.totalTests)" + + shareText += "\n\n#MNNLLMBenchmark #AIPerformance #MobileAI" + + return shareText + } +} + +extension View { + func snapshot() -> UIImage? { + let controller = UIHostingController(rootView: self) + let view = controller.view + + let targetSize = controller.view.intrinsicContentSize + view?.bounds = CGRect(origin: .zero, size: targetSize) + view?.backgroundColor = .clear + + let renderer = UIGraphicsImageRenderer(size: targetSize) + + return renderer.image { _ in + view?.drawHierarchy(in: controller.view.bounds, afterScreenUpdates: true) + } + } +} + +#Preview { + ResultsCard( + results: BenchmarkResults( + modelDisplayName: "Qwen2.5-1.5B-Instruct", + maxMemoryKb: 1200000, // 1.2 GB in KB + testResults: [], + timestamp: "2025-01-21 14:30:25" + ) + ) + .padding() +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/StatusCard.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/StatusCard.swift new file mode 100644 index 00000000..2ded0a00 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkSubViews/StatusCard.swift @@ -0,0 +1,64 @@ +// +// StatusCard.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/21. +// + +import SwiftUI + +/** + * Reusable status display card component for benchmark interface. + * Shows status messages and updates to provide user feedback. 
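+ * + * Example (same call as the #Preview at the bottom of this file): + * ```swift + * StatusCard(statusMessage: "Initializing benchmark test environment...") + * ```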
+ */ +struct StatusCard: View { + let statusMessage: String + + var body: some View { + HStack(spacing: 16) { + ZStack { + Circle() + .fill( + LinearGradient( + colors: [Color.benchmarkWarning.opacity(0.2), Color.benchmarkWarning.opacity(0.1)], + startPoint: .topLeading, + endPoint: .bottomTrailing + ) + ) + .frame(width: 40, height: 40) + + Image(systemName: "info.circle") + .font(.system(size: 18, weight: .semibold)) + .foregroundColor(.benchmarkWarning) + } + + VStack(alignment: .leading, spacing: 4) { + Text("Status Update") + .font(.subheadline) + .fontWeight(.semibold) + .foregroundColor(.primary) + + Text(statusMessage) + .font(.subheadline) + .foregroundColor(.benchmarkSecondary) + .fixedSize(horizontal: false, vertical: true) + } + + Spacer() + } + .padding(20) + .background( + RoundedRectangle(cornerRadius: 16) + .fill(Color.benchmarkCardBg) + .overlay( + RoundedRectangle(cornerRadius: 16) + .stroke(Color.benchmarkWarning.opacity(0.3), lineWidth: 1) + ) + ) + } +} + +#Preview { + StatusCard(statusMessage: "Initializing benchmark test environment...") + .padding() +} \ No newline at end of file diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkView.swift new file mode 100644 index 00000000..37c91c8b --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Benchmark/Views/BenchmarkView.swift @@ -0,0 +1,78 @@ +// +// BenchmarkView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/21. +// + +import SwiftUI + +/** + * Main benchmark view that provides interface for running performance tests on ML models. + * Features include model selection, progress tracking, and results visualization. + */ +struct BenchmarkView: View { + @StateObject private var viewModel = BenchmarkViewModel() + @State private var showStopConfirmation = false + + var body: some View { + ZStack { + ScrollView { + VStack(spacing: 24) { + // Model Selection Section + ModelSelectionCard( + viewModel: viewModel, + showStopConfirmation: $showStopConfirmation + ) + + // Progress Section + if viewModel.showProgressBar { + ProgressCard(progress: viewModel.currentProgress) + .transition(.asymmetric( + insertion: .scale.combined(with: .opacity), + removal: .opacity + )) + } + + // Status Section + if !viewModel.statusMessage.isEmpty { + StatusCard(statusMessage: viewModel.statusMessage) + .transition(.slide) + } + + // Results Section + if viewModel.showResults, let results = viewModel.benchmarkResults { + ResultsCard(results: results) + .transition(.asymmetric( + insertion: .move(edge: .bottom).combined(with: .opacity), + removal: .opacity + )) + } + + Spacer(minLength: 20) + } + .padding(.horizontal, 20) + .padding(.vertical, 16) + } + } + .alert("Stop Benchmark", isPresented: $showStopConfirmation) { + Button("Yes", role: .destructive) { + viewModel.onStopBenchmarkTapped() + } + Button("No", role: .cancel) { } + } message: { + Text("Are you sure you want to stop the benchmark test?") + } + .alert("Error", isPresented: $viewModel.showError) { + Button("OK") { } + } message: { + Text(viewModel.errorMessage) + } + .onReceive(viewModel.$isRunning) { isRunning in + if isRunning && viewModel.startButtonText.contains("Stop") { + showStopConfirmation = false + } + } + } +} + diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/CommonToolbarView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/CommonToolbarView.swift new file mode 100644 index 00000000..b3e3b5fc --- /dev/null +++ 
b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/CommonToolbarView.swift @@ -0,0 +1,44 @@ +// +// CommonToolbarView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/07/18. +// + +import SwiftUI + +struct CommonToolbarView: ToolbarContent { + @Binding var showHistory: Bool + @Binding var showHistoryButton: Bool + + var body: some ToolbarContent { + ToolbarItem(placement: .navigationBarLeading) { + if showHistoryButton { + Button(action: { + showHistory = true + showHistoryButton = false + }) { + Image(systemName: "sidebar.left") + .resizable() + .aspectRatio(contentMode: .fit) + .frame(width: 20, height: 20) + .foregroundColor(.black) + } + } + } + + ToolbarItem(placement: .navigationBarTrailing) { + Button(action: { + if let url = URL(string: "https://github.com/alibaba/MNN") { + UIApplication.shared.open(url) + } + }) { + Image(systemName: "star") + .resizable() + .aspectRatio(contentMode: .fit) + .frame(width: 20, height: 20) + .foregroundColor(.black) + } + } + } +} \ No newline at end of file diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/LocalModelList/Views/LocalModelListView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/LocalModelList/Views/LocalModelListView.swift new file mode 100644 index 00000000..f3eddefb --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/LocalModelList/Views/LocalModelListView.swift @@ -0,0 +1,37 @@ +// +// LocalModelListView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/06/20. +// + +import SwiftUI + +struct LocalModelListView: View { + @ObservedObject var viewModel: ModelListViewModel + + var body: some View { + List { + ForEach(viewModel.filteredModels.filter { $0.isDownloaded }, id: \.id) { model in + Button(action: { + viewModel.selectModel(model) + }) { + LocalModelRowView(model: model) + } + .listRowBackground(viewModel.pinnedModelIds.contains(model.id) ? Color.black.opacity(0.05) : Color.clear) + .swipeActions(edge: .trailing, allowsFullSwipe: false) { + SwipeActionsView(model: model, viewModel: viewModel) + } + } + } + .listStyle(.plain) + .refreshable { + await viewModel.fetchModels() + } + .alert("Error", isPresented: $viewModel.showError) { + Button("OK", role: .cancel) {} + } message: { + Text(viewModel.errorMessage) + } + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/LocalModelList/Views/LocalModelRowView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/LocalModelList/Views/LocalModelRowView.swift new file mode 100644 index 00000000..7625799b --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/LocalModelList/Views/LocalModelRowView.swift @@ -0,0 +1,64 @@ +// +// LocalModelRowView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/6/26. 
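+// +// Shows a downloaded model's icon, name, localized tags, on-disk size and last-used time.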
+// + +import SwiftUI + +struct LocalModelRowView: View { + + let model: ModelInfo + + private var localizedTags: [String] { + model.localizedTags + } + + private var formattedSize: String { + model.formattedSize + } + + var body: some View { + HStack(alignment: .center) { + + ModelIconView(modelId: model.id) + .frame(width: 40, height: 40) + + VStack(alignment: .leading, spacing: 8) { + Text(model.modelName) + .font(.headline) + .fontWeight(.semibold) + .lineLimit(1) + + if !localizedTags.isEmpty { + TagsView(tags: localizedTags) + } + + HStack { + HStack(alignment: .center, spacing: 2) { + Image(systemName: "folder") + .font(.caption) + .fontWeight(.medium) + .foregroundColor(.gray) + .frame(width: 20, height: 20) + + Text(formattedSize) + .font(.caption) + .fontWeight(.medium) + .foregroundColor(.gray) + } + + Spacer() + + if let lastUsedAt = model.lastUsedAt { + Text("\(lastUsedAt.formatAgo())") + .font(.caption) + .fontWeight(.medium) + .foregroundColor(.gray) + } + } + } + } + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/MainTabView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/MainTabView.swift new file mode 100644 index 00000000..f3ed7d36 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/MainTabView.swift @@ -0,0 +1,230 @@ +// +// MainTabView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/06/20. +// + +import SwiftUI + +// MainTabView is the primary view of the app, containing the tab bar and navigation for main sections. +struct MainTabView: View { + // MARK: - State Properties + + @State private var showHistory = false + @State private var selectedHistory: ChatHistory? = nil + @State private var histories: [ChatHistory] = ChatHistoryManager.shared.getAllHistory() + @State private var showHistoryButton = true + @State private var showSettings = false + @State private var showWebView = false + @State private var webViewURL: URL? + @State private var navigateToSettings = false + @StateObject private var modelListViewModel = ModelListViewModel() + @State private var selectedTab: Int = 0 + + private var titles: [String] { + [ + NSLocalizedString("Local Model", comment: "Local Model tab label"), + NSLocalizedString("Model Market", comment: "Model Market tab label"), + NSLocalizedString("Benchmark", comment: "Benchmark tab label") + ] + } + + // MARK: - Body + + var body: some View { + ZStack { + // Main TabView for navigation between Local Model, Model Market, and Benchmark + TabView(selection: $selectedTab) { + NavigationView { + LocalModelListView(viewModel: modelListViewModel) + .navigationTitle(titles[0]) + .navigationBarTitleDisplayMode(.inline) + .navigationBarHidden(false) + .onAppear { + setupNavigationBarAppearance() + } + .toolbar { + CommonToolbarView( + showHistory: $showHistory, + showHistoryButton: $showHistoryButton, + ) + } + .background( + ZStack { + NavigationLink(destination: chatDestination, isActive: chatIsActiveBinding) { EmptyView() } + NavigationLink(destination: SettingsView(), isActive: $navigateToSettings) { EmptyView() } + } + ) + // Hide TabBar when entering chat or settings view + .toolbar((chatIsActiveBinding.wrappedValue || navigateToSettings) ? 
.hidden : .visible, for: .tabBar) + } + .tabItem { + Image(systemName: "house.fill") + Text(titles[0]) + } + .tag(0) + + NavigationView { + ModelListView(viewModel: modelListViewModel) + .navigationTitle(titles[1]) + .navigationBarTitleDisplayMode(.inline) + .navigationBarHidden(false) + .onAppear { + setupNavigationBarAppearance() + } + .toolbar { + CommonToolbarView( + showHistory: $showHistory, + showHistoryButton: $showHistoryButton, + ) + } + .background( + ZStack { + NavigationLink(destination: chatDestination, isActive: chatIsActiveBinding) { EmptyView() } + NavigationLink(destination: SettingsView(), isActive: $navigateToSettings) { EmptyView() } + } + ) + } + .tabItem { + Image(systemName: "doc.text.fill") + Text(titles[1]) + } + .tag(1) + + NavigationView { + BenchmarkView() + .navigationTitle(titles[2]) + .navigationBarTitleDisplayMode(.inline) + .navigationBarHidden(false) + .onAppear { + setupNavigationBarAppearance() + } + .toolbar { + CommonToolbarView( + showHistory: $showHistory, + showHistoryButton: $showHistoryButton, + ) + } + .background( + ZStack { + NavigationLink(destination: chatDestination, isActive: chatIsActiveBinding) { EmptyView() } + NavigationLink(destination: SettingsView(), isActive: $navigateToSettings) { EmptyView() } + } + ) + } + .tabItem { + Image(systemName: "clock.fill") + Text(titles[2]) + } + .tag(2) + } + .onAppear { + setupTabBarAppearance() + } + .tint(.black) + + // Overlay for dimming the background when history is shown + if showHistory { + Color.black.opacity(0.5) + .edgesIgnoringSafeArea(.all) + .onTapGesture { + withAnimation(.easeInOut(duration: 0.2)) { + showHistory = false + } + } + } + + // Side menu for displaying chat history + SideMenuView(isOpen: $showHistory, + selectedHistory: $selectedHistory, + histories: $histories, + navigateToMainSettings: $navigateToSettings) + .edgesIgnoringSafeArea(.all) + } + .onChange(of: showHistory) { oldValue, newValue in + if !newValue { + DispatchQueue.main.asyncAfter(deadline: .now() + 0.3) { + withAnimation { + showHistoryButton = true + } + } + } + } + .sheet(isPresented: $showWebView) { + if let url = webViewURL { + WebView(url: url) + } + } + } + + // MARK: - View Builders + + /// Destination view for chat, either from a new model or a history item. + @ViewBuilder + private var chatDestination: some View { + if let model = modelListViewModel.selectedModel { + LLMChatView(modelInfo: model) + .navigationBarHidden(false) + .navigationBarTitleDisplayMode(.inline) + } else if let history = selectedHistory { + let modelInfo = ModelInfo(modelId: history.modelId, isDownloaded: true) + LLMChatView(modelInfo: modelInfo, history: history) + .navigationBarHidden(false) + .navigationBarTitleDisplayMode(.inline) + } else { + EmptyView() + } + } + + // MARK: - Bindings + + /// Binding to control the activation of the chat view. + private var chatIsActiveBinding: Binding<Bool> { + Binding( + get: { + return modelListViewModel.selectedModel != nil || selectedHistory != nil + }, + set: { isActive in + if !isActive { + // Record usage when returning from chat + if let model = modelListViewModel.selectedModel { + modelListViewModel.recordModelUsage(modelName: model.modelName) + } + + // Clear selections + modelListViewModel.selectedModel = nil + selectedHistory = nil + } + } + ) + } + + // MARK: - Private Methods + + /// Configures the appearance of the navigation bar. 
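+ /// Called from each tab's .onAppear; applies an opaque white background with no shadow to the standard, compact and scroll-edge appearances so the bar looks the same across tabs.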
+ private func setupNavigationBarAppearance() { + let appearance = UINavigationBarAppearance() + appearance.configureWithOpaqueBackground() + appearance.backgroundColor = .white + appearance.shadowColor = .clear + + UINavigationBar.appearance().standardAppearance = appearance + UINavigationBar.appearance().compactAppearance = appearance + UINavigationBar.appearance().scrollEdgeAppearance = appearance + } + + /// Configures the appearance of the tab bar. + private func setupTabBarAppearance() { + let appearance = UITabBarAppearance() + appearance.configureWithOpaqueBackground() + + let selectedColor = UIColor(Color.primaryPurple) + + appearance.stackedLayoutAppearance.selected.iconColor = selectedColor + appearance.stackedLayoutAppearance.selected.titleTextAttributes = [.foregroundColor: selectedColor] + + UITabBar.appearance().standardAppearance = appearance + UITabBar.appearance().scrollEdgeAppearance = appearance + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/ModelIcon/ModelIcon.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/ModelIcon/ModelIcon.swift similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/ModelIcon/ModelIcon.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/ModelIcon/ModelIcon.swift diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/ModelIcon/ModelIconManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/ModelIcon/ModelIconManager.swift similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/ModelIcon/ModelIconManager.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/ModelIcon/ModelIconManager.swift diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/ModelInfo.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/ModelInfo.swift new file mode 100644 index 00000000..75e59c80 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/ModelInfo.swift @@ -0,0 +1,203 @@ +// +// ModelInfo.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/4. +// + +import Hub +import Foundation + +struct ModelInfo: Codable { + // MARK: - Properties + let modelName: String + let tags: [String] + let categories: [String]? + let size_gb: Double? + let vendor: String? + let sources: [String: String]? + let tagTranslations: [String: [String]]? + + // Runtime properties + var isDownloaded: Bool = false + var lastUsedAt: Date? + var cachedSize: Int64? = nil + + // MARK: - Initialization + + init(modelName: String = "", + tags: [String] = [], + categories: [String]? = nil, + size_gb: Double? = nil, + vendor: String? = nil, + sources: [String: String]? = nil, + tagTranslations: [String: [String]]? = nil, + isDownloaded: Bool = false, + lastUsedAt: Date? = nil, + cachedSize: Int64? = nil) { + + self.modelName = modelName + self.tags = tags + self.categories = categories + self.size_gb = size_gb + self.vendor = vendor + self.sources = sources + self.tagTranslations = tagTranslations + self.isDownloaded = isDownloaded + self.lastUsedAt = lastUsedAt + self.cachedSize = cachedSize + } + + init(modelId: String, isDownloaded: Bool = true) { + let modelName = modelId.components(separatedBy: "/").last ?? 
modelId + + self.init( + modelName: modelName, + tags: [], + sources: ["huggingface": modelId], + isDownloaded: isDownloaded + ) + } + + // MARK: - Model Identity & Localization + + var id: String { + guard let sources = sources else { + return "taobao-mnn/\(modelName)" + } + + let sourceKey = ModelSourceManager.shared.selectedSource.rawValue + return sources[sourceKey] ?? "taobao-mnn/\(modelName)" + } + + var localizedTags: [String] { + let currentLanguage = LanguageManager.shared.currentLanguage + let isChineseLanguage = currentLanguage == "简体中文" + + if isChineseLanguage, let translations = tagTranslations { + let languageCode = "zh-Hans" + return translations[languageCode] ?? tags + } else { + return tags + } + } + + // MARK: - File System & Path Management + + var localPath: String { + let modelScopeId = "taobao-mnn/\(modelName)" + return HubApi.shared.localRepoLocation(HubApi.Repo.init(id: modelScopeId)).path + } + + // MARK: - Size Calculation & Formatting + + var formattedSize: String { + if let cached = cachedSize { + return FileOperationManager.shared.formatBytes(cached) + } else if isDownloaded { + return FileOperationManager.shared.formatLocalDirectorySize(at: localPath) + } else if let sizeGb = size_gb { + return String(format: "%.1f GB", sizeGb) + } else { + return "None" + } + } + + /// Calculates and caches the local directory size + /// - Returns: The formatted size string and updates cachedSize property + mutating func calculateAndCacheSize() -> String { + if let cached = cachedSize { + return FileOperationManager.shared.formatBytes(cached) + } + + if isDownloaded { + do { + let sizeInBytes = try FileOperationManager.shared.calculateDirectorySize(at: localPath) + self.cachedSize = sizeInBytes + return FileOperationManager.shared.formatBytes(sizeInBytes) + } catch { + print("Error calculating directory size: \(error)") + return "Unknown" + } + } else if let sizeGb = size_gb { + return String(format: "%.1f GB", sizeGb) + } else { + return "None" + } + } + + // MARK: - Remote Size Calculation + + func fetchRemoteSize() async -> Int64? 
{ + let modelScopeId = "taobao-mnn/\(modelName)" + + do { + let files = try await fetchFileList(repoPath: modelScopeId, root: "", revision: "") + let totalSize = try await calculateTotalSize(files: files, repoPath: modelScopeId) + return totalSize + } catch { + print("Error fetching remote size for \(id): \(error)") + return nil + } + } + + private func fetchFileList(repoPath: String, root: String, revision: String) async throws -> [ModelFile] { + let url = try buildURL( + repoPath: repoPath, + path: "/repo/files", + queryItems: [ + URLQueryItem(name: "Root", value: root), + URLQueryItem(name: "Revision", value: revision) + ] + ) + + let (data, response) = try await URLSession.shared.data(from: url) + try validateResponse(response) + + let modelResponse = try JSONDecoder().decode(ModelResponse.self, from: data) + return modelResponse.data.files + } + + private func calculateTotalSize(files: [ModelFile], repoPath: String) async throws -> Int64 { + var totalSize: Int64 = 0 + + for file in files { + if file.type == "tree" { + let subFiles = try await fetchFileList(repoPath: repoPath, root: file.path, revision: "") + totalSize += try await calculateTotalSize(files: subFiles, repoPath: repoPath) + } else if file.type == "blob" { + totalSize += Int64(file.size) + } + } + + return totalSize + } + + // MARK: - Network Utilities + + private func buildURL(repoPath: String, path: String, queryItems: [URLQueryItem]) throws -> URL { + var components = URLComponents() + components.scheme = "https" + components.host = "modelscope.cn" + components.path = "/api/v1/models/\(repoPath)\(path)" + components.queryItems = queryItems + + guard let url = components.url else { + throw ModelScopeError.invalidURL + } + return url + } + + private func validateResponse(_ response: URLResponse) throws { + guard let httpResponse = response as? HTTPURLResponse, + (200...299).contains(httpResponse.statusCode) else { + throw ModelScopeError.invalidResponse + } + } + + // MARK: - Codable + + private enum CodingKeys: String, CodingKey { + case modelName, tags, categories, size_gb, vendor, sources, tagTranslations, cachedSize + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/ModelListViewModel.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/ModelListViewModel.swift new file mode 100644 index 00000000..070e9a82 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/ModelListViewModel.swift @@ -0,0 +1,361 @@ +// +// ModelListViewModel.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/4. +// + +import Foundation +import SwiftUI + +class ModelListViewModel: ObservableObject { + // MARK: - Published Properties + @Published var models: [ModelInfo] = [] + @Published var searchText = "" + @Published var quickFilterTags: [String] = [] + @Published var selectedModel: ModelInfo? + @Published var showError = false + @Published var errorMessage = "" + + // Download state + @Published private(set) var downloadProgress: [String: Double] = [:] + @Published private(set) var currentlyDownloading: String? + + // MARK: - Private Properties + private let modelClient = ModelClient() + private let pinnedModelKey = "com.mnnllm.pinnedModelIds" + + // MARK: - Model Data Access + + public var pinnedModelIds: [String] { + get { UserDefaults.standard.stringArray(forKey: pinnedModelKey) ?? 
[] } + set { UserDefaults.standard.setValue(newValue, forKey: pinnedModelKey) } + } + + var allTags: [String] { + Array(Set(models.flatMap { $0.tags })) + } + + var allCategories: [String] { + Array(Set(models.compactMap { $0.categories }.flatMap { $0 })) + } + + var allVendors: [String] { + Array(Set(models.compactMap { $0.vendor })) + } + + var filteredModels: [ModelInfo] { + let filtered = searchText.isEmpty ? models : models.filter { model in + model.id.localizedCaseInsensitiveContains(searchText) || + model.modelName.localizedCaseInsensitiveContains(searchText) || + model.localizedTags.contains { $0.localizedCaseInsensitiveContains(searchText) } + } + + let downloaded = filtered.filter { $0.isDownloaded } + let notDownloaded = filtered.filter { !$0.isDownloaded } + + return downloaded + notDownloaded + } + + // MARK: - Initialization + + init() { + Task { @MainActor in + await fetchModels() + } + } + + // MARK: - Model Data Management + + @MainActor + func fetchModels() async { + do { + let info = try await modelClient.getModelInfo() + + self.quickFilterTags = info.quickFilterTags ?? [] + TagTranslationManager.shared.loadTagTranslations(info.tagTranslations) + + var fetchedModels = info.models + + filterDiffusionModels(fetchedModels: &fetchedModels) + loadCachedSizes(for: &fetchedModels) + sortModels(fetchedModels: &fetchedModels) + self.models = fetchedModels + + // Asynchronously fetch size info for both downloaded and undownloaded models + Task { + await fetchModelSizes(for: fetchedModels) + } + + } catch { + showError = true + errorMessage = "Error: \(error.localizedDescription)" + } + } + + private func loadCachedSizes(for models: inout [ModelInfo]) { + for i in 0..<models.count where models[i].isDownloaded { + // Restore the persisted on-disk size; assumes a getCachedSize(for:) counterpart to setCachedSize(_:for:) + models[i].cachedSize = ModelStorageManager.shared.getCachedSize(for: models[i].modelName) + } + } + + private func sortModels(fetchedModels: inout [ModelInfo]) { + let pinned = pinnedModelIds + fetchedModels.sort { (model1, model2) -> Bool in + let isPinned1 = pinned.contains(model1.id) + let isPinned2 = pinned.contains(model2.id) + let isDownloading1 = currentlyDownloading == model1.id + let isDownloading2 = currentlyDownloading == model2.id + + // 1. Currently downloading models have highest priority + if isDownloading1 != isDownloading2 { + return isDownloading1 + } + + // 2. Pinned models have second priority + if isPinned1 != isPinned2 { + return isPinned1 + } + + // 3. If both are pinned, sort by pin time + if isPinned1 && isPinned2 { + let index1 = pinned.firstIndex(of: model1.id)! + let index2 = pinned.firstIndex(of: model2.id)! + return index1 > index2 // Pinned later comes first + } + + // 4. Non-pinned models sorted by download status + if model1.isDownloaded != model2.isDownloaded { + return model1.isDownloaded + } + + // 5. If both downloaded, sort by last used time + if model1.isDownloaded { + let date1 = model1.lastUsedAt ?? 
.distantPast + return date1 > date2 + } + + return false // Keep original order for not-downloaded + } + } + + // MARK: - Model Selection & Usage + + @MainActor + func selectModel(_ model: ModelInfo) { + if model.isDownloaded { + selectedModel = model + } else { + Task { + await downloadModel(model) + } + } + } + + func recordModelUsage(modelName: String) { + ModelStorageManager.shared.updateLastUsed(for: modelName) + Task { @MainActor in + if let index = self.models.firstIndex(where: { $0.modelName == modelName }) { + self.models[index].lastUsedAt = Date() + self.sortModels(fetchedModels: &self.models) + } + } + } + + // MARK: - Download Management + + func downloadModel(_ model: ModelInfo) async { + await MainActor.run { + guard currentlyDownloading == nil else { return } + currentlyDownloading = model.id + downloadProgress[model.id] = 0 + } + + do { + try await modelClient.downloadModel(model: model) { progress in + Task { @MainActor in + self.downloadProgress[model.id] = progress + } + } + + await MainActor.run { + if let index = self.models.firstIndex(where: { $0.id == model.id }) { + self.models[index].isDownloaded = true + ModelStorageManager.shared.markModelAsDownloaded(model.modelName) + } + } + + // Calculate and cache size for newly downloaded model + do { + let localSize = try FileOperationManager.shared.calculateDirectorySize(at: model.localPath) + await MainActor.run { + if let index = self.models.firstIndex(where: { $0.id == model.id }) { + self.models[index].cachedSize = localSize + ModelStorageManager.shared.setCachedSize(localSize, for: model.modelName) + } + } + } catch { + print("Error calculating size for newly downloaded model \(model.modelName): \(error)") + } + + } catch { + await MainActor.run { + if case ModelScopeError.downloadCancelled = error { + print("Download was cancelled") + } else { + self.showError = true + self.errorMessage = "Failed to download model: \(error.localizedDescription)" + } + } + } + + await MainActor.run { + self.currentlyDownloading = nil + self.downloadProgress.removeValue(forKey: model.id) + } + } + + func cancelDownload() async { + let modelId = await MainActor.run { currentlyDownloading } + + if let modelId = modelId { + await modelClient.cancelDownload() + + await MainActor.run { + self.downloadProgress.removeValue(forKey: modelId) + self.currentlyDownloading = nil + } + + print("Download cancelled for model: \(modelId)") + } + } + + // MARK: - Pin Management + + @MainActor + func pinModel(_ model: ModelInfo) { + guard let index = models.firstIndex(where: { $0.id == model.id }) else { return } + let pinned = models.remove(at: index) + models.insert(pinned, at: 0) + + var pinnedIds = pinnedModelIds + if let existingIndex = pinnedIds.firstIndex(of: model.id) { + pinnedIds.remove(at: existingIndex) + } + pinnedIds.insert(model.id, at: 0) + pinnedModelIds = pinnedIds + } + + @MainActor + func unpinModel(_ model: ModelInfo) { + var pinnedIds = pinnedModelIds + if let index = pinnedIds.firstIndex(of: model.id) { + pinnedIds.remove(at: index) + pinnedModelIds = pinnedIds + + // Re-sort models after unpinning + sortModels(fetchedModels: &models) + } + } + + // MARK: - Model Deletion + + func deleteModel(_ model: ModelInfo) async { + guard model.isDownloaded else { return } + + do { + // Delete local files + let fileManager = FileManager.default + let modelPath = model.localPath + + if fileManager.fileExists(atPath: modelPath) { + try fileManager.removeItem(atPath: modelPath) + } + + // Update model state + await MainActor.run { + if let 
index = self.models.firstIndex(where: { $0.id == model.id }) { + self.models[index].isDownloaded = false + self.models[index].cachedSize = nil + ModelStorageManager.shared.markModelAsNotDownloaded(model.modelName) + } + + // Re-sort models after deletion + self.sortModels(fetchedModels: &self.models) + } + + } catch { + await MainActor.run { + self.showError = true + self.errorMessage = "Failed to delete model: \(error.localizedDescription)" + } + } + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/TBDataResponse.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/TBDataResponse.swift new file mode 100644 index 00000000..2cf0bac6 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Models/TBDataResponse.swift @@ -0,0 +1,24 @@ +// +// TBDataResponse.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/9. +// + +import Foundation + +struct TBDataResponse: Codable { + let tagTranslations: [String: String] + let quickFilterTags: [String]? + let models: [ModelInfo] + let metadata: Metadata? + + struct Metadata: Codable { + let version: String + let lastUpdated: String + let schemaVersion: String + let totalModels: Int + let supportedPlatforms: [String] + let minAppVersion: String + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelClient.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelClient.swift new file mode 100644 index 00000000..f9d073c2 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelClient.swift @@ -0,0 +1,196 @@ +// +// ModelClient.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/4. +// + +import Hub +import Foundation + +class ModelClient { + private let maxRetries = 5 + + private let baseMirrorURL = "https://hf-mirror.com" + private let baseURL = "https://huggingface.co" + private let AliCDNURL = "https://meta.alicdn.com/data/mnn/apis/model_market.json" + + // Debug flag to use local mock data instead of network API + private let useLocalMockData = false + + private var currentDownloadManager: ModelScopeDownloadManager? + + private lazy var baseURLString: String = { + switch ModelSourceManager.shared.selectedSource { + case .huggingFace: + return baseURL + default: + return baseMirrorURL + } + }() + + init() {} + + func getModelInfo() async throws -> TBDataResponse { + if useLocalMockData { + // Debug mode: use local mock data + guard let url = Bundle.main.url(forResource: "mock", withExtension: "json") else { + throw NetworkError.invalidData + } + + let data = try Data(contentsOf: url) + let mockResponse = try JSONDecoder().decode(TBDataResponse.self, from: data) + return mockResponse + } else { + // Production mode: fetch from network API + return try await fetchDataFromAliAPI() + } + } + + /** + * Fetches data from the network API with fallback to local mock data + * + * @throws NetworkError if both network request and local fallback fail + */ + private func fetchDataFromAliAPI() async throws -> TBDataResponse { + do { + guard let url = URL(string: AliCDNURL) else { + throw NetworkError.invalidData + } + + let (data, response) = try await URLSession.shared.data(from: url) + + guard let httpResponse = response as? HTTPURLResponse, + httpResponse.statusCode == 200 else { + throw NetworkError.invalidResponse + } + + let apiResponse = try JSONDecoder().decode(TBDataResponse.self, from: data) + return apiResponse + + } catch { + print("Network request failed: \(error). 
Falling back to local mock data.") + + // Fallback to local mock data if network request fails + guard let url = Bundle.main.url(forResource: "mock", withExtension: "json") else { + throw NetworkError.invalidData + } + + let data = try Data(contentsOf: url) + let mockResponse = try JSONDecoder().decode(TBDataResponse.self, from: data) + return mockResponse + } + } + + /** + * Downloads a model from the selected source with progress tracking + * + * @param model The ModelInfo object containing model details + * @param progress Progress callback that receives download progress (0.0 to 1.0) + * @throws Various network or file system errors + */ + func downloadModel(model: ModelInfo, + progress: @escaping (Double) -> Void) async throws { + switch ModelSourceManager.shared.selectedSource { + case .modelScope, .modeler: + try await downloadFromModelScope(model, progress: progress) + case .huggingFace: + try await downloadFromHuggingFace(model, progress: progress) + } + } + + /** + * Cancels the current download operation + */ + func cancelDownload() async { + if let manager = currentDownloadManager { + await manager.cancelDownload() + currentDownloadManager = nil + print("Download cancelled") + } + } + /** + * Downloads model from ModelScope platform + * + * @param model The ModelInfo object to download + * @param progress Progress callback for download updates + * @throws Download or network related errors + */ + private func downloadFromModelScope(_ model: ModelInfo, + progress: @escaping (Double) -> Void) async throws { + let ModelScopeId = model.id + let config = URLSessionConfiguration.default + config.timeoutIntervalForRequest = 30 + config.timeoutIntervalForResource = 300 + + let manager = ModelScopeDownloadManager.init(repoPath: ModelScopeId, config: config, enableLogging: true, source: ModelSourceManager.shared.selectedSource) + currentDownloadManager = manager + + try await manager.downloadModel(to:"huggingface/models/taobao-mnn", modelId: ModelScopeId, modelName: model.modelName) { fileProgress in + Task { @MainActor in + progress(fileProgress) + } + } + + currentDownloadManager = nil + } + + /** + * Downloads model from HuggingFace platform with optimized progress updates + * + * This method implements throttling to prevent UI stuttering by limiting + * progress update frequency and filtering out minor progress changes. + * + * @param model The ModelInfo object to download + * @param progress Progress callback for download updates + * @throws Download or network related errors + */ + private func downloadFromHuggingFace(_ model: ModelInfo, + progress: @escaping (Double) -> Void) async throws { + let repo = Hub.Repo(id: model.id) + let modelFiles = ["*.*"] + let mirrorHubApi = HubApi(endpoint: baseURL) + + // Progress throttling mechanism to prevent UI stuttering + var lastUpdateTime = Date() + var lastProgress: Double = 0.0 + let progressUpdateInterval: TimeInterval = 0.1 // Limit update frequency to every 100ms + let progressThreshold: Double = 0.01 // Progress change threshold of 1% + + try await mirrorHubApi.snapshot(from: repo, matching: modelFiles) { fileProgress in + let currentProgress = fileProgress.fractionCompleted + let currentTime = Date() + + // Check if progress should be updated + let timeDiff = currentTime.timeIntervalSince(lastUpdateTime) + let progressDiff = abs(currentProgress - lastProgress) + + // Update progress if any of these conditions are met: + // 1. Time interval exceeds threshold + // 2. Progress change exceeds threshold + // 3. 
Progress reaches 100% (download complete) + // 4. Progress is 0% (download start) + if timeDiff >= progressUpdateInterval || + progressDiff >= progressThreshold || + currentProgress >= 1.0 || + currentProgress == 0.0 { + + lastUpdateTime = currentTime + lastProgress = currentProgress + + // Ensure progress updates are executed on the main thread + Task { @MainActor in + progress(currentProgress) + } + } + } + } +} + + +enum NetworkError: Error { + case invalidResponse + case invalidData + case downloadFailed + case unknown +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelDownloadStorage.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelDownloadStorage.swift similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelDownloadStorage.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelDownloadStorage.swift diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeDownloadManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeDownloadManager.swift similarity index 76% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeDownloadManager.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeDownloadManager.swift index f7396224..96d8141e 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeDownloadManager.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeDownloadManager.swift @@ -1,5 +1,5 @@ // -// ModelClient.swift +// ModelScopeDownloadManager.swift // MNNLLMiOS // // Created by 游薪渝(揽清) on 2025/2/20. @@ -31,6 +31,11 @@ private var downloadedSize: Int64 = 0 private var lastUpdatedBytes: Int64 = 0 + // Download cancellation related properties + private var isCancelled: Bool = false + private var currentDownloadTask: Task<Void, Never>? + private var currentFileHandle: FileHandle? + // MARK: - Initialization /// Creates a new ModelScope download manager @@ -83,6 +88,9 @@ modelName: String, progress: ((Double) -> Void)? = nil ) async throws { + + isCancelled = false + ModelScopeLogger.info("Starting download for modelId: \(modelId)") let destination = try resolveDestinationPath(base: destinationFolder, modelId: modelName) @@ -100,7 +108,28 @@ ) } - // MARK: - Private Methods + /// Cancel download + /// Preserve downloaded temporary files to support resume functionality + public func cancelDownload() async { + isCancelled = true + + currentDownloadTask?.cancel() + currentDownloadTask = nil + + await closeFileHandle() + + session.invalidateAndCancel() + + ModelScopeLogger.info("Download cancelled, temporary files preserved for resume") + } + + // MARK: - Private Methods - Progress Management + + private func updateProgress(_ progress: Double, callback: @escaping (Double) -> Void) { + Task { @MainActor in + callback(progress) + } + } private func fetchFileList( root: String, @@ -131,6 +160,8 @@ var lastError: Error? 
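+ // Per-file retry loop; breaks out early once cancelDownload() has set isCancelled.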
for attempt in 1...maxRetries { + if isCancelled { break } + do { print("Attempt \(attempt) of \(maxRetries) for file: \(file.name)") try await downloadFileWithRetry( @@ -162,6 +193,11 @@ destinationPath: String, onProgress: @escaping (Int64) -> Void ) async throws { + + if isCancelled { + throw ModelScopeError.downloadCancelled + } + let session = self.session ModelScopeLogger.info("Starting download for file: \(file.name)") @@ -209,28 +245,45 @@ ModelScopeLogger.debug("Requesting URL: \(url)") return try await withCheckedThrowingContinuation { continuation in - Task { + currentDownloadTask = Task { do { let (asyncBytes, response) = try await session.bytes(for: request) ModelScopeLogger.debug("Response status code: \((response as? HTTPURLResponse)?.statusCode ?? -1)") try validateResponse(response) let fileHandle = try FileHandle(forWritingTo: tempURL) + self.currentFileHandle = fileHandle + if resumeOffset > 0 { try fileHandle.seek(toOffset: UInt64(resumeOffset)) } var downloadedBytes: Int64 = resumeOffset + var bytesCount = 0 for try await byte in asyncBytes { + // Frequently check cancellation status + if isCancelled { + try fileHandle.close() + self.currentFileHandle = nil + // Don't delete temp files when cancelled, preserve resume functionality + continuation.resume(throwing: ModelScopeError.downloadCancelled) + return + } + try fileHandle.write(contentsOf: [byte]) downloadedBytes += 1 - if downloadedBytes % 1024 == 0 { + bytesCount += 1 + + // Reduce progress-callback frequency: update once every 64KB * 5 instead of every 1KB + if bytesCount >= 64 * 1024 * 5 { onProgress(downloadedBytes) + bytesCount = 0 } } try fileHandle.close() + self.currentFileHandle = nil let finalSize = try FileManager.default.attributesOfItem(atPath: tempURL.path)[.size] as? Int64 ?? 0 guard finalSize == file.size else { @@ -250,8 +303,16 @@ onProgress(downloadedBytes) continuation.resume() } catch { - ModelScopeLogger.error("Download failed: \(error.localizedDescription)") - storage.clearFileStatus(at: destination.path) + // Clean up file handle when handling errors + if let handle = self.currentFileHandle { + try? 
handle.close() + self.setCurrentFileHandle(nil) + } + + if !isCancelled { + ModelScopeLogger.error("Download failed: \(error.localizedDescription)") + storage.clearFileStatus(at: destination.path) + } continuation.resume(throwing: error) } } @@ -266,6 +327,10 @@ public actor ModelScopeDownloadManager: Sendable { ) async throws { ModelScopeLogger.info("Starting download with \(files.count) files") + if isCancelled { + throw ModelScopeError.downloadCancelled + } + func calculateTotalSize(files: [ModelFile]) async throws -> Int64 { var size: Int64 = 0 for file in files { @@ -288,6 +353,11 @@ public actor ModelScopeDownloadManager: Sendable { } for file in files { + + if Task.isCancelled || isCancelled { + throw ModelScopeError.downloadCancelled + } + ModelScopeLogger.debug("Processing: \(file.name), type: \(file.type)") if file.type == "tree" { @@ -317,15 +387,7 @@ public actor ModelScopeDownloadManager: Sendable { destinationPath: destinationPath, onProgress: { downloadedBytes in let currentProgress = Double(self.downloadedSize + downloadedBytes) / Double(self.totalSize) - progress(currentProgress) - // 1MB = 1,024 * 1,024 - let bytesDelta = self.downloadedSize - self.lastUpdatedBytes - if bytesDelta >= 1_024 * 1_024 { - self.lastUpdatedBytes = self.downloadedSize - DispatchQueue.main.async { - progress(currentProgress) - } - } + self.updateProgress(currentProgress, callback: progress) }, maxRetries: 500, retryDelay: 1.0 @@ -340,9 +402,42 @@ public actor ModelScopeDownloadManager: Sendable { ModelScopeLogger.debug("File exists: \(file.name)") } - progress(Double(downloadedSize) / Double(totalSize)) + let currentProgress = Double(downloadedSize) / Double(totalSize) + updateProgress(currentProgress, callback: progress) } } + + Task { @MainActor in + progress(1.0) + } + } + + + private func resetDownloadState() async { + totalFiles = 0 + downloadedFiles = 0 + totalSize = 0 + downloadedSize = 0 + lastUpdatedBytes = 0 + } + + private func resetCancelStatus() { + isCancelled = false + + totalFiles = 0 + downloadedFiles = 0 + totalSize = 0 + downloadedSize = 0 + lastUpdatedBytes = 0 + } + + private func closeFileHandle() async { + do { + try currentFileHandle?.close() + currentFileHandle = nil + } catch { + print("Error closing file handle: \(error)") + } } private func buildURL( @@ -410,4 +505,27 @@ public actor ModelScopeDownloadManager: Sendable { return modelScopePath.path } + + private func setCurrentFileHandle(_ handle: FileHandle?) { + currentFileHandle = handle + } + + private func getTempFileSize(for file: ModelFile, destinationPath: String) -> Int64 { + let modelHash = repoPath.hash + let fileHash = file.path.hash + let tempURL = FileManager.default.temporaryDirectory + .appendingPathComponent("model_\(modelHash)_file_\(fileHash)_\(file.name.sanitizedPath).tmp") + + guard fileManager.fileExists(atPath: tempURL.path) else { + return 0 + } + + do { + let attributes = try fileManager.attributesOfItem(atPath: tempURL.path) + return attributes[.size] as? Int64 ?? 
0 + } catch { + ModelScopeLogger.error("Failed to get temp file size for \(file.name): \(error)") + return 0 + } + } } diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeLogger.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeLogger.swift similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeLogger.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeLogger.swift diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeModels.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeModels.swift similarity index 97% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeModels.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeModels.swift index 3321d61a..1a8d9114 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeModels.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeModels.swift @@ -10,6 +10,7 @@ import Foundation public enum ModelScopeError: Error { case invalidURL case invalidResponse + case downloadCancelled case downloadFailed(Error) case fileSystemError(Error) case invalidData diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeUtilities.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeUtilities.swift similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelScopeUtilities.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Network/ModelScopeUtilities.swift diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/CustomPopupMenu.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/CustomPopupMenu.swift new file mode 100644 index 00000000..5556bc77 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/CustomPopupMenu.swift @@ -0,0 +1,65 @@ +// +// CustomPopupMenu.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/6/30. +// + +import SwiftUI + +struct CustomPopupMenu: View { + @Binding var isPresented: Bool + @Binding var selectedSource: ModelSource + let anchorFrame: CGRect + + var body: some View { + GeometryReader { geometry in + ZStack(alignment: .top) { + + Color.black.opacity(0.3) + .frame(maxWidth: .infinity) + .frame(height: UIScreen.main.bounds.height - anchorFrame.maxY) + .offset(y: anchorFrame.maxY - 10) + .onTapGesture { + isPresented = false + } + + VStack(spacing: 0) { + ForEach(ModelSource.allCases) { source in + Button { + selectedSource = source + ModelSourceManager.shared.updateSelectedSource(source) + isPresented = false + } label: { + HStack { + Text(source.description) + .font(.system(size: 12, weight: .regular)) + .foregroundColor(source == selectedSource ? 
.primaryBlue : .black)
+                            Spacer()
+                            if source == selectedSource {
+                                Image(systemName: "checkmark.circle")
+                                    .foregroundColor(.primaryBlue)
+                            }
+                        }
+                        .frame(maxWidth: .infinity)
+                        .padding()
+                        .background(.white)
+                    }
+                    Divider()
+                }
+            }
+            .background(Color.white)
+            .cornerRadius(8)
+            .shadow(color: .black.opacity(0.1), radius: 5, x: 0, y: 5)
+            .frame(width: geometry.size.width)
+            .position(
+                x: geometry.size.width / 2,
+                y: anchorFrame.maxY - 24
+            )
+            }
+        }
+        .transition(.opacity)
+        .animation(.spring(response: 0.3, dampingFraction: 0.8, blendDuration: 0), value: isPresented)
+    }
+}
+
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/HelpView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/HelpView.swift
similarity index 100%
rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/HelpView.swift
rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/HelpView.swift
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterButton.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterButton.swift
new file mode 100644
index 00000000..56f5a0f6
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterButton.swift
@@ -0,0 +1,33 @@
+//
+// FilterButton.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import SwiftUI
+
+struct FilterButton: View {
+    @Binding var showFilterMenu: Bool
+    @Binding var selectedTags: Set<String>
+    @Binding var selectedCategories: Set<String>
+    @Binding var selectedVendors: Set<String>
+
+    var body: some View {
+        Button(action: {
+            showFilterMenu.toggle()
+        }) {
+            Image(systemName: "line.3.horizontal.decrease.circle")
+                .font(.system(size: 20))
+                .foregroundColor(.primary)
+        }
+        .sheet(isPresented: $showFilterMenu) {
+            FilterMenuView(
+                selectedTags: $selectedTags,
+                selectedCategories: $selectedCategories,
+                selectedVendors: $selectedVendors
+            )
+            .presentationDetents([.large])
+        }
+    }
+}
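For orientation, here is a minimal sketch of how a host view might wire up FilterButton; `FilterHostDemo` is a hypothetical container (not part of this commit), while FilterButton and the three selection sets mirror the API above.

```swift
import SwiftUI

// Hypothetical host view; FilterButton/FilterMenuView are the types added above.
struct FilterHostDemo: View {
    @State private var showFilterMenu = false
    @State private var selectedTags: Set<String> = []
    @State private var selectedCategories: Set<String> = []
    @State private var selectedVendors: Set<String> = []

    var body: some View {
        HStack {
            Text("\(selectedTags.count) tag filter(s) active")
            Spacer()
            // Toggling showFilterMenu presents FilterMenuView as a sheet,
            // bound to the same three selection sets.
            FilterButton(
                showFilterMenu: $showFilterMenu,
                selectedTags: $selectedTags,
                selectedCategories: $selectedCategories,
                selectedVendors: $selectedVendors
            )
        }
        .padding()
    }
}
```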
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterMenuView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterMenuView.swift
new file mode 100644
index 00000000..074487b4
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterMenuView.swift
@@ -0,0 +1,93 @@
+//
+// FilterMenuView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import SwiftUI
+
+struct FilterMenuView: View {
+    @Environment(\.dismiss) private var dismiss
+    @StateObject private var viewModel = ModelListViewModel()
+    @Binding var selectedTags: Set<String>
+    @Binding var selectedCategories: Set<String>
+    @Binding var selectedVendors: Set<String>
+
+    var body: some View {
+        NavigationView {
+            ScrollView {
+                VStack(alignment: .leading, spacing: 24) {
+                    VStack(alignment: .leading, spacing: 12) {
+                        Text("filter.byTag")
+                            .font(.headline)
+                            .fontWeight(.semibold)
+
+                        LazyVGrid(columns: Array(repeating: GridItem(.flexible()), count: 2), spacing: 8) {
+                            ForEach(viewModel.allTags.sorted(), id: \.self) { tag in
+                                FilterOptionRow(
+                                    text: TagTranslationManager.shared.getLocalizedTag(tag),
+                                    isSelected: selectedTags.contains(tag)
+                                ) {
+                                    if selectedTags.contains(tag) {
+                                        selectedTags.remove(tag)
+                                    } else {
+                                        selectedTags.insert(tag)
+                                    }
+                                }
+                            }
+                        }
+                    }
+
+                    Divider()
+
+                    VStack(alignment: .leading, spacing: 12) {
+                        Text("filter.byVendor")
+                            .font(.headline)
+                            .fontWeight(.semibold)
+
+                        LazyVGrid(columns: Array(repeating: GridItem(.flexible()), count: 2), spacing: 8) {
+                            ForEach(viewModel.allVendors.sorted(), id: \.self) { vendor in
+                                FilterOptionRow(
+                                    text: vendor,
+                                    isSelected: selectedVendors.contains(vendor)
+                                ) {
+                                    if selectedVendors.contains(vendor) {
+                                        selectedVendors.remove(vendor)
+                                    } else {
+                                        selectedVendors.insert(vendor)
+                                    }
+                                }
+                            }
+                        }
+                    }
+
+                    Spacer(minLength: 100)
+                }
+                .padding()
+            }
+            .navigationTitle("filter.title")
+            .navigationBarTitleDisplayMode(.inline)
+            .toolbar {
+                ToolbarItem(placement: .navigationBarLeading) {
+                    Button("button.clear") {
+                        selectedTags.removeAll()
+                        selectedCategories.removeAll()
+                        selectedVendors.removeAll()
+                    }
+                }
+
+                ToolbarItem(placement: .navigationBarTrailing) {
+                    Button("button.done") {
+                        dismiss()
+                    }
+                }
+            }
+        }
+        .onAppear {
+            Task {
+                await viewModel.fetchModels()
+            }
+        }
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterOptionRow.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterOptionRow.swift
new file mode 100644
index 00000000..a9496902
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterOptionRow.swift
@@ -0,0 +1,40 @@
+//
+// FilterOptionRow.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import SwiftUI
+
+struct FilterOptionRow: View {
+    let text: String
+    let isSelected: Bool
+    let onTap: () -> Void
+
+    var body: some View {
+        Button(action: onTap) {
+            HStack {
+                Text(text)
+                    .font(.system(size: 14))
+                    .foregroundColor(.primary)
+
+                Spacer()
+
+                if isSelected {
+                    Image(systemName: "checkmark.circle.fill")
+                        .foregroundColor(.accentColor)
+                } else {
+                    Image(systemName: "circle")
+                        .foregroundColor(.secondary)
+                }
+            }
+            .padding(.horizontal, 12)
+            .padding(.vertical, 8)
+            .background(
+                RoundedRectangle(cornerRadius: 8)
+                    .fill(isSelected ? Color.accentColor.opacity(0.1) : Color(.systemGray6))
+            )
+        }
+    }
+}
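FilterMenuView above, and QuickFilterTags below, both repeat the same contains/remove/insert dance when a row is tapped. A small Set extension could express that toggle once; this helper is an illustrative suggestion, not something the commit adds:

```swift
// Illustrative helper: one mutating toggle instead of the repeated
// contains/remove/insert pattern in the filter views.
extension Set {
    mutating func toggleMembership(of element: Element) {
        if contains(element) {
            remove(element)
        } else {
            insert(element)
        }
    }
}

// e.g. inside FilterOptionRow's onTap closure:
// selectedTags.toggleMembership(of: tag)
```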
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterTagChip.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterTagChip.swift
new file mode 100644
index 00000000..c52eeab1
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/FilterTagChip.swift
@@ -0,0 +1,28 @@
+//
+// FilterTagChip.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import SwiftUI
+
+struct FilterTagChip: View {
+    let text: String
+    let isSelected: Bool
+    let onTap: () -> Void
+
+    var body: some View {
+        Button(action: onTap) {
+            Text(text)
+                .font(.system(size: 12, weight: .medium))
+                .foregroundColor(isSelected ? .white : .primary)
+                .padding(.horizontal, 12)
+                .padding(.vertical, 6)
+                .background(
+                    RoundedRectangle(cornerRadius: 16)
+                        .fill(isSelected ? Color.primaryPurple : Color(.systemGray6))
+                )
+        }
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/QuickFilterTags.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/QuickFilterTags.swift
new file mode 100644
index 00000000..479515a8
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/QuickFilterTags.swift
@@ -0,0 +1,33 @@
+//
+// QuickFilterTags.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import SwiftUI
+
+struct QuickFilterTags: View {
+    let tags: [String]
+    @Binding var selectedTags: Set<String>
+
+    var body: some View {
+        ScrollView(.horizontal, showsIndicators: false) {
+            HStack(spacing: 8) {
+                ForEach(tags, id: \.self) { tag in
+                    FilterTagChip(
+                        text: TagTranslationManager.shared.getLocalizedTag(tag),
+                        isSelected: selectedTags.contains(tag)
+                    ) {
+                        if selectedTags.contains(tag) {
+                            selectedTags.remove(tag)
+                        } else {
+                            selectedTags.insert(tag)
+                        }
+                    }
+                }
+            }
+            .padding(.horizontal, 16)
+        }
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/SourceSelector.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/SourceSelector.swift
new file mode 100644
index 00000000..7ce858c1
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/SourceSelector.swift
@@ -0,0 +1,47 @@
+//
+// SourceSelector.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import SwiftUI
+
+struct SourceSelector: View {
+    @Binding var selectedSource: ModelSource
+    @Binding var showSourceMenu: Bool
+    let onSourceChange: (ModelSource) -> Void
+
+    var body: some View {
+        Menu {
+            ForEach(ModelSource.allCases) { source in
+                Button(action: {
+                    onSourceChange(source)
+                }) {
+                    HStack {
+                        Text(source.rawValue)
+                        if source == selectedSource {
+                            Image(systemName: "checkmark")
+                        }
+                    }
+                }
+            }
+        } label: {
+            HStack(spacing: 4) {
+                Text("modelSource.title")
+                    .font(.system(size: 12, weight: .medium))
+                    .foregroundColor(.primary)
+
+                Text(selectedSource.rawValue)
+                    .font(.system(size: 12, weight: .regular))
+                    .foregroundColor(.primary)
+
+                Image(systemName: "chevron.down")
+                    .font(.system(size: 10))
+                    .foregroundColor(.primary)
+            }
+            .padding(.horizontal, 6)
+            .padding(.vertical, 6)
+        }
+    }
+}
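SourceSelector's `ForEach(ModelSource.allCases)` compiles without an explicit `id:` only because this diff also makes ModelSource conform to Identifiable (see the ModelSource.swift hunk further down, which adds `public var id: Self { self }`). A self-contained illustration of the pattern, using a hypothetical DemoSource enum:

```swift
import SwiftUI

// Same shape as ModelSource after this diff: CaseIterable supplies allCases,
// Identifiable (id == the case itself) lets ForEach iterate without `id:`.
enum DemoSource: String, CaseIterable, Identifiable {
    case modelScope = "ModelScope"
    case huggingFace = "HuggingFace"
    case modeler = "Modeler"

    var id: Self { self }
}

struct DemoSourceList: View {
    var body: some View {
        List {
            ForEach(DemoSource.allCases) { source in
                Text(source.rawValue)
            }
        }
    }
}
```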
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/ToolbarView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/ToolbarView.swift
new file mode 100644
index 00000000..7893ea2c
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListSubviews/ToolbarView.swift
@@ -0,0 +1,56 @@
+//
+// ToolbarView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import SwiftUI
+
+struct ToolbarView: View {
+    @ObservedObject var viewModel: ModelListViewModel
+    @Binding var selectedSource: ModelSource
+    @Binding var showSourceMenu: Bool
+    @Binding var selectedTags: Set<String>
+    @Binding var selectedCategories: Set<String>
+    @Binding var selectedVendors: Set<String>
+    let quickFilterTags: [String]
+    @Binding var showFilterMenu: Bool
+    let onSourceChange: (ModelSource) -> Void
+
+    var body: some View {
+        VStack(spacing: 12) {
+            HStack {
+                SourceSelector(
+                    selectedSource: $selectedSource,
+                    showSourceMenu: $showSourceMenu,
+                    onSourceChange: onSourceChange
+                )
+
+                // Quick filter tags
+                QuickFilterTags(
+                    tags: quickFilterTags,
+                    selectedTags: $selectedTags
+                )
+
+                Spacer()
+
+                FilterButton(
+                    showFilterMenu: $showFilterMenu,
+                    selectedTags: $selectedTags,
+                    selectedCategories: $selectedCategories,
+                    selectedVendors: $selectedVendors
+                )
+            }
+            .padding(.horizontal, 16)
+        }
+        .padding(.vertical, 8)
+        .background(Color(.systemBackground))
+        .overlay(
+            Rectangle()
+                .frame(height: 0.5)
+                .foregroundColor(Color(.separator)),
+            alignment: .bottom
+        )
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListView.swift
new file mode 100644
index 00000000..8197740d
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelListView.swift
@@ -0,0 +1,146 @@
+//
+// ModelListView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+//
+
+import SwiftUI
+
+struct ModelListView: View {
+    @ObservedObject var viewModel: ModelListViewModel
+    @State private var searchText = ""
+    @State private var selectedSource = ModelSourceManager.shared.selectedSource
+    @State private var showSourceMenu = false
+    @State private var selectedTags: Set<String> = []
+    @State private var selectedCategories: Set<String> = []
+    @State private var selectedVendors: Set<String> = []
+    @State private var showFilterMenu = false
+
+    var body: some View {
+        ScrollView {
+            LazyVStack(spacing: 0, pinnedViews: [.sectionHeaders]) {
+                Section {
+                    modelListSection
+                } header: {
+                    toolbarSection
+                }
+            }
+        }
+        .searchable(text: $searchText, prompt: "Search models...")
+        .onChange(of: searchText) { _, newValue in
+            viewModel.searchText = newValue
+        }
+        .refreshable {
+            await viewModel.fetchModels()
+        }
+        .alert("Error", isPresented: $viewModel.showError) {
+            Button("OK") { }
+        } message: {
+            Text(viewModel.errorMessage)
+        }
+    }
+
+    // Extract model list section as independent view
+    @ViewBuilder
+    private var modelListSection: some View {
+        LazyVStack(spacing: 8) {
+            ForEach(Array(filteredModels.enumerated()), id: \.element.id) { index, model in
+                modelRowView(model: model, index: index)
+
+                if index < filteredModels.count - 1 {
+                    Divider()
+                        .padding(.leading, 60)
+                }
+            }
+        }
+        .padding(.vertical, 8)
+    }
+
+    // Extract toolbar section as independent view
+    @ViewBuilder
+    private var toolbarSection: some View {
+        ToolbarView(
+            viewModel: viewModel, selectedSource: $selectedSource,
+            showSourceMenu: $showSourceMenu,
+            selectedTags: $selectedTags,
+            selectedCategories: $selectedCategories,
+            selectedVendors: $selectedVendors,
+            quickFilterTags: viewModel.quickFilterTags,
+            showFilterMenu: $showFilterMenu,
+            onSourceChange: handleSourceChange
+        )
+    }
+
+    @ViewBuilder
+    private func modelRowView(model: ModelInfo, index: Int) -> some View {
+        ModelRowView(
+            model: model,
+            viewModel: viewModel,
+            downloadProgress: viewModel.downloadProgress[model.id] ?? 0,
+            isDownloading: viewModel.currentlyDownloading == model.id,
+            isOtherDownloading: isOtherDownloadingCheck(model: model)
+        ) {
+            Task {
+                await viewModel.downloadModel(model)
+            }
+        }
+        .padding(.horizontal, 16)
+    }
+
+    // Extract complex boolean logic as independent method
+    private func isOtherDownloadingCheck(model: ModelInfo) -> Bool {
+        return viewModel.currentlyDownloading != nil && viewModel.currentlyDownloading != model.id
+    }
+
+    // Extract source change handling logic as independent method
+    private func handleSourceChange(_ source: ModelSource) {
+        ModelSourceManager.shared.updateSelectedSource(source)
+        selectedSource = source
+        Task {
+            await viewModel.fetchModels()
+        }
+    }
+
+    // Filter models based on selected tags, categories and vendors
+    private var filteredModels: [ModelInfo] {
+        let baseFiltered = viewModel.filteredModels
+
+        if selectedTags.isEmpty && selectedCategories.isEmpty && selectedVendors.isEmpty {
+            return baseFiltered
+        }
+
+        return baseFiltered.filter { model in
+            let tagMatch = checkTagMatch(model: model)
+            let categoryMatch = checkCategoryMatch(model: model)
+            let vendorMatch = checkVendorMatch(model: model)
+
+            return tagMatch && categoryMatch && vendorMatch
+        }
+    }
+
+    // Extract tag matching logic as independent method
+    private func checkTagMatch(model: ModelInfo) -> Bool {
+        return selectedTags.isEmpty || selectedTags.allSatisfy { selectedTag in
+            model.localizedTags.contains { tag in
+                tag.localizedCaseInsensitiveContains(selectedTag)
+            }
+        }
+    }
+
+    // Extract category matching logic as independent method
+    private func checkCategoryMatch(model: ModelInfo) -> Bool {
+        return selectedCategories.isEmpty || selectedCategories.allSatisfy { selectedCategory in
+            model.categories?.contains { category in
+                category.localizedCaseInsensitiveContains(selectedCategory)
+            } ?? false
+        }
+    }
+
+    // Extract vendor matching logic as independent method
+    private func checkVendorMatch(model: ModelInfo) -> Bool {
+        return selectedVendors.isEmpty || selectedVendors.contains { selectedVendor in
+            model.vendor?.localizedCaseInsensitiveContains(selectedVendor) ?? false
+        }
+    }
+}
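Note the asymmetry in the matching methods above: tags and categories use `allSatisfy` (a model must match every selected value), while vendors use `contains` (any one selected vendor suffices). A standalone sketch of that predicate, with a hypothetical stand-in for ModelInfo:

```swift
import Foundation

// Hypothetical stand-in for ModelInfo, just enough for the predicate.
struct DemoModel {
    let tags: [String]
    let vendor: String?
}

func matches(_ model: DemoModel,
             selectedTags: Set<String>,
             selectedVendors: Set<String>) -> Bool {
    // Empty selection means "no constraint" on that dimension.
    let tagMatch = selectedTags.isEmpty || selectedTags.allSatisfy { selected in
        model.tags.contains { $0.localizedCaseInsensitiveContains(selected) }
    }
    // Vendors are OR-ed: one matching vendor is enough.
    let vendorMatch = selectedVendors.isEmpty || selectedVendors.contains { selected in
        model.vendor?.localizedCaseInsensitiveContains(selected) ?? false
    }
    return tagMatch && vendorMatch
}

let demo = DemoModel(tags: ["Chat", "Multimodal"], vendor: "Qwen")
print(matches(demo, selectedTags: ["Chat", "Multimodal"], selectedVendors: ["Qwen"])) // true
print(matches(demo, selectedTags: ["Chat", "Math"], selectedVendors: []))             // false: "Math" fails allSatisfy
```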
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/ActionButtonsView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/ActionButtonsView.swift
new file mode 100644
index 00000000..13b234f1
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/ActionButtonsView.swift
@@ -0,0 +1,43 @@
+//
+// ActionButtonsView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/1/3.
+//
+
+import SwiftUI
+
+// MARK: - Action buttons view
+struct ActionButtonsView: View {
+    let model: ModelInfo
+    @ObservedObject var viewModel: ModelListViewModel
+    let downloadProgress: Double
+    let isDownloading: Bool
+    let isOtherDownloading: Bool
+    let formattedSize: String
+    let onDownload: () -> Void
+    @Binding var showDeleteAlert: Bool
+
+    var body: some View {
+        VStack(alignment: .center, spacing: 4) {
+            if model.isDownloaded {
+                // Downloaded state
+                DownloadedButtonView(showDeleteAlert: $showDeleteAlert)
+            } else if isDownloading {
+                // Downloading state
+                DownloadingButtonView(
+                    viewModel: viewModel,
+                    downloadProgress: downloadProgress
+                )
+            } else {
+                // Pending download state
+                PendingDownloadButtonView(
+                    isOtherDownloading: isOtherDownloading,
+                    formattedSize: formattedSize,
+                    onDownload: onDownload
+                )
+            }
+        }
+        .frame(width: 60)
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/DownloadedButtonView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/DownloadedButtonView.swift
new file mode 100644
index 00000000..641702a2
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/DownloadedButtonView.swift
@@ -0,0 +1,30 @@
+//
+// DownloadedButtonView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/1/3.
+//
+
+import SwiftUI
+
+// MARK: - Downloaded button view
+struct DownloadedButtonView: View {
+    @Binding var showDeleteAlert: Bool
+
+    var body: some View {
+        Button(action: { showDeleteAlert = true }) {
+            VStack(spacing: 2) {
+                Image(systemName: "trash")
+                    .font(.system(size: 16))
+                    .foregroundColor(.primary.opacity(0.8))
+
+                Text(LocalizedStringKey("button.downloaded"))
+                    .font(.caption2)
+                    .foregroundColor(.secondary)
+                    .lineLimit(1)
+                    .minimumScaleFactor(0.8)
+                    .allowsTightening(true)
+            }
+        }
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/DownloadingButtonView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/DownloadingButtonView.swift
new file mode 100644
index 00000000..5b689985
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/DownloadingButtonView.swift
@@ -0,0 +1,32 @@
+//
+// DownloadingButtonView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/1/3.
+//
+
+import SwiftUI
+
+// MARK: - Downloading button view
+struct DownloadingButtonView: View {
+    @ObservedObject var viewModel: ModelListViewModel
+    let downloadProgress: Double
+
+    var body: some View {
+        Button(action: {
+            Task {
+                await viewModel.cancelDownload()
+            }
+        }) {
+            VStack(spacing: 2) {
+                ProgressView(value: downloadProgress)
+                    .progressViewStyle(CircularProgressViewStyle(tint: .accentColor))
+                    .frame(width: 24, height: 24)
+
+                Text(String(format: "%.2f%%", downloadProgress * 100))
+                    .font(.caption2)
+                    .foregroundColor(.secondary)
+            }
+        }
+    }
+}
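ActionButtonsView picks one of three mutually exclusive states from `model.isDownloaded` and `isDownloading`, in that order. A hedged sketch making the decision table explicit (illustrative only; the commit keeps the if/else chain):

```swift
// Illustrative: the three UI states ActionButtonsView can render.
enum DownloadUIState {
    case downloaded   // trash icon + "downloaded" label (DownloadedButtonView)
    case downloading  // progress ring + percentage, tap cancels (DownloadingButtonView)
    case pending      // download arrow, disabled while another download runs (PendingDownloadButtonView)
}

// Mirrors the branch order of the view: isDownloaded wins over isDownloading.
func downloadUIState(isDownloaded: Bool, isDownloading: Bool) -> DownloadUIState {
    if isDownloaded { return .downloaded }
    if isDownloading { return .downloading }
    return .pending
}
```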
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/PendingDownloadButtonView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/PendingDownloadButtonView.swift
new file mode 100644
index 00000000..0995b462
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/PendingDownloadButtonView.swift
@@ -0,0 +1,23 @@
+//
+// PendingDownloadButtonView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/1/3.
+//
+
+import SwiftUI
+
+struct PendingDownloadButtonView: View {
+    let isOtherDownloading: Bool
+    let formattedSize: String
+    let onDownload: () -> Void
+
+    var body: some View {
+        Button(action: onDownload) {
+            Image(systemName: "arrow.down.circle.fill")
+                .font(.title2)
+                .foregroundColor(isOtherDownloading ? .secondary : .primaryPurple)
+        }
+        .disabled(isOtherDownloading)
+    }
+}
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/TagChip.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/TagChip.swift
new file mode 100644
index 00000000..788e5db3
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/TagChip.swift
@@ -0,0 +1,25 @@
+//
+// TagChip.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/1/3.
+//
+
+import SwiftUI
+
+// MARK: - Tag chip
+struct TagChip: View {
+    let text: String
+
+    var body: some View {
+        Text(TagTranslationManager.shared.getLocalizedTag(text))
+            .font(.caption)
+            .foregroundColor(.secondary)
+            .padding(.horizontal, 8)
+            .padding(.vertical, 3)
+            .background(
+                RoundedRectangle(cornerRadius: 8)
+                    .stroke(Color.secondary.opacity(0.3), lineWidth: 0.5)
+            )
+    }
+}
\ No newline at end of file
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/TagsView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/TagsView.swift
new file mode 100644
index 00000000..c076a007
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowSubviews/TagsView.swift
@@ -0,0 +1,25 @@
+//
+// TagsView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/1/3.
+//
+
+import SwiftUI
+
+// MARK: - Tags view
+struct TagsView: View {
+    let tags: [String]
+
+    var body: some View {
+        ScrollView(.horizontal, showsIndicators: false) {
+            HStack(spacing: 6) {
+                ForEach(tags, id: \.self) { tag in
+                    TagChip(text: tag)
+                }
+            }
+            .padding(.horizontal, 1)
+        }
+        .frame(height: 25)
+    }
+}
\ No newline at end of file
diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowView.swift
new file mode 100644
index 00000000..7f75cfb5
--- /dev/null
+++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/ModelRowView.swift
@@ -0,0 +1,107 @@
+//
+// ModelRowView.swift
+// MNNLLMiOS
+//
+// Created by 游薪渝(揽清) on 2025/7/4.
+// + +import SwiftUI + +struct ModelRowView: View { + + let model: ModelInfo + @ObservedObject var viewModel: ModelListViewModel + + let downloadProgress: Double + let isDownloading: Bool + let isOtherDownloading: Bool + let onDownload: () -> Void + + @State private var showDeleteAlert = false + + private var localizedTags: [String] { + model.localizedTags + } + + private var formattedSize: String { + model.formattedSize + } + + var body: some View { + HStack(alignment: .center, spacing: 0) { + + ModelIconView(modelId: model.id) + .frame(width: 40, height: 40) + + VStack(alignment: .leading, spacing: 6) { + + Text(model.modelName) + .font(.headline) + .fontWeight(.semibold) + .lineLimit(1) + + if !localizedTags.isEmpty { + TagsView(tags: localizedTags) + } + + HStack(alignment: .center, spacing: 2) { + Image(systemName: "folder") + .font(.caption) + .fontWeight(.medium) + .foregroundColor(.gray) + .frame(width: 20, height: 20) + + Text(formattedSize) + .font(.caption) + .fontWeight(.medium) + .foregroundColor(.gray) + } + } + .padding(.leading, 8) + + Spacer() + + VStack { + Spacer() + ActionButtonsView( + model: model, + viewModel: viewModel, + downloadProgress: downloadProgress, + isDownloading: isDownloading, + isOtherDownloading: isOtherDownloading, + formattedSize: formattedSize, + onDownload: onDownload, + showDeleteAlert: $showDeleteAlert + ) + Spacer() + } + } + .padding(.vertical, 8) + .contentShape(Rectangle()) + .onTapGesture { + handleRowTap() + } + .alert(LocalizedStringKey("alert.deleteModel.title"), isPresented: $showDeleteAlert) { + Button("Delete", role: .destructive) { + Task { + await viewModel.deleteModel(model) + } + } + Button("Cancel", role: .cancel) { } + } message: { + Text(LocalizedStringKey("alert.deleteModel.message")) + } + } + + private func handleRowTap() { + if model.isDownloaded { + return + } else if isDownloading { + Task { + await viewModel.cancelDownload() + } + } else if !isOtherDownloading { + onDownload() + } + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/SearchBar.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/SearchBar.swift similarity index 89% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/SearchBar.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/SearchBar.swift index 94c80e49..6acdb8a5 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/SearchBar.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/SearchBar.swift @@ -14,8 +14,10 @@ struct SearchBar: View { HStack { Image(systemName: "magnifyingglass") .foregroundColor(.gray) + .padding(.horizontal, 10) TextField("Search models...", text: $text) + .font(.system(size: 12, weight: .regular)) .textFieldStyle(RoundedBorderTextFieldStyle()) .autocapitalization(.none) .disableAutocorrection(true) diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/SwipeActionsView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/SwipeActionsView.swift new file mode 100644 index 00000000..7a0d4f80 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/SwipeActionsView.swift @@ -0,0 +1,40 @@ +// +// SwipeActionsView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/1/3. 
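SwipeActionsView below is designed to sit inside a List row's `swipeActions` modifier; a hypothetical call site (the sketch assumes ModelInfo exposes `id` and that the view model publishes `filteredModels`):

```swift
import SwiftUI

// Hypothetical wrapper showing where SwipeActionsView (below) plugs in.
struct SwipeDemoList: View {
    @ObservedObject var viewModel: ModelListViewModel

    var body: some View {
        List(viewModel.filteredModels, id: \.id) { model in
            Text(model.modelName)
                .swipeActions(edge: .trailing, allowsFullSwipe: false) {
                    // Pin/unpin always offered; delete only when downloaded.
                    SwipeActionsView(model: model, viewModel: viewModel)
                }
        }
    }
}
```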
+// + + +import SwiftUI + +struct SwipeActionsView: View { + let model: ModelInfo + @ObservedObject var viewModel: ModelListViewModel + + var body: some View { + if viewModel.pinnedModelIds.contains(model.id) { + Button { + viewModel.unpinModel(model) + } label: { + Label(LocalizedStringKey("button.unpin"), systemImage: "pin.slash") + }.tint(.gray) + } else { + Button { + viewModel.pinModel(model) + } label: { + Label(LocalizedStringKey("button.pin"), systemImage: "pin") + }.tint(.primaryBlue) + } + if model.isDownloaded { + Button(role: .destructive) { + Task { + await viewModel.deleteModel(model) + } + } label: { + Label("Delete", systemImage: "trash") + } + .tint(.primaryRed) + } + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/WebView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/WebView.swift similarity index 100% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/WebView.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/ModelList/Views/WebView.swift diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelSource.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Source/ModelSource.swift similarity index 77% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelSource.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Source/ModelSource.swift index 368ae80e..8a55a346 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelSource.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Source/ModelSource.swift @@ -1,5 +1,5 @@ // -// ModelDownloadStorage.swift +// ModelSource.swift // MNNLLMiOS // // Created by 游薪渝(揽清) on 2025/2/20. @@ -7,11 +7,13 @@ import Foundation -public enum ModelSource: String, CaseIterable { +public enum ModelSource: String, CaseIterable, Identifiable { case modelScope = "ModelScope" - case huggingFace = "Hugging Face" + case huggingFace = "HuggingFace" case modeler = "Modeler" + public var id: Self { self } + var description: String { switch self { case .modelScope: diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelSourceManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Source/ModelSourceManager.swift similarity index 97% rename from apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelSourceManager.swift rename to apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Source/ModelSourceManager.swift index c7cc0e88..5a4560f2 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelSourceManager.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Source/ModelSourceManager.swift @@ -1,5 +1,5 @@ // -// ModelDownloadStorage.swift +// ModelSourceManager.swift // MNNLLMiOS // // Created by 游薪渝(揽清) on 2025/2/20. diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Storage/ModelStorageManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Storage/ModelStorageManager.swift new file mode 100644 index 00000000..73f2a5e8 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/Storage/ModelStorageManager.swift @@ -0,0 +1,101 @@ +// +// ModelStorageManager.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/1/10. 
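A short usage sketch for ModelStorageManager, whose body follows. Everything goes through UserDefaults, which accepts only property-list types; `[String: Date]` and `[String: Int64]` values bridge to plist-compatible dictionaries, which is why the getters can cast the stored dictionaries back. The call sequence below is illustrative, not taken from the commit:

```swift
// Illustrative call sequence against ModelStorageManager (defined below).
let storage = ModelStorageManager.shared

storage.markModelAsDownloaded("Qwen3-0.6B-MNN")
storage.setCachedSize(600_000_000, for: "Qwen3-0.6B-MNN")
storage.updateLastUsed(for: "Qwen3-0.6B-MNN")

if storage.isModelDownloaded("Qwen3-0.6B-MNN") {
    // Returns the cached byte count without touching the file system.
    let bytes = storage.getCachedSize(for: "Qwen3-0.6B-MNN") ?? 0
    print("cached size: \(bytes) bytes")
}

// markModelAsNotDownloaded also clears the cached size for the model.
storage.markModelAsNotDownloaded("Qwen3-0.6B-MNN")
```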
+// + +import Foundation + +class ModelStorageManager { + static let shared = ModelStorageManager() + + private let userDefaults = UserDefaults.standard + private let downloadedModelsKey = "com.mnnllm.downloadedModels" + private let lastUsedModelKey = "com.mnnllm.lastUsedModels" + private let cachedSizesKey = "com.mnnllm.cachedSizes" + + private init() {} + + var lastUsedModels: [String: Date] { + get { + userDefaults.dictionary(forKey: lastUsedModelKey) as? [String: Date] ?? [:] + } + set { + userDefaults.set(newValue, forKey: lastUsedModelKey) + } + } + + func updateLastUsed(for modelName: String) { + var models = lastUsedModels + models[modelName] = Date() + lastUsedModels = models + } + + func getLastUsed(for modelName: String) -> Date? { + return lastUsedModels[modelName] + } + + var downloadedModels: [String] { + get { + userDefaults.array(forKey: downloadedModelsKey) as? [String] ?? [] + } + set { + userDefaults.set(newValue, forKey: downloadedModelsKey) + } + } + + func clearDownloadStatus(for modelName: String) { + var models = downloadedModels + models.removeAll { $0 == modelName } + downloadedModels = models + } + + func isModelDownloaded(_ modelName: String) -> Bool { + downloadedModels.contains(modelName) + } + + func markModelAsDownloaded(_ modelName: String) { + var models = downloadedModels + if !models.contains(modelName) { + models.append(modelName) + downloadedModels = models + } + } + + func markModelAsNotDownloaded(_ modelName: String) { + var models = downloadedModels + models.removeAll { $0 == modelName } + downloadedModels = models + + // Also clear cached size when model is marked as not downloaded + clearCachedSize(for: modelName) + } + + // MARK: - Cached Size Management + + var cachedSizes: [String: Int64] { + get { + userDefaults.dictionary(forKey: cachedSizesKey) as? [String: Int64] ?? [:] + } + set { + userDefaults.set(newValue, forKey: cachedSizesKey) + } + } + + func getCachedSize(for modelName: String) -> Int64? { + return cachedSizes[modelName] + } + + func setCachedSize(_ size: Int64, for modelName: String) { + var sizes = cachedSizes + sizes[modelName] = size + cachedSizes = sizes + } + + func clearCachedSize(for modelName: String) { + var sizes = cachedSizes + sizes.removeValue(forKey: modelName) + cachedSizes = sizes + } +} \ No newline at end of file diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/TagTranslation/TagTranslationManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/TagTranslation/TagTranslationManager.swift new file mode 100644 index 00000000..713b47a7 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/MainTab/Services/TagTranslation/TagTranslationManager.swift @@ -0,0 +1,29 @@ +// +// TagTranslationManager.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/4. 
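TagTranslationManager (body below) maps tag keys to Chinese strings only when the app language is zh-Hans; the translation table itself ships in the `tagTranslations` block of the mock.json added later in this diff. A hedged sketch of loading it, where the decoding struct and bundle lookup are assumptions, not part of the commit:

```swift
import Foundation

// Hypothetical decoding type for the tagTranslations block of mock.json.
struct TagTranslationConfig: Decodable {
    let tagTranslations: [String: String]
}

func loadTagTranslationsFromBundle() {
    guard let url = Bundle.main.url(forResource: "mock", withExtension: "json"),
          let data = try? Data(contentsOf: url),
          let config = try? JSONDecoder().decode(TagTranslationConfig.self, from: data) else {
        return
    }
    TagTranslationManager.shared.loadTagTranslations(config.tagTranslations)
    // Afterwards getLocalizedTag("Chat") returns "对话" under zh-Hans,
    // and the untranslated key "Chat" for any other language.
}
```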
+// + +import Foundation + +class TagTranslationManager { + static let shared = TagTranslationManager() + private var tagTranslations: [String: String] = [:] + + private init() {} + + func loadTagTranslations(_ translations: [String: String]) { + tagTranslations = translations + } + + func getLocalizedTag(_ tag: String) -> String { + let currentLanguage = LanguageManager.shared.currentLanguage + let isChineseLanguage = currentLanguage == "zh-Hans" + + if isChineseLanguage, let translation = tagTranslations[tag] { + return translation + } + return tag + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelInfo.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelInfo.swift deleted file mode 100644 index 80f3b775..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelInfo.swift +++ /dev/null @@ -1,43 +0,0 @@ -// -// ModelClient.swift -// MNNLLMiOS -// -// Created by 游薪渝(揽清) on 2025/1/3. -// - -import Hub -import Foundation - -struct ModelInfo: Codable { - let modelId: String - let createdAt: String - let downloads: Int - let tags: [String] - - var name: String { - modelId.removingTaobaoPrefix() - } - - var isDownloaded: Bool = false - - var localPath: String { - return HubApi.shared.localRepoLocation(HubApi.Repo.init(id: modelId)).path - } - - private enum CodingKeys: String, CodingKey { - case modelId - case tags - case downloads - case createdAt - } -} - -struct RepoInfo: Codable { - let modelId: String - let sha: String - let siblings: [Sibling] - - struct Sibling: Codable { - let rfilename: String - } -} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelListViewModel.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelListViewModel.swift deleted file mode 100644 index da94d670..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Models/ModelListViewModel.swift +++ /dev/null @@ -1,155 +0,0 @@ -// -// ModelListViewModel.swift -// MNNLLMiOS -// -// Created by 游薪渝(揽清) on 2025/1/3. -// - -import Foundation - -@MainActor -class ModelListViewModel: ObservableObject { - @Published private(set) var models: [ModelInfo] = [] - @Published private(set) var downloadProgress: [String: Double] = [:] - @Published private(set) var currentlyDownloading: String? - @Published var showError = false - @Published var errorMessage = "" - @Published var searchText = "" - - @Published var selectedModel: ModelInfo? - - private let modelClient = ModelClient() - - var filteredModels: [ModelInfo] { - - let filteredModels = searchText.isEmpty ? models : models.filter { model in - model.modelId.localizedCaseInsensitiveContains(searchText) || - model.tags.contains { $0.localizedCaseInsensitiveContains(searchText) } - } - - let downloadedModels = filteredModels.filter { $0.isDownloaded } - let notDownloadedModels = filteredModels.filter { !$0.isDownloaded } - - return downloadedModels + notDownloadedModels - } - - init() { - Task { - await fetchModels() - } - } - - func fetchModels() async { - do { - var fetchedModels = try await modelClient.getModelList() - - let hasDiffusionModels = fetchedModels.contains { - $0.name.lowercased().contains("diffusion") - } - - if hasDiffusionModels { - fetchedModels = fetchedModels.filter { model in - let name = model.name.lowercased() - let tags = model.tags.map { $0.lowercased() } - - // only show gpu diffusion - if name.contains("diffusion") { - return name.contains("gpu") || tags.contains { $0.contains("gpu") } - } - - return true - } - } - - for i in 0.. 
Bool { - downloadedModels.contains(modelId) - } - - func markModelAsDownloaded(_ modelId: String) { - var models = downloadedModels - if !models.contains(modelId) { - models.append(modelId) - downloadedModels = models - } - } -} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelClient.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelClient.swift deleted file mode 100644 index f4c5b855..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Network/ModelClient.swift +++ /dev/null @@ -1,110 +0,0 @@ -// -// ModelClient.swift -// MNNLLMiOS -// -// Created by 游薪渝(揽清) on 2025/1/3. -// - -import Hub -import Foundation - -class ModelClient { - private let baseMirrorURL = "https://hf-mirror.com" - private let baseURL = "https://huggingface.co" - private let maxRetries = 5 - - private lazy var baseURLString: String = { - switch ModelSourceManager.shared.selectedSource { - case .huggingFace: - return baseURL - default: - return baseMirrorURL - } - }() - - init() {} - - func getModelList() async throws -> [ModelInfo] { - let url = URL(string: "\(baseURLString)/api/models?author=taobao-mnn&limit=100")! - return try await performRequest(url: url, retries: maxRetries) - } - - func getRepoInfo(repoName: String, revision: String) async throws -> RepoInfo { - let url = URL(string: "\(baseURLString)/api/models/\(repoName)")! - return try await performRequest(url: url, retries: maxRetries) - } - - @MainActor - func downloadModel(model: ModelInfo, - progress: @escaping (Double) -> Void) async throws { - switch ModelSourceManager.shared.selectedSource { - case .modelScope, .modeler: - try await downloadFromModelScope(model, progress: progress) - case .huggingFace: - try await downloadFromHuggingFace(model, progress: progress) - } - } - - private func downloadFromModelScope(_ model: ModelInfo, - progress: @escaping (Double) -> Void) async throws { - let ModelScopeId = model.modelId.replacingOccurrences(of: "taobao-mnn", with: "MNN") - let config = URLSessionConfiguration.default - config.timeoutIntervalForRequest = 30 - config.timeoutIntervalForResource = 300 - - let manager = ModelScopeDownloadManager.init(repoPath: ModelScopeId, config: config, enableLogging: true, source: ModelSourceManager.shared.selectedSource) - - try await manager.downloadModel(to:"huggingface/models/taobao-mnn", modelId: ModelScopeId, modelName: model.name) { fileProgress in - progress(fileProgress) - } - } - - private func downloadFromHuggingFace(_ model: ModelInfo, - progress: @escaping (Double) -> Void) async throws { - let repo = Hub.Repo(id: model.modelId) - let modelFiles = ["*.*"] - let mirrorHubApi = HubApi(endpoint: baseURL) - try await mirrorHubApi.snapshot(from: repo, matching: modelFiles) { fileProgress in - progress(fileProgress.fractionCompleted) - } - } - - private func performRequest(url: URL, retries: Int = 3) async throws -> T { - var lastError: Error? - - for attempt in 1...retries { - do { - var request = URLRequest(url: url) - request.setValue("application/json", forHTTPHeaderField: "Accept") - - let (data, response) = try await URLSession.shared.data(for: request) - - guard let httpResponse = response as? 
HTTPURLResponse else { - throw NetworkError.invalidResponse - } - - if httpResponse.statusCode == 200 { - return try JSONDecoder().decode(T.self, from: data) - } - - throw NetworkError.invalidResponse - - } catch { - lastError = error - if attempt < retries { - try await Task.sleep(nanoseconds: UInt64(pow(2.0, Double(attempt)) * 1_000_000_000)) - continue - } - } - } - - throw lastError ?? NetworkError.unknown - } -} - -enum NetworkError: Error { - case invalidResponse - case invalidData - case downloadFailed - case unknown -} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/ModelListView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/ModelListView.swift deleted file mode 100644 index 57ae1be3..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/ModelListView.swift +++ /dev/null @@ -1,202 +0,0 @@ -// -// ModelListView.swift -// MNNLLMiOS -// -// Created by 游薪渝(揽清) on 2025/1/3. -// - -import SwiftUI - -struct ModelListView: View { - - @State private var scrollOffset: CGFloat = 0 - @State private var showHelp = false - @State private var showUserGuide = false - @State private var showHistory = false - @State private var selectedHistory: ChatHistory? - @State private var histories: [ChatHistory] = [] - @State private var showSettings = false - @State private var showWebView = false - @State private var webViewURL: URL? - - @StateObject private var viewModel = ModelListViewModel() - - var body: some View { - ZStack { - NavigationView { - List { - SearchBar(text: $viewModel.searchText) - .listRowInsets(EdgeInsets()) - .listRowSeparator(.hidden) - .padding(.horizontal) - - ForEach(viewModel.filteredModels, id: \.modelId) { model in - ModelRowView(model: model, - downloadProgress: viewModel.downloadProgress[model.modelId] ?? 0, - isDownloading: viewModel.currentlyDownloading == model.modelId, - isOtherDownloading: viewModel.currentlyDownloading != nil) { - if model.isDownloaded { - viewModel.selectModel(model) - } else { - Task { - await viewModel.downloadModel(model) - } - } - } - .swipeActions(edge: .trailing, allowsFullSwipe: false) { - if model.isDownloaded { - Button(role: .destructive) { - Task { - await viewModel.deleteModel(model) - } - } label: { - Label("Delete", systemImage: "trash") - } - } - } - } - } - .listStyle(.plain) - .navigationTitle("Models") - .navigationBarTitleDisplayMode(.large) - .navigationBarItems( - leading: Button(action: { - showHistory.toggle() - updateHistory() - }) { - Image(systemName: "clock.arrow.circlepath") - .resizable() - .aspectRatio(contentMode: .fit) - .frame(width: 22, height: 22) - }, - trailing: settingsButton - ) - .sheet(isPresented: $showHelp) { - HelpView() - } - .sheet(isPresented: $showWebView) { - if let url = webViewURL { - WebView(url: url) - } - } - .refreshable { - await viewModel.fetchModels() - } - .alert("Error", isPresented: $viewModel.showError) { - Button("OK", role: .cancel) {} - } message: { - Text(viewModel.errorMessage) - } - .background( - NavigationLink( - destination: { - if let selectedModel = viewModel.selectedModel { - return AnyView(LLMChatView(modelInfo: selectedModel)) - } else if let selectedHistory = selectedHistory { - return AnyView(LLMChatView(modelInfo: ModelInfo( - modelId: selectedHistory.modelId, - createdAt: selectedHistory.createdAt.formatAgo(), - downloads: 0, - tags: [], - isDownloaded: true - ), history: selectedHistory)) - } - return AnyView(EmptyView()) - }(), - isActive: Binding( - get: { viewModel.selectedModel != nil || selectedHistory != nil }, - set: { if !$0 { 
viewModel.selectedModel = nil; selectedHistory = nil } } - ) - ) { - EmptyView() - } - ) - .onAppear { - checkFirstLaunch() - } - .alert(isPresented: $showUserGuide) { - Alert( - title: Text("User Guide"), - message: Text(""" - This is a local large model application that requires certain performance from your device. - It is recommended to choose different model sizes based on your device's memory. - - The model recommendations for iPhone are as follows: - - For 8GB of RAM, models up to 8B are recommended (e.g., iPhone 16 Pro). - - For 6GB of RAM, models up to 3B are recommended (e.g., iPhone 15 Pro). - - For 4GB of RAM, models up to 1B or smaller are recommended (e.g., iPhone 13). - - Choosing a model that is too large may cause insufficient memory and crashes. - """), - dismissButton: .default(Text("OK")) - ) - } - } - .disabled(showHistory) - - - if showHistory { - Color.black.opacity(0.5) - .edgesIgnoringSafeArea(.all) - .onTapGesture { - withAnimation { - showHistory = false - } - } - } - - SideMenuView(isOpen: $showHistory, selectedHistory: $selectedHistory, histories: $histories) - .edgesIgnoringSafeArea(.all) - } - .onAppear { - updateHistory() - } - .actionSheet(isPresented: $showSettings) { - ActionSheet(title: Text("Settings"), buttons: [ - .default(Text("Report an Issue")) { - webViewURL = URL(string: "https://github.com/alibaba/MNN/issues") - showWebView = true - }, - .default(Text("Go to MNN Homepage")) { - webViewURL = URL(string: "https://github.com/alibaba/MNN") - showWebView = true - }, - .default(Text(ModelSource.modelScope.description)) { - ModelSourceManager.shared.updateSelectedSource(.modelScope) - }, - .default(Text(ModelSource.modeler.description)) { - ModelSourceManager.shared.updateSelectedSource(.modeler) - }, - .default(Text(ModelSource.huggingFace.description)) { - ModelSourceManager.shared.updateSelectedSource(.huggingFace) - }, - .cancel() - ]) - } - } - - private func updateHistory() { - histories = ChatHistoryManager.shared.getAllHistory() - } - - private func checkFirstLaunch() { - let hasLaunchedBefore = UserDefaults.standard.bool(forKey: "hasLaunchedBefore") - if !hasLaunchedBefore { - // Show the user guide alert - showUserGuide = true - // Set the flag to true so it doesn't show again - UserDefaults.standard.set(true, forKey: "hasLaunchedBefore") - } - } - - private var settingsButton: some View { - Button(action: { - showSettings.toggle() - }) { - Image(systemName: "gear") - .resizable() - .aspectRatio(contentMode: .fit) - .frame(width: 22, height: 22) - } - } -} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/ModelRowView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/ModelRowView.swift deleted file mode 100644 index c0feeaf6..00000000 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/ModelList/Views/ModelRowView.swift +++ /dev/null @@ -1,60 +0,0 @@ -// -// ModelRowView.swift -// MNNLLMiOS -// -// Created by 游薪渝(揽清) on 2025/1/3. 
-// - -import SwiftUI - -struct ModelRowView: View { - - let model: ModelInfo - let downloadProgress: Double - let isDownloading: Bool - let isOtherDownloading: Bool - let onDownload: () -> Void - - var body: some View { - HStack(alignment: .top) { - - ModelIconView(modelId: model.modelId) - .frame(width: 50, height: 50) - - VStack(alignment: .leading, spacing: 8) { - Text(model.name) - .font(.headline) - .lineLimit(1) - - if !model.tags.isEmpty { - ScrollView(.horizontal, showsIndicators: false) { - HStack { - ForEach(model.tags, id: \.self) { tag in - Text(tag) - .font(.caption) - .padding(.horizontal, 8) - .padding(.vertical, 4) - .background(Color.blue.opacity(0.1)) - .cornerRadius(8) - } - } - } - } - - if isDownloading { - ProgressView(value: downloadProgress) { - Text(String(format: "%.2f%%", downloadProgress * 100)) - .font(.system(size: 14, weight: .regular, design: .default)) - } - } else { - Button(action: onDownload) { - Label(model.isDownloaded ? "Chat" : "Download", - systemImage: model.isDownloaded ? "message" : "arrow.down.circle") - .font(.system(size: 14, weight: .medium, design: .default)) - } - .disabled(isOtherDownloading) - } - } - } - } -} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Resources/mock.json b/apps/iOS/MNNLLMChat/MNNLLMiOS/Resources/mock.json new file mode 100644 index 00000000..73bb7325 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Resources/mock.json @@ -0,0 +1,1485 @@ +{ + "tagTranslations": { + "Chat": "对话", + "TextGeneration": "文本生成", + "Multimodal": "多模态", + "ImageUnderstanding": "图片理解", + "VideoUnderstanding": "视频理解", + "AudioUnderstanding": "音频理解", + "CodeGeneration": "代码生成", + "Math": "数学", + "DocumentUnderstanding": "文档理解", + "TextToImage": "文生图", + "DeepThinking": "深度思考" + }, + "quickFilterTags": ["ImageUnderstanding", "TextToImage", "AudioUnderstanding", "DeepThinking"], + "models": [ + { + "modelName": "gemma-3-1b-it-qat-q4_0-gguf-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 4, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/gemma-3-1b-it-qat-q4_0-gguf-MNN", + "ModelScope": "MNN/gemma-3-1b-it-qat-q4_0-gguf-MNN" + } + }, + { + "modelName": "Qwen3-4B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 4, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen3-4B-MNN", + "ModelScope": "MNN/Qwen3-4B-MNN" + } + }, + { + "modelName": "Qwen3-0.6B-MNN", + "tags": [ + "Chat", + "TextGeneration", + "DeepThinking" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 0.6, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen3-0.6B-MNN", + "ModelScope": "MNN/Qwen3-0.6B-MNN", + "Modelers": "MNN/Qwen3-0.6B-MNN" + } + }, + { + "modelName": "Qwen2.5-Omni-3B-MNN", + "tags": [ + "Chat", + "Multimodal", + "TextGeneration" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 3, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-Omni-3B-MNN", + "ModelScope": "MNN/Qwen2.5-Omni-3B-MNN", + "Modelers": "MNN/Qwen2.5-Omni-3B-MNN" + } + }, + { + "modelName": "Qwen3-1.7B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 1.7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen3-1.7B-MNN", + "ModelScope": "MNN/Qwen3-1.7B-MNN", + "Modelers": "MNN/Qwen3-1.7B-MNN" + } + }, + { + "modelName": "Qwen3-8B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + 
"recommended", + "qwen" + ], + "size_gb": 8, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen3-8B-MNN", + "ModelScope": "MNN/Qwen3-8B-MNN", + "Modelers": "MNN/Qwen3-8B-MNN" + } + }, + { + "modelName": "DeepSeek-R1-0528-Qwen3-8B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "recommended", + "deepseek" + ], + "size_gb": 8, + "vendor": "DeepSeek", + "sources": { + "HuggingFace": "taobao-mnn/DeepSeek-R1-0528-Qwen3-8B-MNN", + "ModelScope": "MNN/DeepSeek-R1-0528-Qwen3-8B-MNN", + "Modelers": "MNN/DeepSeek-R1-0528-Qwen3-8B-MNN" + } + }, + { + "modelName": "Qwen3-30B-A3B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 30, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen3-30B-A3B-MNN", + "ModelScope": "MNN/Qwen3-30B-A3B-MNN", + "Modelers": "MNN/Qwen3-30B-A3B-MNN" + } + }, + { + "modelName": "Qwen2.5-Omni-7B-MNN", + "tags": [ + "Chat", + "Multimodal", + "TextGeneration" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-Omni-7B-MNN", + "ModelScope": "MNN/Qwen2.5-Omni-7B-MNN", + "Modelers": "MNN/Qwen2.5-Omni-7B-MNN" + } + }, + { + "modelName": "Qwen3-32B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 32, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen3-32B-MNN", + "ModelScope": "MNN/Qwen3-32B-MNN", + "Modelers": "MNN/Qwen3-32B-MNN" + } + }, + { + "modelName": "Qwen3-14B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "recommended", + "qwen" + ], + "size_gb": 14, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen3-14B-MNN", + "ModelScope": "MNN/Qwen3-14B-MNN", + "Modelers": "MNN/Qwen3-14B-MNN" + } + }, + { + "modelName": "Qwen2.5-0.5B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "good", + "qwen" + ], + "size_gb": 0.5, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-0.5B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-0.5B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-0.5B-Instruct-MNN" + } + }, + { + "modelName": "Qwen2.5-1.5B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "good", + "qwen" + ], + "size_gb": 1.5, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-1.5B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-1.5B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-1.5B-Instruct-MNN" + } + }, + { + "modelName": "DeepSeek-R1-1.5B-Qwen-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "good", + "deepseek" + ], + "size_gb": 1.5, + "vendor": "DeepSeek", + "sources": { + "HuggingFace": "taobao-mnn/DeepSeek-R1-1.5B-Qwen-MNN", + "ModelScope": "MNN/DeepSeek-R1-1.5B-Qwen-MNN", + "Modelers": "MNN/DeepSeek-R1-1.5B-Qwen-MNN" + } + }, + { + "modelName": "gemma-2-2b-it-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "good", + "gemma" + ], + "size_gb": 2, + "vendor": "Gemma", + "sources": { + "HuggingFace": "taobao-mnn/gemma-2-2b-it-MNN", + "ModelScope": "MNN/gemma-2-2b-it-MNN", + "Modelers": "MNN/gemma-2-2b-it-MNN" + } + }, + { + "modelName": "Qwen2.5-7B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "good", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-7B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-7B-Instruct-MNN", + "Modelers": 
"MNN/Qwen2.5-7B-Instruct-MNN" + } + }, + { + "modelName": "Qwen2.5-3B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "good", + "qwen" + ], + "size_gb": 3, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-3B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-3B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-3B-Instruct-MNN" + } + }, + { + "modelName": "DeepSeek-R1-7B-Qwen-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "deepseek" + ], + "size_gb": 7, + "vendor": "DeepSeek", + "sources": { + "HuggingFace": "taobao-mnn/DeepSeek-R1-7B-Qwen-MNN", + "ModelScope": "MNN/DeepSeek-R1-7B-Qwen-MNN", + "Modelers": "MNN/DeepSeek-R1-7B-Qwen-MNN" + } + }, + { + "modelName": "deepseek-vl-7b-chat-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "deepseek" + ], + "size_gb": 7, + "vendor": "DeepSeek", + "sources": { + "HuggingFace": "taobao-mnn/deepseek-vl-7b-chat-MNN", + "ModelScope": "MNN/deepseek-vl-7b-chat-MNN", + "Modelers": "MNN/deepseek-vl-7b-chat-MNN" + } + }, + { + "modelName": "deepseek-llm-7b-chat-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "deepseek" + ], + "size_gb": 7, + "vendor": "DeepSeek", + "sources": { + "HuggingFace": "taobao-mnn/deepseek-llm-7b-chat-MNN", + "ModelScope": "MNN/deepseek-llm-7b-chat-MNN", + "Modelers": "MNN/deepseek-llm-7b-chat-MNN" + } + }, + { + "modelName": "Qwen2.5-VL-3B-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 3, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-VL-3B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-VL-3B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-VL-3B-Instruct-MNN" + } + }, + { + "modelName": "SmolVLM2-500M-Video-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "VideoUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 0.5, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolVLM2-500M-Video-Instruct-MNN", + "ModelScope": "MNN/SmolVLM2-500M-Video-Instruct-MNN", + "Modelers": "MNN/SmolVLM2-500M-Video-Instruct-MNN" + } + }, + { + "modelName": "InternVL2_5-1B-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "internvl" + ], + "size_gb": 1, + "vendor": "InternLM", + "sources": { + "HuggingFace": "taobao-mnn/InternVL2_5-1B-MNN", + "ModelScope": "MNN/InternVL2_5-1B-MNN", + "Modelers": "MNN/InternVL2_5-1B-MNN" + } + }, + { + "modelName": "Llama-3.2-3B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "llama" + ], + "size_gb": 3, + "vendor": "Llama", + "sources": { + "HuggingFace": "taobao-mnn/Llama-3.2-3B-Instruct-MNN", + "ModelScope": "MNN/Llama-3.2-3B-Instruct-MNN", + "Modelers": "MNN/Llama-3.2-3B-Instruct-MNN" + } + }, + { + "modelName": "SmolVLM2-2.2B-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 2.2, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolVLM2-2.2B-Instruct-MNN", + "ModelScope": "MNN/SmolVLM2-2.2B-Instruct-MNN", + "Modelers": "MNN/SmolVLM2-2.2B-Instruct-MNN" + } + }, + { + "modelName": "gemma-2-9b-it-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "gemma" + ], + "size_gb": 9, + "vendor": "Gemma", 
+ "sources": { + "HuggingFace": "taobao-mnn/gemma-2-9b-it-MNN", + "ModelScope": "MNN/gemma-2-9b-it-MNN", + "Modelers": "MNN/gemma-2-9b-it-MNN" + } + }, + { + "modelName": "SmolVLM2-256M-Video-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "VideoUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 0.256, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolVLM2-256M-Video-Instruct-MNN", + "ModelScope": "MNN/SmolVLM2-256M-Video-Instruct-MNN", + "Modelers": "MNN/SmolVLM2-256M-Video-Instruct-MNN" + } + }, + { + "modelName": "FastVLM-1.5B-Stage3-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "fastvlm" + ], + "size_gb": 1.5, + "vendor": "FastVLM", + "sources": { + "HuggingFace": "taobao-mnn/FastVLM-1.5B-Stage3-MNN", + "ModelScope": "MNN/FastVLM-1.5B-Stage3-MNN", + "Modelers": "MNN/FastVLM-1.5B-Stage3-MNN" + } + }, + { + "modelName": "SmolVLM-500M-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 0.5, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolVLM-500M-Instruct-MNN", + "ModelScope": "MNN/SmolVLM-500M-Instruct-MNN", + "Modelers": "MNN/SmolVLM-500M-Instruct-MNN" + } + }, + { + "modelName": "Qwen-VL-Chat-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": null, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen-VL-Chat-MNN", + "ModelScope": "MNN/Qwen-VL-Chat-MNN", + "Modelers": "MNN/Qwen-VL-Chat-MNN" + } + }, + { + "modelName": "SmolLM2-360M-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 0.36, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolLM2-360M-Instruct-MNN", + "ModelScope": "MNN/SmolLM2-360M-Instruct-MNN", + "Modelers": "MNN/SmolLM2-360M-Instruct-MNN" + } + }, + { + "modelName": "SmolVLM-256M-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 0.256, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolVLM-256M-Instruct-MNN", + "ModelScope": "MNN/SmolVLM-256M-Instruct-MNN", + "Modelers": "MNN/SmolVLM-256M-Instruct-MNN" + } + }, + { + "modelName": "FastVLM-0.5B-Stage3-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "fastvlm" + ], + "size_gb": 0.5, + "vendor": "FastVLM", + "sources": { + "HuggingFace": "taobao-mnn/FastVLM-0.5B-Stage3-MNN", + "ModelScope": "MNN/FastVLM-0.5B-Stage3-MNN", + "Modelers": "MNN/FastVLM-0.5B-Stage3-MNN" + } + }, + { + "modelName": "Yi-6B-Chat-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "yi" + ], + "size_gb": 6, + "vendor": "01.AI", + "sources": { + "HuggingFace": "taobao-mnn/Yi-6B-Chat-MNN", + "ModelScope": "MNN/Yi-6B-Chat-MNN", + "Modelers": "MNN/Yi-6B-Chat-MNN" + } + }, + { + "modelName": "SmolVLM-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": null, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolVLM-Instruct-MNN", + "ModelScope": "MNN/SmolVLM-Instruct-MNN", + "Modelers": "MNN/SmolVLM-Instruct-MNN" + } + }, + { + "modelName": "MiMo-7B-RL-MNN", + 
"tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "mimo" + ], + "size_gb": 7, + "vendor": "MiMo", + "sources": { + "HuggingFace": "taobao-mnn/MiMo-7B-RL-MNN", + "ModelScope": "MNN/MiMo-7B-RL-MNN", + "Modelers": "MNN/MiMo-7B-RL-MNN" + } + }, + { + "modelName": "TinyLlama-1.1B-Chat-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "llama" + ], + "size_gb": 1.1, + "vendor": "Llama", + "sources": { + "HuggingFace": "taobao-mnn/TinyLlama-1.1B-Chat-MNN", + "ModelScope": "MNN/TinyLlama-1.1B-Chat-MNN", + "Modelers": "MNN/TinyLlama-1.1B-Chat-MNN" + } + }, + { + "modelName": "internlm-chat-7b-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "internlm" + ], + "size_gb": 7, + "vendor": "InternLm", + "sources": { + "HuggingFace": "taobao-mnn/internlm-chat-7b-MNN", + "ModelScope": "MNN/internlm-chat-7b-MNN", + "Modelers": "MNN/internlm-chat-7b-MNN" + } + }, + { + "modelName": "Qwen2.5-Math-7B-Instruct-MNN", + "tags": [ + "Chat", + "Math", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-Math-7B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-Math-7B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-Math-7B-Instruct-MNN" + } + }, + { + "modelName": "Qwen2-VL-2B-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 2, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2-VL-2B-Instruct-MNN", + "ModelScope": "MNN/Qwen2-VL-2B-Instruct-MNN", + "Modelers": "MNN/Qwen2-VL-2B-Instruct-MNN" + } + }, + { + "modelName": "FastVLM-0.5B-Stage2-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "fastvlm" + ], + "size_gb": 0.5, + "vendor": "FastVLM", + "sources": { + "HuggingFace": "taobao-mnn/FastVLM-0.5B-Stage2-MNN", + "ModelScope": "MNN/FastVLM-0.5B-Stage2-MNN", + "Modelers": "MNN/FastVLM-0.5B-Stage2-MNN" + } + }, + { + "modelName": "Qwen2.5-VL-7B-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-VL-7B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-VL-7B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-VL-7B-Instruct-MNN" + } + }, + { + "modelName": "FastVLM-1.5B-Stage2-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "fastvlm" + ], + "size_gb": 1.5, + "vendor": "FastVLM", + "sources": { + "HuggingFace": "taobao-mnn/FastVLM-1.5B-Stage2-MNN", + "ModelScope": "MNN/FastVLM-1.5B-Stage2-MNN", + "Modelers": "MNN/FastVLM-1.5B-Stage2-MNN" + } + }, + { + "modelName": "DeepSeek-Prover-V2-7B-MNN", + "tags": [ + "Chat", + "CodeGeneration", + "Math", + "TextGeneration" + ], + "categories": [ + "chat", + "deepseek" + ], + "size_gb": 7, + "vendor": "DeepSeek", + "sources": { + "HuggingFace": "taobao-mnn/DeepSeek-Prover-V2-7B-MNN", + "ModelScope": "MNN/DeepSeek-Prover-V2-7B-MNN", + "Modelers": "MNN/DeepSeek-Prover-V2-7B-MNN" + } + }, + { + "modelName": "Baichuan2-7B-Chat-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "baichuan" + ], + "size_gb": 7, + "vendor": "Baichuan", + "sources": { + "HuggingFace": "taobao-mnn/Baichuan2-7B-Chat-MNN", + "ModelScope": "MNN/Baichuan2-7B-Chat-MNN", + 
"Modelers": "MNN/Baichuan2-7B-Chat-MNN" + } + }, + { + "modelName": "phi-2-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "phi" + ], + "size_gb": 2, + "vendor": "Phi", + "sources": { + "HuggingFace": "taobao-mnn/phi-2-MNN", + "ModelScope": "MNN/phi-2-MNN", + "Modelers": "MNN/phi-2-MNN" + } + }, + { + "modelName": "MobileLLM-125M-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "mobilellm" + ], + "size_gb": 0.125, + "vendor": "MobileLLM", + "sources": { + "HuggingFace": "taobao-mnn/MobileLLM-125M-MNN", + "ModelScope": "MNN/MobileLLM-125M-MNN", + "Modelers": "MNN/MobileLLM-125M-MNN" + } + }, + { + "modelName": "Llama-2-7b-chat-ms-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "llama" + ], + "size_gb": 7, + "vendor": "Llama", + "sources": { + "HuggingFace": "taobao-mnn/Llama-2-7b-chat-ms-MNN", + "ModelScope": "MNN/Llama-2-7b-chat-ms-MNN", + "Modelers": "MNN/Llama-2-7b-chat-ms-MNN" + } + }, + { + "modelName": "SmolLM2-135M-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 0.135, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolLM2-135M-Instruct-MNN", + "ModelScope": "MNN/SmolLM2-135M-Instruct-MNN", + "Modelers": "MNN/SmolLM2-135M-Instruct-MNN" + } + }, + { + "modelName": "Qwen-7B-Chat-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen-7B-Chat-MNN", + "ModelScope": "MNN/Qwen-7B-Chat-MNN", + "Modelers": "MNN/Qwen-7B-Chat-MNN" + } + }, + { + "modelName": "Llama-3.2-1B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "llama" + ], + "size_gb": 1, + "vendor": "Llama", + "sources": { + "HuggingFace": "taobao-mnn/Llama-3.2-1B-Instruct-MNN", + "ModelScope": "MNN/Llama-3.2-1B-Instruct-MNN", + "Modelers": "MNN/Llama-3.2-1B-Instruct-MNN" + } + }, + { + "modelName": "glm-4-9b-chat-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "glm" + ], + "size_gb": 9, + "vendor": "THUDM", + "sources": { + "HuggingFace": "taobao-mnn/glm-4-9b-chat-MNN", + "ModelScope": "MNN/glm-4-9b-chat-MNN", + "Modelers": "MNN/glm-4-9b-chat-MNN" + } + }, + { + "modelName": "Qwen2-Audio-7B-Instruct-MNN", + "tags": [ + "Chat", + "AudioUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2-Audio-7B-Instruct-MNN", + "ModelScope": "MNN/Qwen2-Audio-7B-Instruct-MNN", + "Modelers": "MNN/Qwen2-Audio-7B-Instruct-MNN" + } + }, + { + "modelName": "gemma-7b-it-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "gemma" + ], + "size_gb": 7, + "vendor": "Gemma", + "sources": { + "HuggingFace": "taobao-mnn/gemma-7b-it-MNN", + "ModelScope": "MNN/gemma-7b-it-MNN", + "Modelers": "MNN/gemma-7b-it-MNN" + } + }, + { + "modelName": "TinyLlama-1.1B-Chat-v1.0-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "llama" + ], + "size_gb": 1.1, + "vendor": "TinyLlama", + "sources": { + "HuggingFace": "taobao-mnn/TinyLlama-1.1B-Chat-v1.0-MNN", + "ModelScope": "MNN/TinyLlama-1.1B-Chat-v1.0-MNN", + "Modelers": "MNN/TinyLlama-1.1B-Chat-v1.0-MNN" + } + }, + { + "modelName": "MobileLLM-1B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "mobilellm" + ], + "size_gb": 1, + "vendor": 
"MobileLLM", + "sources": { + "HuggingFace": "taobao-mnn/MobileLLM-1B-MNN", + "ModelScope": "MNN/MobileLLM-1B-MNN", + "Modelers": "MNN/MobileLLM-1B-MNN" + } + }, + { + "modelName": "Meta-Llama-3.1-8B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "llama" + ], + "size_gb": 8, + "vendor": "Llama", + "sources": { + "HuggingFace": "taobao-mnn/Meta-Llama-3.1-8B-Instruct-MNN", + "ModelScope": "MNN/Meta-Llama-3.1-8B-Instruct-MNN", + "Modelers": "MNN/Meta-Llama-3.1-8B-Instruct-MNN" + } + }, + { + "modelName": "Qwen2-1.5B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 1.5, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2-1.5B-Instruct-MNN", + "ModelScope": "MNN/Qwen2-1.5B-Instruct-MNN", + "Modelers": "MNN/Qwen2-1.5B-Instruct-MNN" + } + }, + { + "modelName": "chatglm3-6b-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "glm" + ], + "size_gb": 6, + "vendor": "THUDM", + "sources": { + "HuggingFace": "taobao-mnn/chatglm3-6b-MNN", + "ModelScope": "MNN/chatglm3-6b-MNN", + "Modelers": "MNN/chatglm3-6b-MNN" + } + }, + { + "modelName": "MobileLLM-350M-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "mobilellm" + ], + "size_gb": 0.35, + "vendor": "MobileLLM", + "sources": { + "HuggingFace": "taobao-mnn/MobileLLM-350M-MNN", + "ModelScope": "MNN/MobileLLM-350M-MNN", + "Modelers": "MNN/MobileLLM-350M-MNN" + } + }, + { + "modelName": "SmolLM2-1.7B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 1.7, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolLM2-1.7B-Instruct-MNN", + "ModelScope": "MNN/SmolLM2-1.7B-Instruct-MNN", + "Modelers": "MNN/SmolLM2-1.7B-Instruct-MNN" + } + }, + { + "modelName": "Qwen2.5-Coder-7B-Instruct-MNN", + "tags": [ + "Chat", + "CodeGeneration", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-Coder-7B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-Coder-7B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-Coder-7B-Instruct-MNN" + } + }, + { + "modelName": "Qwen2.5-Math-1.5B-Instruct-MNN", + "tags": [ + "Chat", + "Math", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 1.5, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-Math-1.5B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-Math-1.5B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-Math-1.5B-Instruct-MNN" + } + }, + { + "modelName": "QwQ-32B-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "qwq" + ], + "size_gb": 32, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/QwQ-32B-MNN", + "ModelScope": "MNN/QwQ-32B-MNN", + "Modelers": "MNN/QwQ-32B-MNN" + } + }, + { + "modelName": "Qwen2-0.5B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 0.5, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2-0.5B-Instruct-MNN", + "ModelScope": "MNN/Qwen2-0.5B-Instruct-MNN", + "Modelers": "MNN/Qwen2-0.5B-Instruct-MNN" + } + }, + { + "modelName": "Meta-Llama-3-8B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "llama" + ], + "size_gb": 8, + "vendor": "Llama", + "sources": { + "HuggingFace": "taobao-mnn/Meta-Llama-3-8B-Instruct-MNN", + "ModelScope": 
"MNN/Meta-Llama-3-8B-Instruct-MNN", + "Modelers": "MNN/Meta-Llama-3-8B-Instruct-MNN" + } + }, + { + "modelName": "Qwen2.5-Coder-1.5B-Instruct-MNN", + "tags": [ + "Chat", + "CodeGeneration", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 1.5, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2.5-Coder-1.5B-Instruct-MNN", + "ModelScope": "MNN/Qwen2.5-Coder-1.5B-Instruct-MNN", + "Modelers": "MNN/Qwen2.5-Coder-1.5B-Instruct-MNN" + } + }, + { + "modelName": "MobileLLM-600M-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "mobilellm" + ], + "size_gb": 0.6, + "vendor": "MobileLLM", + "sources": { + "HuggingFace": "taobao-mnn/MobileLLM-600M-MNN", + "ModelScope": "MNN/MobileLLM-600M-MNN", + "Modelers": "MNN/MobileLLM-600M-MNN" + } + }, + { + "modelName": "Qwen2-VL-7B-Instruct-MNN", + "tags": [ + "Chat", + "Multimodal", + "ImageUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2-VL-7B-Instruct-MNN", + "ModelScope": "MNN/Qwen2-VL-7B-Instruct-MNN", + "Modelers": "MNN/Qwen2-VL-7B-Instruct-MNN" + } + }, + { + "modelName": "MiMo-7B-SFT-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "mimo" + ], + "size_gb": 7, + "vendor": "MiMo", + "sources": { + "HuggingFace": "taobao-mnn/MiMo-7B-SFT-MNN", + "ModelScope": "MNN/MiMo-7B-SFT-MNN", + "Modelers": "MNN/MiMo-7B-SFT-MNN" + } + }, + { + "modelName": "MiMo-7B-RL-Zero-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "mimo" + ], + "size_gb": 7, + "vendor": "MiMo", + "sources": { + "HuggingFace": "taobao-mnn/MiMo-7B-RL-Zero-MNN", + "ModelScope": "MNN/MiMo-7B-RL-Zero-MNN", + "Modelers": "MNN/MiMo-7B-RL-Zero-MNN" + } + }, + { + "modelName": "MiMo-7B-Base-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "mimo" + ], + "size_gb": 7, + "vendor": "MiMo", + "sources": { + "HuggingFace": "taobao-mnn/MiMo-7B-Base-MNN", + "ModelScope": "MNN/MiMo-7B-Base-MNN", + "Modelers": "MNN/MiMo-7B-Base-MNN" + } + }, + { + "modelName": "Qwen2-7B-Instruct-MNN", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "qwen" + ], + "size_gb": 7, + "vendor": "Qwen", + "sources": { + "HuggingFace": "taobao-mnn/Qwen2-7B-Instruct-MNN", + "ModelScope": "MNN/Qwen2-7B-Instruct-MNN", + "Modelers": "MNN/Qwen2-7B-Instruct-MNN" + } + }, + { + "modelName": "SmolDocling-256M-preview-MNN", + "tags": [ + "Chat", + "DocumentUnderstanding", + "TextGeneration" + ], + "categories": [ + "chat", + "smol" + ], + "size_gb": 0.256, + "vendor": "Smol", + "sources": { + "HuggingFace": "taobao-mnn/SmolDocling-256M-preview-MNN", + "ModelScope": "MNN/SmolDocling-256M-preview-MNN", + "Modelers": "MNN/SmolDocling-256M-preview-MNN" + } + }, + { + "modelName": "gemma-3-4b-it-q4_0-mnn", + "tags": [ + "Chat", + "TextGeneration" + ], + "categories": [ + "chat", + "gemma" + ], + "size_gb": 4, + "vendor": "Gemma", + "sources": { + "HuggingFace": "taobao-mnn/gemma-3-4b-it-q4_0-mnn", + "ModelScope": "MNN/gemma-3-4b-it-q4_0-mnn", + "Modelers": "MNN/gemma-3-4b-it-q4_0-mnn" + } + }, + { + "modelName": "stable-diffusion-v1-5-mnn-opencl", + "tags": [ + "TextToImage" + ], + "categories": [ + "other", + "stable-diffusion" + ], + "size_gb": 1.5, + "vendor": "stability.ai", + "sources": { + "HuggingFace": "taobao-mnn/stable-diffusion-v1-5-mnn-opencl", + "ModelScope": "MNN/stable-diffusion-v1-5-mnn-opencl", + "Modelers": 
"MNN/stable-diffusion-v1-5-mnn-opencl" + } + } + ], + "tts_models": [ + { + "modelName": "bert-vits2-MNN", + "tags": [ + "TTS" + ], + "categories": [ + "other", + "stable-diffusion" + ], + "size_gb": 1.5, + "vendor": "stability.ai", + "sources": { + "ModelScope": "MNN/bert-vits2-MNN" + } + } + ], + "asr_models": [ + { + "modelName": "sherpa-mnn-streaming-zipformer-bilingual-zh-en-2023-02-20", + "tags": [ + "ASR" + ], + "categories": [ + "other", + "stable-diffusion" + ], + "size_gb": 1.5, + "vendor": "stability.ai", + "sources": { + "ModelScope": "MNN/sherpa-mnn-streaming-zipformer-bilingual-zh-en-2023-02-20" + } + }, + { + "modelName": "sherpa-mnn-streaming-zipformer-en-2023-02-21", + "tags": [ + "ASR" + ], + "categories": [ + "other", + "stable-diffusion" + ], + "size_gb": 1.5, + "vendor": "stability.ai", + "sources": { + "ModelScope": "MNN/sherpa-mnn-streaming-zipformer-en-2023-02-21" + } + } + ], + "metadata": { + "version": "1.0.0", + "lastUpdated": "2025-01-03T00:00:00Z", + "schemaVersion": "2.0", + "totalModels": 11, + "supportedPlatforms": ["iOS", "Android"], + "minAppVersion": "1.0.0" + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Settings/View/SettingsView.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Settings/View/SettingsView.swift new file mode 100644 index 00000000..31e31c06 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Settings/View/SettingsView.swift @@ -0,0 +1,96 @@ +// +// SettingsView.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/1. +// + +import SwiftUI + +struct SettingsView: View { + + private var sourceManager = ModelSourceManager.shared + @State private var selectedLanguage = "" + @State private var selectedSource = ModelSourceManager.shared.selectedSource + @State private var showLanguageAlert = false + + private let languageOptions = LanguageManager.shared.languageOptions + + var body: some View { + List { + Section(header: Text("settings.section.application")) { + + Picker("settings.picker.downloadSource", selection: $selectedSource) { + ForEach(ModelSource.allCases, id: \.self) { source in + Text(source.rawValue).tag(source) + } + } + .onChange(of: selectedSource) { _, newValue in + sourceManager.updateSelectedSource(newValue) + } + + Picker("settings.picker.language", selection: $selectedLanguage) { + ForEach(languageOptions.keys.sorted(), id: \.self) { key in + Text(languageOptions[key] ?? 
"").tag(key) + } + } + .onChange(of: selectedLanguage) { _, newValue in + if newValue != LanguageManager.shared.currentLanguage { + showLanguageAlert = true + } + } + } + + Section(header: Text("settings.section.about")) { + Button(action: { + if let url = URL(string: "https://github.com/alibaba/MNN") { + UIApplication.shared.open(url) + } + }) { + HStack { + Text("settings.button.aboutMNN") + Spacer() + Image(systemName: "chevron.right") + .foregroundColor(.gray) + .font(.system(size: 14)) + } + .foregroundColor(.primary) + } + + Button(action: { + if let url = URL(string: "https://github.com/alibaba/MNN") { + UIApplication.shared.open(url) + } + }) { + HStack { + Text("settings.button.reportIssue") + Spacer() + Image(systemName: "chevron.right") + .foregroundColor(.gray) + .font(.system(size: 14)) + } + .foregroundColor(.primary) + } + } + } + .listStyle(InsetGroupedListStyle()) + .navigationTitle("settings.navigation.title") + .navigationBarTitleDisplayMode(.inline) + .alert("settings.alert.switchLanguage.title", isPresented: $showLanguageAlert) { + Button("settings.alert.switchLanguage.confirm") { + LanguageManager.shared.applyLanguage(selectedLanguage) + // 重启应用以应用语言更改 + exit(0) + } + Button("settings.alert.switchLanguage.cancel", role: .cancel) { + // 恢复原来的选择 + selectedLanguage = LanguageManager.shared.currentLanguage + } + } message: { + Text("settings.alert.switchLanguage.message") + } + .onAppear { + selectedLanguage = LanguageManager.shared.currentLanguage + } + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/Color+Extension.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/Color+Extension.swift index 92ead781..d27f765c 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/Color+Extension.swift +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/Color+Extension.swift @@ -12,4 +12,19 @@ extension Color { static var customBlue = Color(hex: "4859FD") static var customPickerBg = Color(hex: "2F2F2F") static var customLightPink = Color(hex: "E3E3E3") + + static var primaryPurple = Color(hex: "4252B6") + static var primaryBlue = Color(hex: "2E97F2") + static var primaryRed = Color(hex: "D16D6A") + + // Enhanced colors for benchmark UI + static var benchmarkGradientStart = Color(hex: "667eea") + static var benchmarkGradientEnd = Color(hex: "764ba2") + static var benchmarkCardBg = Color(hex: "FFFFFF") + static var benchmarkAccent = Color(hex: "6366f1") + static var benchmarkSuccess = Color(hex: "10b981") + static var benchmarkWarning = Color(hex: "f59e0b") + static var benchmarkError = Color(hex: "ef4444") + static var benchmarkSecondary = Color(hex: "6b7280") + static var benchmarkLight = Color(hex: "f8fafc") } diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/FileOperationManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/FileOperationManager.swift new file mode 100644 index 00000000..8d9e9f17 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/FileOperationManager.swift @@ -0,0 +1,282 @@ +// +// FileOperationManager.swift +// MNNLLMiOS +// Created by 游薪渝(揽清) on 2025/7/10. +// + +import Foundation +import UIKit + +/** + * FileOperationManager is a singleton utility class that handles various file operations + * including image processing, audio processing, directory size calculation, and file cleanup. 
+ */ +final class FileOperationManager { + + /// Shared singleton instance + static let shared = FileOperationManager() + + /// Private initializer to enforce singleton pattern + private init() {} + + // MARK: - Image Processing + + /** + * Processes image files by copying to temporary directory and performing HEIC conversion if needed + * + * - Parameters: + * - url: The original image URL + * - fileName: The desired file name for the processed image + * - Returns: The processed image URL, or nil if processing fails + * + * Usage: + * ```swift + * let imageURL = URL(fileURLWithPath: "/path/to/image.heic") + * if let processedURL = FileOperationManager.shared.processImageFile(from: imageURL, fileName: "converted.jpg") { + * // Use the processed image URL + * } + * ``` + */ + func processImageFile(from url: URL, fileName: String) -> URL? { + let isInTempDirectory = url.path.contains("/tmp/") + + if !isInTempDirectory { + guard let fileUrl = AssetExtractor.copyFileToTmpDirectory(from: url, fileName: fileName) else { + return nil + } + return convertHEICImage(from: fileUrl) + } else { + return convertHEICImage(from: url) + } + } + + /** + * Converts HEIC images to JPG format using AssetExtractor utility + * + * - Parameter url: The HEIC image URL to convert + * - Returns: The converted JPG image URL, or original URL if not HEIC format + */ + private func convertHEICImage(from url: URL) -> URL? { + var fileUrl = url + if fileUrl.isHEICImage() { + if let convertedUrl = AssetExtractor.convertHEICToJPG(heicUrl: fileUrl) { + fileUrl = convertedUrl + } + } + return fileUrl + } + + + // MARK: - Directory Size Calculation + + /** + * Formats byte count into human-readable string using ByteCountFormatter + * + * - Parameter bytes: The number of bytes to format + * - Returns: Formatted string (e.g., "1.5 GB") + * + * Usage: + * ```swift + * let size: Int64 = 1073741824 // 1 GB + * let formatted = FileOperationManager.shared.formatBytes(size) + * print(formatted) // "1.0 GB" + * ``` + */ + func formatBytes(_ bytes: Int64) -> String { + let formatter = ByteCountFormatter() + formatter.allowedUnits = [.useGB] + formatter.countStyle = .file + return formatter.string(fromByteCount: bytes) + } + + /** + * Calculates the size of a local directory and returns a formatted string + * + * - Parameter path: The directory path to calculate size for + * - Returns: Formatted size string or "Unknown" if calculation fails + * + * Usage: + * ```swift + * let directoryPath = "/path/to/directory" + * let sizeString = FileOperationManager.shared.formatLocalDirectorySize(at: directoryPath) + * print("Directory size: \(sizeString)") + * ``` + */ + func formatLocalDirectorySize(at path: String) -> String { + guard FileManager.default.fileExists(atPath: path) else { return "Unknown" } + + do { + let totalSize = try calculateDirectorySize(at: path) + return formatBytes(totalSize) + } catch { + return "Unknown" + } + } + + /** + * Calculates the total size of a directory by traversing all files recursively + * Uses actual disk allocated size when available, falls back to logical file size + * + * - Parameter path: The directory path to calculate size for + * - Returns: Total directory size in bytes + * - Throws: FileSystem errors during directory traversal + * + * Usage: + * ```swift + * do { + * let size = try FileOperationManager.shared.calculateDirectorySize(at: "/path/to/directory") + * print("Directory size: \(size) bytes") + * } catch { + * print("Failed to calculate directory size: \(error)") + * } + * ``` + */ + func 
calculateDirectorySize(at path: String) throws -> Int64 { + let fileManager = FileManager.default + var totalSize: Int64 = 0 + + // print("Calculating directory size for path: \(path)") + + let directoryURL = URL(fileURLWithPath: path) + + guard fileManager.fileExists(atPath: path) else { + // print("Path does not exist: \(path)") + return 0 + } + + let resourceKeys: [URLResourceKey] = [.isRegularFileKey, .totalFileAllocatedSizeKey, .fileSizeKey, .nameKey] + let enumerator = fileManager.enumerator( + at: directoryURL, + includingPropertiesForKeys: resourceKeys, + options: [.skipsHiddenFiles, .skipsPackageDescendants], + errorHandler: { (url, error) -> Bool in + print("Error accessing \(url): \(error)") + return true + } + ) + + guard let fileEnumerator = enumerator else { + throw NSError(domain: "FileEnumerationError", code: -1, + userInfo: [NSLocalizedDescriptionKey: "Failed to create file enumerator"]) + } + + var fileCount = 0 + for case let fileURL as URL in fileEnumerator { + do { + let resourceValues = try fileURL.resourceValues(forKeys: Set(resourceKeys)) + + guard let isRegularFile = resourceValues.isRegularFile, isRegularFile else { continue } + + let fileName = resourceValues.name ?? "Unknown" + fileCount += 1 + + // Use actual disk allocated size, fallback to logical size if not available + if let actualSize = resourceValues.totalFileAllocatedSize { + totalSize += Int64(actualSize) + + if fileCount <= 10 { + let actualSizeGB = Double(actualSize) / (1024 * 1024 * 1024) + let logicalSizeGB = Double(resourceValues.fileSize ?? 0) / (1024 * 1024 * 1024) + // print("File \(fileCount): \(fileName) - Logical: \(String(format: "%.3f", logicalSizeGB)) GB, Actual: \(String(format: "%.3f", actualSizeGB)) GB") + } + } else if let logicalSize = resourceValues.fileSize { + totalSize += Int64(logicalSize) + + if fileCount <= 10 { + let logicalSizeGB = Double(logicalSize) / (1024 * 1024 * 1024) + // print("File \(fileCount): \(fileName) - Size: \(String(format: "%.3f", logicalSizeGB)) GB (fallback)") + } + } + } catch { + print("Error getting resource values for \(fileURL): \(error)") + continue + } + } + + let totalSizeGB = Double(totalSize) / (1024 * 1024 * 1024) + print("Total files: \(fileCount), Total actual disk usage: \(String(format: "%.2f", totalSizeGB)) GB") + + return totalSize + } + + // MARK: - Directory Cleaning + + /** + * Cleans temporary directories based on memory mapping usage + * Cleans system temporary directory and optionally model temporary directories + * + * + * Usage: + * ```swift + * // Clean temporary directories + * FileOperationManager.shared.cleanTempDirectories() + * ``` + */ + func cleanTempDirectories() { + let fileManager = FileManager.default + let tmpDirectoryURL = fileManager.temporaryDirectory + + cleanFolder(at: tmpDirectoryURL) + } + + /** + * Cleans the temporary folder for a specific model + * + * - Parameter modelPath: The path to the model directory + * + * Usage: + * ```swift + * let modelPath = "/path/to/model" + * FileOperationManager.shared.cleanModelTempFolder(modelPath: modelPath) + * ``` + */ + func cleanModelTempFolder(modelPath: String) { + let tmpFolderURL = URL(fileURLWithPath: modelPath).appendingPathComponent("temp") + cleanFolder(at: tmpFolderURL) + } + + /** + * Recursively cleans all files in the specified folder + * Preserves files containing "networkdownload" in their path + * + * - Parameter folderURL: The folder URL to clean + */ + private func cleanFolder(at folderURL: URL) { + let fileManager = FileManager.default + do { + let 
files = try fileManager.contentsOfDirectory(at: folderURL, includingPropertiesForKeys: nil) + for file in files { + if !file.absoluteString.lowercased().contains("networkdownload") { + do { + try fileManager.removeItem(at: file) + print("Deleted file: \(file.path)") + } catch { + print("Error deleting file: \(file.path), \(error.localizedDescription)") + } + } + } + } catch { + print("Error accessing directory: \(error.localizedDescription)") + } + } + + // MARK: - Diffusion Image Generation + + /** + * Generates a unique temporary file path for Diffusion model image output + * Creates a unique JPG filename in the system temporary directory + * + * - Returns: A unique temporary image file URL + * + * Usage: + * ```swift + * let tempImageURL = FileOperationManager.shared.generateTempImagePath() + * // Use tempImageURL for image generation output + * ``` + */ + func generateTempImagePath() -> URL { + let tempDir = FileManager.default.temporaryDirectory + let imageName = UUID().uuidString + ".jpg" + return tempDir.appendingPathComponent(imageName) + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/LanguageManager.swift b/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/LanguageManager.swift new file mode 100644 index 00000000..5bfb3027 --- /dev/null +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/LanguageManager.swift @@ -0,0 +1,48 @@ + +// +// LanguageManager.swift +// MNNLLMiOS +// +// Created by 游薪渝(揽清) on 2025/7/1. +// + +import Foundation + +class LanguageManager { + static let shared = LanguageManager() + + var languageOptions: [String: String] { + ["en": NSLocalizedString("language.english", comment: ""), + "zh-Hans": NSLocalizedString("language.simplifiedChinese", comment: "")] + } + + private let languageKey = "AppLanguage" + + var currentLanguage: String { + get { + return UserDefaults.standard.string(forKey: languageKey) ?? getSystemLanguage() + } + set { + UserDefaults.standard.set(newValue, forKey: languageKey) + UserDefaults.standard.synchronize() + } + } + + private func getSystemLanguage() -> String { + let preferredLanguage = Locale.preferredLanguages.first ?? "en" + if preferredLanguage.starts(with: "zh") { + return "zh-Hans" + } else { + return "en" + } + } + + func applyLanguage(_ code: String) { + currentLanguage = code + + UserDefaults.standard.set([code], forKey: "AppleLanguages") + UserDefaults.standard.synchronize() + + NotificationCenter.default.post(name: Notification.Name("LanguageChanged"), object: nil) + } +} diff --git a/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/Localizable.xcstrings b/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/Localizable.xcstrings index b5420869..8d36263a 100644 --- a/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/Localizable.xcstrings +++ b/apps/iOS/MNNLLMChat/MNNLLMiOS/Util/Localizable.xcstrings @@ -3,9 +3,50 @@ "strings" : { "" : { + }, + "%@" : { + }, "%lld" : { + }, + "%lld%%" : { + + }, + "• %@" : { + + }, + "alert.deleteModel.message" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Are you sure you want to delete this model?" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "是否确认删除该模型?" + } + } + } + }, + "alert.deleteModel.title" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Confirm Deletion" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "确认删除" + } + } + } }, "Are you sure you want to delete this history?" : { "localizations" : { @@ -17,6 +58,146 @@ } } }, + "Are you sure you want to stop the benchmark test?"
: { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "确定要结束基准测试吗?" + } + } + } + }, + "Audio Message" : { + + }, + "Benchmark" : { + "comment" : "基准测试标签", + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "基准测试" + } + } + } + }, + "Benchmark Config" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "基准测试配置" + } + } + } + }, + "Benchmark Results" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "基准测试结果" + } + } + } + }, + "button.clear" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Clear" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "清除" + } + } + } + }, + "button.delete" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Delete" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "删除" + } + } + } + }, + "button.done" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Done" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "完成" + } + } + } + }, + "button.downloaded" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Downloaded" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "已下载" + } + } + } + }, + "button.pin" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Pin" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "置顶" + } + } + } + }, + "button.unpin" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Unpin" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "取消置顶" + } + } + } + }, "Cache Cleared Successfully" : { "localizations" : { "zh-Hans" : { @@ -38,6 +219,7 @@ } }, "Chat" : { + "extractionState" : "stale", "localizations" : { "zh-Hans" : { "stringUnit" : { @@ -53,7 +235,7 @@ "en" : { "stringUnit" : { "state" : "translated", - "value" : "Chat Histroy" + "value" : "Chat History" } }, "zh-Hans" : { @@ -64,6 +246,23 @@ } } }, + "Choose your AI model" : { + "extractionState" : "manual", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Choose your AI model" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "选择AI模型" + } + } + } + }, "Clear mmap Cache" : { "localizations" : { "zh-Hans" : { @@ -74,6 +273,26 @@ } } }, + "Complete" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "完成" + } + } + } + }, + "Completed" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "完成时间" + } + } + } + }, "Configure mixed samplers:" : { "localizations" : { "zh-Hans" : { @@ -151,23 +370,50 @@ } } }, - "Generation Parameters" : { - "extractionState" : "stale", + "filter.byTag" : { "localizations" : { - "zh-Hans" : { + "en" : { "stringUnit" : { "state" : "translated", - "value" : "生成参数" + "value" : "By Tag" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "按标签筛选" } } } }, - "Go to MNN Homepage" : { + "filter.byVendor" : { "localizations" : { - "zh-Hans" : { + "en" : { "stringUnit" : { "state" : "translated", - "value" : "查看 MNN 主页" + "value" : "By Title" + } + }, 
+ "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "按厂商筛选" + } + } + } + }, + "filter.title" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Title" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "筛选选项" } } } @@ -192,6 +438,69 @@ } } }, + "language.english" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "English" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "英文" + } + } + } + }, + "language.simplifiedChinese" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Simplified Chinese" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "简体中文" + } + } + } + }, + "Loading models..." : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "加载模型中" + } + } + } + }, + "Local" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "本地" + } + } + } + }, + "Local Model" : { + "comment" : "本地模型标签", + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "本地模型" + } + } + } + }, "Model Configuration" : { "localizations" : { "zh-Hans" : { @@ -202,6 +511,17 @@ } } }, + "Model Market" : { + "comment" : "模型市场标签", + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "模型市场" + } + } + } + }, "ModelLoadingFailText" : { "extractionState" : "manual", "localizations" : { @@ -253,12 +573,48 @@ } } }, - "Models" : { + "modelSource.title" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Source" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "下载源" + } + } + } + }, + "No" : { "localizations" : { "zh-Hans" : { "stringUnit" : { "state" : "translated", - "value" : "模型" + "value" : "否" + } + } + } + }, + "No local models found. Please download a model first." 
: { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "本地没有,请先下载" + } + } + } + }, + "No models available" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "没有模型可选" } } } @@ -275,6 +631,32 @@ }, "Penalty Sampler" : { + }, + "Performance analysis complete" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "性能测试完成" + } + } + } + }, + "Powered By MNN" : { + + }, + "PP: %lld • TG: %lld" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "new", + "value" : "PP: %1$lld • TG: %2$lld" + } + } + } + }, + "Progress" : { + }, "Random Seed" : { "localizations" : { @@ -286,12 +668,15 @@ } } }, - "Report an Issue" : { + "Ready" : { + + }, + "Running performance tests" : { "localizations" : { "zh-Hans" : { "stringUnit" : { "state" : "translated", - "value" : "反馈问题" + "value" : "运行测试集中" } } } @@ -329,6 +714,16 @@ } } }, + "Select Model" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "选择模型" + } + } + } + }, "Settings" : { "localizations" : { "zh-Hans" : { @@ -339,6 +734,250 @@ } } }, + "settings.alert.switchLanguage.cancel" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Cancel" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "取消" + } + } + } + }, + "settings.alert.switchLanguage.confirm" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Confirm" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "确定" + } + } + } + }, + "settings.alert.switchLanguage.message" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Message" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "切换语言需要重启应用,是否继续?" 
+ } + } + } + }, + "settings.alert.switchLanguage.title" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Switch Language" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "切换语言" + } + } + } + }, + "settings.button.aboutMNN" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "About MNN" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "关于 MNN" + } + } + } + }, + "settings.button.reportIssue" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Report Issue" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "反馈问题" + } + } + } + }, + "settings.navigation.title" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Settings" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "设置" + } + } + } + }, + "settings.picker.downloadSource" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Source" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "下载源" + } + } + } + }, + "settings.picker.language" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Language" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "语言" + } + } + } + }, + "settings.section.about" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "About" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "关于" + } + } + } + }, + "settings.section.application" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Application" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "needs_review", + "value" : "应用" + } + } + } + }, + "Share" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Share" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "分享" + } + } + } + }, + "Start benchmark after selecting your model" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "选择模型之后开始测试" + } + } + } + }, + "Start Test" : { + "extractionState" : "manual", + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "开始测试" + } + } + } + }, + "Status Update" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "状态更新" + } + } + } + }, + "Stop Benchmark" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "停止测试" + } + } + } + }, + "Stop Test" : { + "extractionState" : "manual", + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "停止测试" + } + } + } + }, "Success" : { "comment" : "Alert title for cache cleared", "localizations" : { @@ -350,7 +989,225 @@ } } }, + "tag.audioUnderstanding" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Audio Understanding" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "音频理解" + } + } + } + }, + "tag.chat" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Chat" + } + }, + "zh-Hans" : { + 
"stringUnit" : { + "state" : "translated", + "value" : "对话" + } + } + } + }, + "tag.codeGeneration" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Code Generation" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "代码生成" + } + } + } + }, + "tag.deepThinking" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Deep Thinking" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "深度思考" + } + } + } + }, + "tag.documentUnderstanding" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Document Understanding" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "文档理解" + } + } + } + }, + "tag.imageUnderstanding" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Image Understanding" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "图片理解" + } + } + } + }, + "tag.math" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Math" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "数学" + } + } + } + }, + "tag.multimodal" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Multimodal" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "多模态" + } + } + } + }, + "tag.textGeneration" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Text Generation" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "文本生成" + } + } + } + }, + "tag.textToImage" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Text to Image" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "文生图" + } + } + } + }, + "tag.videoUnderstanding" : { + "extractionState" : "stale", + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Video Understanding" + } + }, + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "视频理解" + } + } + } + }, + "Tap to select a model for testing" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "点击选择需要测试的模型" + } + } + } + }, + "Test %lld of %lld" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "new", + "value" : "Test %1$lld of %2$lld" + } + } + } + }, + "Test Progress" : { + "localizations" : { + "zh-Hans" : { + "stringUnit" : { + "state" : "translated", + "value" : "测试进展" + } + } + } + }, "This is a local large model application that requires certain performance from your device.\nIt is recommended to choose different model sizes based on your device's memory. \n\nThe model recommendations for iPhone are as follows:\n- For 8GB of RAM, models up to 8B are recommended (e.g., iPhone 16 Pro).\n- For 6GB of RAM, models up to 3B are recommended (e.g., iPhone 15 Pro).\n- For 4GB of RAM, models up to 1B or smaller are recommended (e.g., iPhone 13).\n\nChoosing a model that is too large may cause insufficient memory and crashes." 
: { + "extractionState" : "stale", "localizations" : { "en" : { "stringUnit" : { @@ -407,6 +1264,7 @@ } }, "User Guide" : { + "extractionState" : "manual", "localizations" : { "zh-Hans" : { "stringUnit" : { @@ -431,6 +1289,9 @@ } } } + }, + "Yes" : { + } }, "version" : "1.0" diff --git a/apps/iOS/MNNLLMChat/README-ZH.md b/apps/iOS/MNNLLMChat/README-ZH.md index b45a4e6e..ca45bc3a 100644 --- a/apps/iOS/MNNLLMChat/README-ZH.md +++ b/apps/iOS/MNNLLMChat/README-ZH.md @@ -8,24 +8,26 @@ ## 功能: -1. 模型列表 - - 获取 MNN 支持的模型列表; - - 模型管理,支持下载和删除模型; - - 支持切换 Hugging Face 和 ModelScope 源 - - 模型搜索,支持本地模型搜索; - -2. 多模态聊天对话:支持完整的Markdown格式输出, +1. 本地模型 + - 本地已下载模式展示 + - 支持自定义置顶 +2. 模型市场 + - 获取 MNN 支持的模型列表 + - 模型管理,支持下载和删除模型 + - 支持切换 Hugging Face、 ModelScope 和 Modeler 下载源 + - 模型搜索,支持关键词搜索、标签搜索 +3. 基准测试 + - 支持自动化基准测试,输出Prefill speed、 Decode Speed 和 Memory Usage等信息 +4. 多模态聊天对话:支持完整的Markdown格式输出 - 文本到文本 - 语音到文本 - 图片到文本,图片可以拍摄输入或从图库中选择 - -3. 模型配置 +5. 模型配置 - 支持配置 mmap - 支持配置 sampling strategy - 支持配置 diffusion 设置 - -4. 对话历史 - - 包含对话历史列表,可以还原对话场景 +6. 对话历史 + - 支持模型对话历史列表,还原历史对话场景 ### 视频介绍 @@ -35,12 +37,12 @@ ### 应用预览图 -| | | | -|--|--|--| -| **Text To Text** | **Image To Text** | **Audio To Text** | -| ![Text To Text](./assets/text.PNG) | ![Image To Text](./assets/image.PNG) | ![Audio To Text](./assets/audio.jpg) | -| **Model List** | **History** | **History** | -| ![Model List](./assets/list.PNG) | ![History](./assets/history2.PNG) | ![History](./assets/history.PNG) | +| | | | | +|--|--|--|--| +| **Text To Text** | **Image To Text** | **Audio To Text** | **Model Fliter** | +| ![Text To Text](./assets/text.PNG) | ![Image To Text](./assets/image.PNG) | ![Audio To Text](./assets/audio.jpg) | ![Audio To Text](./assets/fliter.PNG) | +| **Local Model** | **Model Market** | **Benckmark** | **History** | +| ![Model List](./assets/localModel.PNG) | ![History](./assets/modelMarket.PNG) | ![History](./assets/benchmark.jpeg) | ![History](./assets/history2.PNG) |

@@ -155,6 +157,14 @@ iPhone 因为内存有限,建议使用7B以及以下的模型,避免内存 ## Release Notes +### Version 0.4 + +- 项目新增三个大模块:本地模型、模型市场和基准测试 +- 新增基准测试,可以测试不同模型的性能表现 +- 新增设置页面,可以从历史侧边栏进入 +- 新增 Ali CDN 获取模型列表 +- 新增模型市场筛选功能 + ### Version 0.3.1 - 支持模型参数配置 diff --git a/apps/iOS/MNNLLMChat/README.md b/apps/iOS/MNNLLMChat/README.md index 1004f8d3..582fbc76 100644 --- a/apps/iOS/MNNLLMChat/README.md +++ b/apps/iOS/MNNLLMChat/README.md @@ -11,39 +11,43 @@ It operates fully offline with high privacy. Once the models are downloaded to t ## Features -1. **Model List** - - Browse models supported by MNN. - - Manage models: download and delete models. - - Support for switching between Hugging Face and ModelScope sources - - Search for models locally. - -2. **Multimodal Chat**: Supports full Markdown format output - - Text-to-text conversation. - - Audio-to-text conversation. - - Image-to-text conversation: capture images via camera or select from the gallery. - -3. **Model Configuration** +1. **Local Models** + - Display locally downloaded models + - Support custom pinning +2. **Model Market** + - Fetch the list of models supported by MNN + - Model management: download and delete models + - Support switching between Hugging Face, ModelScope, and Modelers download sources + - Model search: support keyword search and tag search +3. **Benchmark Testing** + - Support automated benchmark testing that reports Prefill Speed, Decode Speed, and Memory Usage +4. **Multimodal Chat**: Supports full Markdown format output + - Text-to-text + - Audio-to-text + - Image-to-text: images can be captured or selected from gallery +5. **Model Configuration** - Support configuring mmap - - Support configuring Sampling Strategy + - Support configuring sampling strategy - Support configuring diffusion settings -

@@ -165,7 +169,15 @@ If we want to directly download the models to the computer for debugging without 6. Run the project, navigate to the chat page, and perform model interactions and debugging. -## Release Notes +## Release Notes + +### Version 0.4 + +- Added three major modules: Local Models, Model Market, and Benchmark Testing +- Added automated benchmarking that reports Prefill Speed, Decode Speed, and Memory Usage per model +- Added a settings page, accessible from the history sidebar +- Added an Ali CDN source for fetching the model list +- Added filtering to the Model Market ### Version 0.3.1 diff --git a/apps/iOS/MNNLLMChat/assets/benchmark.jpeg b/apps/iOS/MNNLLMChat/assets/benchmark.jpeg new file mode 100644 index 00000000..1391f50d Binary files /dev/null and b/apps/iOS/MNNLLMChat/assets/benchmark.jpeg differ diff --git a/apps/iOS/MNNLLMChat/assets/fliter.PNG b/apps/iOS/MNNLLMChat/assets/fliter.PNG new file mode 100644 index 00000000..6ab1b83d Binary files /dev/null and b/apps/iOS/MNNLLMChat/assets/fliter.PNG differ diff --git a/apps/iOS/MNNLLMChat/assets/localModel.PNG b/apps/iOS/MNNLLMChat/assets/localModel.PNG new file mode 100644 index 00000000..f38f276e Binary files /dev/null and b/apps/iOS/MNNLLMChat/assets/localModel.PNG differ diff --git a/apps/iOS/MNNLLMChat/assets/modelMarket.PNG b/apps/iOS/MNNLLMChat/assets/modelMarket.PNG new file mode 100644 index 00000000..cafb8185 Binary files /dev/null and b/apps/iOS/MNNLLMChat/assets/modelMarket.PNG differ diff --git a/apps/iOS/MNNLLMChat/assets/settings.PNG b/apps/iOS/MNNLLMChat/assets/settings.PNG new file mode 100644 index 00000000..d078eb3b Binary files /dev/null and b/apps/iOS/MNNLLMChat/assets/settings.PNG differ
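A closing note on the settings flow added in this change: `SettingsView` applies a language switch by calling `LanguageManager.applyLanguage(_:)` and then exiting, because iOS reads the `"AppleLanguages"` user default only at process launch. A sketch of the sequence under that assumption (the call site is illustrative):

```swift
import Foundation

// Hypothetical call site for the language switch shipped in this diff.
// applyLanguage(_:) stores the choice under the app's own "AppLanguage" key
// and under the system "AppleLanguages" key, then posts "LanguageChanged".
LanguageManager.shared.applyLanguage("zh-Hans")

// SettingsView follows this with exit(0): the system key takes effect only on
// the next launch, after which NSLocalizedString resolves against zh-Hans.
```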