MNN/apps/iOS/MNNLLMChat/MNNLLMiOS/InferenceEngine/LLMInferenceEngineWrapper.h

217 lines
6.9 KiB
Objective-C

//
// LLMInferenceEngineWrapper.h
// mnn-llm
//
// Created by wangzhaode on 2023/12/14.
//
#ifndef LLMInferenceEngineWrapper_h
#define LLMInferenceEngineWrapper_h
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/**
 * Block reporting the outcome of an operation as a single flag.
 *
 * @param success YES when the operation succeeded, NO otherwise.
 */
typedef void (^CompletionHandler)(BOOL success);

/**
 * Block that receives a piece of generated text.
 *
 * @param output The text delivered by this invocation; never nil.
 */
typedef void (^OutputHandler)(NSString * _Nonnull output);
// MARK: - Benchmark Related Types
/**
 * Phase tag attached to structured benchmark progress reports.
 *
 * The raw values are spelled out explicitly so they stay stable if cases are
 * ever reordered. The per-case notes below are inferred from the case names;
 * the code that emits them is not part of this header.
 */
typedef NS_ENUM(NSInteger, BenchmarkProgressType) {
    /** Zero value; no specific phase. */
    BenchmarkProgressTypeUnknown           = 0,
    /** Benchmark is being set up. */
    BenchmarkProgressTypeInitializing      = 1,
    /** Warm-up phase. */
    BenchmarkProgressTypeWarmingUp         = 2,
    /** A test iteration is running. */
    BenchmarkProgressTypeRunningTest       = 3,
    /** Results are being processed. */
    BenchmarkProgressTypeProcessingResults = 4,
    /** Benchmark finished. */
    BenchmarkProgressTypeCompleted         = 5,
    /** Benchmark is stopping. */
    BenchmarkProgressTypeStopping          = 6,
};
/**
 * Structured progress information for a benchmark run.
 *
 * Instances of this class are the argument type of BenchmarkProgressCallback.
 * Units noted below are taken from the property names (e.g. "Seconds");
 * NOTE(review): confirm against the code that populates them.
 */
@interface BenchmarkProgressInfo : NSObject

/** Overall progress, 0-100. */
@property (nonatomic, assign) NSInteger progress;

/**
 * Status description.
 * Declared `copy` (not `strong`) so that assigning an NSMutableString cannot
 * later change the stored message behind this object's back.
 */
@property (nonatomic, copy) NSString *statusMessage;

/** Phase this report belongs to. */
@property (nonatomic, assign) BenchmarkProgressType progressType;

/** Iteration counter for the current run, paired with totalIterations. */
@property (nonatomic, assign) NSInteger currentIteration;

/** Total number of iterations in the run. */
@property (nonatomic, assign) NSInteger totalIterations;

/** Prompt token count for the run being reported. */
@property (nonatomic, assign) NSInteger nPrompt;

/** Generated token count for the run being reported. */
@property (nonatomic, assign) NSInteger nGenerate;

/** Run time, in seconds per the property name. */
@property (nonatomic, assign) float runTimeSeconds;

/** Prefill time, in seconds per the property name. */
@property (nonatomic, assign) float prefillTimeSeconds;

/** Decode time, in seconds per the property name. */
@property (nonatomic, assign) float decodeTimeSeconds;

/** Prefill speed; presumably tokens per second — TODO confirm. */
@property (nonatomic, assign) float prefillSpeed;

/** Decode speed; presumably tokens per second — TODO confirm. */
@property (nonatomic, assign) float decodeSpeed;

@end
/**
 * Benchmark result structure.
 *
 * Instances of this class are the argument type of BenchmarkCompleteCallback.
 * String and array properties are declared `copy` (not `strong`) so that a
 * mutable object assigned by the producer cannot be changed afterwards while
 * a consumer is reading the result.
 */
@interface BenchmarkResult : NSObject

/** Whether the benchmark succeeded. */
@property (nonatomic, assign) BOOL success;

/** Error description; may be nil. */
@property (nonatomic, copy, nullable) NSString *errorMessage;

/** Prefill timings; presumably microseconds given the "Us" suffix — TODO confirm. */
@property (nonatomic, copy) NSArray<NSNumber *> *prefillTimesUs;

/** Decode timings; presumably microseconds given the "Us" suffix — TODO confirm. */
@property (nonatomic, copy) NSArray<NSNumber *> *decodeTimesUs;

/** Sample timings; presumably microseconds given the "Us" suffix — TODO confirm. */
@property (nonatomic, copy) NSArray<NSNumber *> *sampleTimesUs;

/** Number of prompt tokens. */
@property (nonatomic, assign) NSInteger promptTokens;

/** Number of generated tokens. */
@property (nonatomic, assign) NSInteger generateTokens;

/** Number of repetitions. */
@property (nonatomic, assign) NSInteger repeatCount;

/** Whether the KV cache was enabled. */
@property (nonatomic, assign) BOOL kvCacheEnabled;

@end
// Benchmark callback blocks

/** Receives a structured progress report. */
typedef void (^BenchmarkProgressCallback)(BenchmarkProgressInfo *progressInfo);

/** Receives an error description string. */
typedef void (^BenchmarkErrorCallback)(NSString *error);

/** Receives a detailed statistics string when an iteration completes. */
typedef void (^BenchmarkIterationCompleteCallback)(NSString *detailedStats);

/** Receives the final benchmark result. */
typedef void (^BenchmarkCompleteCallback)(BenchmarkResult *result);
/**
 * LLMInferenceEngineWrapper - Objective-C facade over the MNN LLM inference engine.
 *
 * Gives the iOS app a single entry point for loading a model, streaming
 * generated text, managing chat history, adjusting configuration, and running
 * benchmarks.
 *
 * NOTE(review): threading, error-handling and performance characteristics are
 * determined by the implementation file, which is not visible from this
 * header — confirm there before relying on them.
 */
@interface LLMInferenceEngineWrapper : NSObject

// MARK: - Lifecycle

/**
 * Creates a wrapper for the model stored at the given location.
 *
 * @param modelPath  File system path to the model directory.
 * @param completion Handler invoked with YES on success or NO on failure.
 * @return An initialized LLMInferenceEngineWrapper instance.
 */
- (instancetype)initWithModelPath:(NSString *)modelPath
                       completion:(CompletionHandler)completion;

// MARK: - Inference

/**
 * Runs the model on the user's input and streams the response.
 *
 * @param input  The user's input text.
 * @param output Block that receives the streamed output chunks.
 */
- (void)processInput:(NSString *)input withOutput:(OutputHandler)output;

/**
 * Runs the model on the user's input and streams the response, optionally
 * followed by performance statistics.
 *
 * @param input           The user's input text.
 * @param output          Block that receives the streamed output chunks.
 * @param showPerformance Whether performance statistics are output after the
 *                        response completes.
 */
- (void)processInput:(NSString *)input
          withOutput:(OutputHandler)output
     showPerformance:(BOOL)showPerformance;

/**
 * Appends chat prompts to the conversation history.
 *
 * @param array Array of dictionaries, each describing a chat message.
 *              NOTE(review): the expected dictionary keys are not specified in
 *              this header — check the implementation.
 */
- (void)addPromptsFromArray:(NSArray<NSDictionary *> *)array;

// MARK: - Configuration

/**
 * Applies engine configuration supplied as JSON.
 *
 * @param jsonStr JSON string holding the configuration parameters.
 */
- (void)setConfigWithJSONString:(NSString *)jsonStr;

/**
 * Turns the engine's thinking mode on or off.
 *
 * @param enabled YES to enable thinking mode, NO to disable it.
 */
- (void)setThinkingModeEnabled:(BOOL)enabled;

// MARK: - Status

/**
 * Reports whether the model can accept inference requests.
 *
 * @return YES if the model is loaded and ready.
 */
- (BOOL)isModelReady;

/**
 * Reports whether an inference request is in flight.
 *
 * @return YES if an inference request is currently being processed.
 */
- (BOOL)isProcessing;

/**
 * Cancels the ongoing inference (if supported).
 */
- (void)cancelInference;

// MARK: - Chat History

/**
 * Returns the size of the chat history.
 *
 * @return Number of messages in the chat history.
 */
- (NSUInteger)getChatHistoryCount;

/**
 * Removes all messages from the chat history.
 */
- (void)clearChatHistory;

// MARK: - Benchmark Methods

/**
 * Runs the official benchmark, following the approach of llm_bench.cpp.
 *
 * All four callbacks are optional and may be nil.
 *
 * @param backend                   Backend type (0 for CPU).
 * @param threads                   Number of threads.
 * @param useMmap                   Whether to use memory mapping.
 * @param power                     Power setting.
 * @param precision                 Precision setting (2 for low precision).
 * @param memory                    Memory setting (2 for low memory).
 * @param dynamicOption             Dynamic optimization option.
 * @param nPrompt                   Number of prompt tokens.
 * @param nGenerate                 Number of tokens to generate.
 * @param nRepeat                   Number of repetitions.
 * @param kvCache                   Whether to use the KV cache.
 * @param progressCallback          Receives progress updates.
 * @param errorCallback             Receives error descriptions.
 * @param iterationCompleteCallback Receives per-iteration statistics.
 * @param completeCallback          Receives the final result.
 */
- (void)runOfficialBenchmarkWithBackend:(NSInteger)backend
                                threads:(NSInteger)threads
                                useMmap:(BOOL)useMmap
                                  power:(NSInteger)power
                              precision:(NSInteger)precision
                                 memory:(NSInteger)memory
                          dynamicOption:(NSInteger)dynamicOption
                                nPrompt:(NSInteger)nPrompt
                              nGenerate:(NSInteger)nGenerate
                                nRepeat:(NSInteger)nRepeat
                                kvCache:(BOOL)kvCache
                       progressCallback:(BenchmarkProgressCallback _Nullable)progressCallback
                          errorCallback:(BenchmarkErrorCallback _Nullable)errorCallback
              iterationCompleteCallback:(BenchmarkIterationCompleteCallback _Nullable)iterationCompleteCallback
                       completeCallback:(BenchmarkCompleteCallback _Nullable)completeCallback;

/**
 * Stops the running benchmark.
 */
- (void)stopBenchmark;

/**
 * Reports whether a benchmark is in progress.
 *
 * @return YES if a benchmark is currently running.
 */
- (BOOL)isBenchmarkRunning;

@end
NS_ASSUME_NONNULL_END
#endif /* LLMInferenceEngineWrapper_h */