Path: blob/next/external/packages/bsp/rk3588/usr/include/rkllm.h
#ifndef _RKLLM_H_
#define _RKLLM_H_

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @typedef LLMHandle
 * @brief A handle used to manage and interact with the large language model.
 */
typedef void* LLMHandle;

/**
 * @enum LLMCallState
 * @brief Describes the possible states of an LLM call.
 */
typedef enum {
    RKLLM_RUN_NORMAL = 0,                /**< The LLM call is in a normal running state. */
    RKLLM_RUN_WAITING = 1,               /**< The LLM call is waiting for a complete UTF-8 encoded character. */
    RKLLM_RUN_FINISH = 2,                /**< The LLM call has finished execution. */
    RKLLM_RUN_ERROR = 3,                 /**< An error occurred during the LLM call. */
    RKLLM_RUN_GET_LAST_HIDDEN_LAYER = 4  /**< Retrieve the last hidden layer during inference. */
} LLMCallState;

/**
 * @enum RKLLMInputType
 * @brief Defines the types of inputs that can be fed into the LLM.
 */
typedef enum {
    RKLLM_INPUT_PROMPT = 0,      /**< Input is a text prompt. */
    RKLLM_INPUT_TOKEN = 1,       /**< Input is a sequence of tokens. */
    RKLLM_INPUT_EMBED = 2,       /**< Input is an embedding vector. */
    RKLLM_INPUT_MULTIMODAL = 3,  /**< Input is multimodal (e.g., text and image). */
} RKLLMInputType;

/**
 * @enum RKLLMInferMode
 * @brief Specifies the inference modes of the LLM.
 */
typedef enum {
    RKLLM_INFER_GENERATE = 0,               /**< The LLM generates text based on input. */
    RKLLM_INFER_GET_LAST_HIDDEN_LAYER = 1,  /**< The LLM retrieves the last hidden layer for further processing. */
} RKLLMInferMode;

/**
 * @struct RKLLMExtendParam
 * @brief Extended parameters for configuring an LLM instance.
 */
typedef struct {
    int32_t base_domain_id;  /**< Base domain ID. */
    uint8_t reserved[112];   /**< Reserved field. */
} RKLLMExtendParam;

/**
 * @struct RKLLMParam
 * @brief Defines the parameters for configuring an LLM instance.
 */
typedef struct {
    const char* model_path;         /**< Path to the model file. */
    int32_t max_context_len;        /**< Maximum number of tokens in the context window. */
    int32_t max_new_tokens;         /**< Maximum number of new tokens to generate. */
    int32_t top_k;                  /**< Top-K sampling parameter for token generation. */
    float top_p;                    /**< Top-P (nucleus) sampling parameter. */
    float temperature;              /**< Sampling temperature, affecting the randomness of token selection. */
    float repeat_penalty;           /**< Penalty for repeating tokens in generation. */
    float frequency_penalty;        /**< Penalizes frequent tokens during generation. */
    float presence_penalty;         /**< Penalizes tokens based on their presence in the input. */
    int32_t mirostat;               /**< Mirostat sampling strategy flag (0 to disable). */
    float mirostat_tau;             /**< Tau parameter for Mirostat sampling. */
    float mirostat_eta;             /**< Eta parameter for Mirostat sampling. */
    bool skip_special_token;        /**< Whether to skip special tokens during generation. */
    bool is_async;                  /**< Whether to run inference asynchronously. */
    const char* img_start;          /**< Starting position of an image in multimodal input. */
    const char* img_end;            /**< Ending position of an image in multimodal input. */
    const char* img_content;        /**< Pointer to the image content. */
    RKLLMExtendParam extend_param;  /**< Extended parameters. */
} RKLLMParam;

/**
 * @struct RKLLMLoraAdapter
 * @brief Defines parameters for a Lora adapter used in model fine-tuning.
 */
typedef struct {
    const char* lora_adapter_path;  /**< Path to the Lora adapter file. */
    const char* lora_adapter_name;  /**< Name of the Lora adapter. */
    float scale;                    /**< Scaling factor for applying the Lora adapter. */
} RKLLMLoraAdapter;
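
/**
 * Example (illustrative sketch, not part of the original header): a typical
 * way to populate RKLLMParam before initialization. rkllm_createDefaultParam()
 * and rkllm_init() are declared further below; the model path and the specific
 * sampling values shown here are assumptions chosen only for illustration.
 * @code
 * RKLLMParam param = rkllm_createDefaultParam();
 * param.model_path         = "/path/to/model.rkllm";  // placeholder path
 * param.max_context_len    = 512;
 * param.max_new_tokens     = 256;
 * param.top_k              = 1;
 * param.top_p              = 0.9f;
 * param.temperature        = 0.8f;
 * param.repeat_penalty     = 1.1f;
 * param.skip_special_token = true;
 * @endcode
 */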

/**
 * @struct RKLLMEmbedInput
 * @brief Represents an embedding input to the LLM.
 */
typedef struct {
    float* embed;     /**< Pointer to the embedding vector (of size n_tokens * n_embed). */
    size_t n_tokens;  /**< Number of tokens represented in the embedding. */
} RKLLMEmbedInput;

/**
 * @struct RKLLMTokenInput
 * @brief Represents token input to the LLM.
 */
typedef struct {
    int32_t* input_ids;  /**< Array of token IDs. */
    size_t n_tokens;     /**< Number of tokens in the input. */
} RKLLMTokenInput;

/**
 * @struct RKLLMMultiModelInput
 * @brief Represents multimodal input (e.g., text and image).
 */
typedef struct {
    char* prompt;           /**< Text prompt input. */
    float* image_embed;     /**< Embedding of the image (of size n_image_tokens * n_image_embed). */
    size_t n_image_tokens;  /**< Number of image tokens. */
} RKLLMMultiModelInput;

/**
 * @struct RKLLMInput
 * @brief Represents different types of input to the LLM via a union.
 */
typedef struct {
    RKLLMInputType input_type;  /**< Specifies the type of input provided (e.g., prompt, token, embed, multimodal). */
    union {
        const char* prompt_input;               /**< Text prompt input if input_type is RKLLM_INPUT_PROMPT. */
        RKLLMEmbedInput embed_input;            /**< Embedding input if input_type is RKLLM_INPUT_EMBED. */
        RKLLMTokenInput token_input;            /**< Token input if input_type is RKLLM_INPUT_TOKEN. */
        RKLLMMultiModelInput multimodal_input;  /**< Multimodal input if input_type is RKLLM_INPUT_MULTIMODAL. */
    };
} RKLLMInput;

/**
 * @struct RKLLMLoraParam
 * @brief Structure defining parameters for Lora adapters.
 */
typedef struct {
    const char* lora_adapter_name;  /**< Name of the Lora adapter. */
} RKLLMLoraParam;

/**
 * @struct RKLLMPromptCacheParam
 * @brief Structure to define parameters for caching prompts.
 */
typedef struct {
    int save_prompt_cache;          /**< Flag to indicate whether to save the prompt cache (0 = don't save, 1 = save). */
    const char* prompt_cache_path;  /**< Path to the prompt cache file. */
} RKLLMPromptCacheParam;

/**
 * @struct RKLLMInferParam
 * @brief Structure for defining parameters during inference.
 */
typedef struct {
    RKLLMInferMode mode;                         /**< Inference mode (e.g., generate or get last hidden layer). */
    RKLLMLoraParam* lora_params;                 /**< Pointer to Lora adapter parameters. */
    RKLLMPromptCacheParam* prompt_cache_params;  /**< Pointer to prompt cache parameters. */
} RKLLMInferParam;

/**
 * @struct RKLLMResultLastHiddenLayer
 * @brief Structure to hold the hidden states from the last layer.
 */
typedef struct {
    const float* hidden_states;  /**< Pointer to the hidden states (of size num_tokens * embd_size). */
    int embd_size;               /**< Size of the embedding vector. */
    int num_tokens;              /**< Number of tokens for which hidden states are stored. */
} RKLLMResultLastHiddenLayer;

/**
 * @struct RKLLMResult
 * @brief Structure to represent the result of LLM inference.
 */
typedef struct {
    const char* text;                              /**< Generated text result. */
    int32_t token_id;                              /**< ID of the generated token. */
    RKLLMResultLastHiddenLayer last_hidden_layer;  /**< Hidden states of the last layer (if requested). */
} RKLLMResult;
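
/**
 * Example (illustrative sketch, not part of the original header): selecting a
 * previously loaded Lora adapter by name and requesting that the prompt cache
 * be saved for a later run. The adapter name and cache path are placeholders,
 * and leaving unused pointer members NULL is an assumption; the adapter itself
 * would first be registered via rkllm_load_lora(), declared further below.
 * @code
 * RKLLMLoraParam lora = { .lora_adapter_name = "my_adapter" };  // placeholder name
 * RKLLMPromptCacheParam cache = { .save_prompt_cache = 1,
 *                                 .prompt_cache_path = "./prompt.cache" };  // placeholder path
 *
 * RKLLMInferParam infer_param;
 * infer_param.mode                = RKLLM_INFER_GENERATE;
 * infer_param.lora_params         = &lora;
 * infer_param.prompt_cache_params = &cache;
 * @endcode
 */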

/**
 * @typedef LLMResultCallback
 * @brief Callback function to handle LLM results.
 * @param result Pointer to the LLM result.
 * @param userdata Pointer to user data for the callback.
 * @param state State of the LLM call (e.g., finished, error).
 */
typedef void (*LLMResultCallback)(RKLLMResult* result, void* userdata, LLMCallState state);

/**
 * @brief Creates a default RKLLMParam structure with preset values.
 * @return A default RKLLMParam structure.
 */
RKLLMParam rkllm_createDefaultParam();

/**
 * @brief Initializes the LLM with the given parameters.
 * @param handle Pointer to the LLM handle.
 * @param param Configuration parameters for the LLM.
 * @param callback Callback function to handle LLM results.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_init(LLMHandle* handle, RKLLMParam* param, LLMResultCallback callback);

/**
 * @brief Loads a Lora adapter into the LLM.
 * @param handle LLM handle.
 * @param lora_adapter Pointer to the Lora adapter structure.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_load_lora(LLMHandle handle, RKLLMLoraAdapter* lora_adapter);

/**
 * @brief Loads a prompt cache from a file.
 * @param handle LLM handle.
 * @param prompt_cache_path Path to the prompt cache file.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_load_prompt_cache(LLMHandle handle, const char* prompt_cache_path);

/**
 * @brief Releases the prompt cache from memory.
 * @param handle LLM handle.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_release_prompt_cache(LLMHandle handle);

/**
 * @brief Destroys the LLM instance and releases resources.
 * @param handle LLM handle.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_destroy(LLMHandle handle);

/**
 * @brief Runs an LLM inference task synchronously.
 * @param handle LLM handle.
 * @param rkllm_input Input data for the LLM.
 * @param rkllm_infer_params Parameters for the inference task.
 * @param userdata Pointer to user data for the callback.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_run(LLMHandle handle, RKLLMInput* rkllm_input, RKLLMInferParam* rkllm_infer_params, void* userdata);

/**
 * @brief Runs an LLM inference task asynchronously.
 * @param handle LLM handle.
 * @param rkllm_input Input data for the LLM.
 * @param rkllm_infer_params Parameters for the inference task.
 * @param userdata Pointer to user data for the callback.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_run_async(LLMHandle handle, RKLLMInput* rkllm_input, RKLLMInferParam* rkllm_infer_params, void* userdata);

/**
 * @brief Aborts an ongoing LLM task.
 * @param handle LLM handle.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_abort(LLMHandle handle);

/**
 * @brief Checks if an LLM task is currently running.
 * @param handle LLM handle.
 * @return Status code (0 if a task is running, non-zero otherwise).
 */
int rkllm_is_running(LLMHandle handle);

#ifdef __cplusplus
}
#endif

#endif
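
/**
 * End-to-end example (illustrative sketch, not part of the original header):
 * a minimal synchronous generate run using the API declared above. The model
 * path and prompt are placeholders, the callback simply prints whatever text
 * the library reports on each invocation, and error handling is reduced to
 * return-code checks.
 * @code
 * #include <stdio.h>
 * #include "rkllm.h"
 *
 * static void on_result(RKLLMResult* result, void* userdata, LLMCallState state)
 * {
 *     (void)userdata;
 *     if (state == RKLLM_RUN_NORMAL && result->text != NULL) {
 *         printf("%s", result->text);   // print partial text as it arrives
 *     } else if (state == RKLLM_RUN_FINISH) {
 *         printf("\n[done]\n");
 *     } else if (state == RKLLM_RUN_ERROR) {
 *         printf("\n[error]\n");
 *     }
 * }
 *
 * int main(void)
 * {
 *     RKLLMParam param = rkllm_createDefaultParam();
 *     param.model_path = "/path/to/model.rkllm";    // placeholder path
 *
 *     LLMHandle handle = NULL;
 *     if (rkllm_init(&handle, &param, on_result) != 0) {
 *         return 1;
 *     }
 *
 *     RKLLMInput input;
 *     input.input_type   = RKLLM_INPUT_PROMPT;
 *     input.prompt_input = "Hello";                 // placeholder prompt
 *
 *     RKLLMInferParam infer_param;
 *     infer_param.mode                = RKLLM_INFER_GENERATE;
 *     infer_param.lora_params         = NULL;
 *     infer_param.prompt_cache_params = NULL;
 *
 *     int ret = rkllm_run(handle, &input, &infer_param, NULL);
 *     rkllm_destroy(handle);
 *     return ret;
 * }
 * @endcode
 */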