Path: blob/next/external/packages/bsp/rk3588/usr/include/rkllm.h
#ifndef _RKLLM_H_
#define _RKLLM_H_

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @typedef LLMHandle
 * @brief A handle used to manage and interact with the large language model.
 */
typedef void* LLMHandle;

/**
 * @enum LLMCallState
 * @brief Describes the possible states of an LLM call.
 */
typedef enum {
    RKLLM_RUN_NORMAL = 0,                /**< The LLM call is in a normal running state. */
    RKLLM_RUN_WAITING = 1,               /**< The LLM call is waiting for a complete UTF-8 encoded character. */
    RKLLM_RUN_FINISH = 2,                /**< The LLM call has finished execution. */
    RKLLM_RUN_ERROR = 3,                 /**< An error occurred during the LLM call. */
    RKLLM_RUN_GET_LAST_HIDDEN_LAYER = 4  /**< Retrieve the last hidden layer during inference. */
} LLMCallState;

/**
 * @enum RKLLMInputType
 * @brief Defines the types of inputs that can be fed into the LLM.
 */
typedef enum {
    RKLLM_INPUT_PROMPT = 0,      /**< Input is a text prompt. */
    RKLLM_INPUT_TOKEN = 1,       /**< Input is a sequence of tokens. */
    RKLLM_INPUT_EMBED = 2,       /**< Input is an embedding vector. */
    RKLLM_INPUT_MULTIMODAL = 3,  /**< Input is multimodal (e.g., text and image). */
} RKLLMInputType;

/**
 * @enum RKLLMInferMode
 * @brief Specifies the inference modes of the LLM.
 */
typedef enum {
    RKLLM_INFER_GENERATE = 0,               /**< The LLM generates text based on input. */
    RKLLM_INFER_GET_LAST_HIDDEN_LAYER = 1,  /**< The LLM retrieves the last hidden layer for further processing. */
} RKLLMInferMode;

/**
 * @struct RKLLMExtendParam
 * @brief Extended parameters for configuring an LLM instance.
 */
typedef struct {
    int32_t base_domain_id;  /**< Base domain ID. */
    uint8_t reserved[112];   /**< Reserved field. */
} RKLLMExtendParam;

/**
 * @struct RKLLMParam
 * @brief Defines the parameters for configuring an LLM instance.
 */
typedef struct {
    const char* model_path;         /**< Path to the model file. */
    int32_t max_context_len;        /**< Maximum number of tokens in the context window. */
    int32_t max_new_tokens;         /**< Maximum number of new tokens to generate. */
    int32_t top_k;                  /**< Top-K sampling parameter for token generation. */
    float top_p;                    /**< Top-P (nucleus) sampling parameter. */
    float temperature;              /**< Sampling temperature, affecting the randomness of token selection. */
    float repeat_penalty;           /**< Penalty for repeating tokens in generation. */
    float frequency_penalty;        /**< Penalizes frequent tokens during generation. */
    float presence_penalty;         /**< Penalizes tokens based on their presence in the input. */
    int32_t mirostat;               /**< Mirostat sampling strategy flag (0 to disable). */
    float mirostat_tau;             /**< Tau parameter for Mirostat sampling. */
    float mirostat_eta;             /**< Eta parameter for Mirostat sampling. */
    bool skip_special_token;        /**< Whether to skip special tokens during generation. */
    bool is_async;                  /**< Whether to run inference asynchronously. */
    const char* img_start;          /**< Starting position of an image in multimodal input. */
    const char* img_end;            /**< Ending position of an image in multimodal input. */
    const char* img_content;        /**< Pointer to the image content. */
    RKLLMExtendParam extend_param;  /**< Extended parameters. */
} RKLLMParam;

/**
 * @struct RKLLMLoraAdapter
 * @brief Defines parameters for a Lora adapter used in model fine-tuning.
 */
typedef struct {
    const char* lora_adapter_path;  /**< Path to the Lora adapter file. */
    const char* lora_adapter_name;  /**< Name of the Lora adapter. */
    float scale;                    /**< Scaling factor for applying the Lora adapter. */
} RKLLMLoraAdapter;
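
/**
 * Example (illustrative sketch, not part of the original header): a typical
 * way to populate RKLLMParam before initialization. rkllm_createDefaultParam()
 * and rkllm_init() are declared further below; the model path and the specific
 * sampling values shown here are assumptions chosen only for illustration.
 * @code
 * RKLLMParam param = rkllm_createDefaultParam();
 * param.model_path         = "/path/to/model.rkllm";  // placeholder path
 * param.max_context_len    = 512;
 * param.max_new_tokens     = 256;
 * param.top_k              = 1;
 * param.top_p              = 0.9f;
 * param.temperature        = 0.8f;
 * param.repeat_penalty     = 1.1f;
 * param.skip_special_token = true;
 * @endcode
 */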

/**
 * @struct RKLLMEmbedInput
 * @brief Represents an embedding input to the LLM.
 */
typedef struct {
    float* embed;     /**< Pointer to the embedding vector (of size n_tokens * n_embed). */
    size_t n_tokens;  /**< Number of tokens represented in the embedding. */
} RKLLMEmbedInput;

/**
 * @struct RKLLMTokenInput
 * @brief Represents token input to the LLM.
 */
typedef struct {
    int32_t* input_ids;  /**< Array of token IDs. */
    size_t n_tokens;     /**< Number of tokens in the input. */
} RKLLMTokenInput;

/**
 * @struct RKLLMMultiModelInput
 * @brief Represents multimodal input (e.g., text and image).
 */
typedef struct {
    char* prompt;           /**< Text prompt input. */
    float* image_embed;     /**< Embedding of the image (of size n_image_tokens * n_image_embed). */
    size_t n_image_tokens;  /**< Number of image tokens. */
} RKLLMMultiModelInput;

/**
 * @struct RKLLMInput
 * @brief Represents different types of input to the LLM via a union.
 */
typedef struct {
    RKLLMInputType input_type;  /**< Specifies the type of input provided (e.g., prompt, token, embed, multimodal). */
    union {
        const char* prompt_input;               /**< Text prompt input if input_type is RKLLM_INPUT_PROMPT. */
        RKLLMEmbedInput embed_input;            /**< Embedding input if input_type is RKLLM_INPUT_EMBED. */
        RKLLMTokenInput token_input;            /**< Token input if input_type is RKLLM_INPUT_TOKEN. */
        RKLLMMultiModelInput multimodal_input;  /**< Multimodal input if input_type is RKLLM_INPUT_MULTIMODAL. */
    };
} RKLLMInput;

/**
 * @struct RKLLMLoraParam
 * @brief Structure defining parameters for Lora adapters.
 */
typedef struct {
    const char* lora_adapter_name;  /**< Name of the Lora adapter. */
} RKLLMLoraParam;

/**
 * @struct RKLLMPromptCacheParam
 * @brief Structure to define parameters for caching prompts.
 */
typedef struct {
    int save_prompt_cache;          /**< Flag to indicate whether to save the prompt cache (0 = don't save, 1 = save). */
    const char* prompt_cache_path;  /**< Path to the prompt cache file. */
} RKLLMPromptCacheParam;

/**
 * @struct RKLLMInferParam
 * @brief Structure for defining parameters during inference.
 */
typedef struct {
    RKLLMInferMode mode;                         /**< Inference mode (e.g., generate or get last hidden layer). */
    RKLLMLoraParam* lora_params;                 /**< Pointer to Lora adapter parameters. */
    RKLLMPromptCacheParam* prompt_cache_params;  /**< Pointer to prompt cache parameters. */
} RKLLMInferParam;

/**
 * @struct RKLLMResultLastHiddenLayer
 * @brief Structure to hold the hidden states from the last layer.
 */
typedef struct {
    const float* hidden_states;  /**< Pointer to the hidden states (of size num_tokens * embd_size). */
    int embd_size;               /**< Size of the embedding vector. */
    int num_tokens;              /**< Number of tokens for which hidden states are stored. */
} RKLLMResultLastHiddenLayer;

/**
 * @struct RKLLMResult
 * @brief Structure to represent the result of LLM inference.
 */
typedef struct {
    const char* text;                              /**< Generated text result. */
    int32_t token_id;                              /**< ID of the generated token. */
    RKLLMResultLastHiddenLayer last_hidden_layer;  /**< Hidden states of the last layer (if requested). */
} RKLLMResult;
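
/**
 * Example (illustrative sketch, not part of the original header): selecting a
 * previously loaded Lora adapter by name and requesting that the prompt cache
 * be saved for a later run. The adapter name and cache path are placeholders,
 * and leaving unused pointer members NULL is an assumption; the adapter itself
 * would first be registered via rkllm_load_lora(), declared further below.
 * @code
 * RKLLMLoraParam lora = { .lora_adapter_name = "my_adapter" };  // placeholder name
 * RKLLMPromptCacheParam cache = { .save_prompt_cache = 1,
 *                                 .prompt_cache_path = "./prompt.cache" };  // placeholder path
 *
 * RKLLMInferParam infer_param;
 * infer_param.mode                = RKLLM_INFER_GENERATE;
 * infer_param.lora_params         = &lora;
 * infer_param.prompt_cache_params = &cache;
 * @endcode
 */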

/**
 * @typedef LLMResultCallback
 * @brief Callback function to handle LLM results.
 * @param result Pointer to the LLM result.
 * @param userdata Pointer to user data for the callback.
 * @param state State of the LLM call (e.g., finished, error).
 */
typedef void (*LLMResultCallback)(RKLLMResult* result, void* userdata, LLMCallState state);

/**
 * @brief Creates a default RKLLMParam structure with preset values.
 * @return A default RKLLMParam structure.
 */
RKLLMParam rkllm_createDefaultParam();

/**
 * @brief Initializes the LLM with the given parameters.
 * @param handle Pointer to the LLM handle.
 * @param param Configuration parameters for the LLM.
 * @param callback Callback function to handle LLM results.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_init(LLMHandle* handle, RKLLMParam* param, LLMResultCallback callback);

/**
 * @brief Loads a Lora adapter into the LLM.
 * @param handle LLM handle.
 * @param lora_adapter Pointer to the Lora adapter structure.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_load_lora(LLMHandle handle, RKLLMLoraAdapter* lora_adapter);

/**
 * @brief Loads a prompt cache from a file.
 * @param handle LLM handle.
 * @param prompt_cache_path Path to the prompt cache file.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_load_prompt_cache(LLMHandle handle, const char* prompt_cache_path);

/**
 * @brief Releases the prompt cache from memory.
 * @param handle LLM handle.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_release_prompt_cache(LLMHandle handle);

/**
 * @brief Destroys the LLM instance and releases resources.
 * @param handle LLM handle.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_destroy(LLMHandle handle);

/**
 * @brief Runs an LLM inference task synchronously.
 * @param handle LLM handle.
 * @param rkllm_input Input data for the LLM.
 * @param rkllm_infer_params Parameters for the inference task.
 * @param userdata Pointer to user data for the callback.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_run(LLMHandle handle, RKLLMInput* rkllm_input, RKLLMInferParam* rkllm_infer_params, void* userdata);

/**
 * @brief Runs an LLM inference task asynchronously.
 * @param handle LLM handle.
 * @param rkllm_input Input data for the LLM.
 * @param rkllm_infer_params Parameters for the inference task.
 * @param userdata Pointer to user data for the callback.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_run_async(LLMHandle handle, RKLLMInput* rkllm_input, RKLLMInferParam* rkllm_infer_params, void* userdata);

/**
 * @brief Aborts an ongoing LLM task.
 * @param handle LLM handle.
 * @return Status code (0 for success, non-zero for failure).
 */
int rkllm_abort(LLMHandle handle);

/**
 * @brief Checks if an LLM task is currently running.
 * @param handle LLM handle.
 * @return Status code (0 if a task is running, non-zero otherwise).
 */
int rkllm_is_running(LLMHandle handle);

#ifdef __cplusplus
}
#endif

#endif
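
/**
 * End-to-end example (illustrative sketch, not part of the original header):
 * a minimal synchronous generate run using the API declared above. The model
 * path and prompt are placeholders, the callback simply prints whatever text
 * the library reports on each invocation, and error handling is reduced to
 * return-code checks.
 * @code
 * #include <stdio.h>
 * #include "rkllm.h"
 *
 * static void on_result(RKLLMResult* result, void* userdata, LLMCallState state)
 * {
 *     (void)userdata;
 *     if (state == RKLLM_RUN_NORMAL && result->text != NULL) {
 *         printf("%s", result->text);   // print partial text as it arrives
 *     } else if (state == RKLLM_RUN_FINISH) {
 *         printf("\n[done]\n");
 *     } else if (state == RKLLM_RUN_ERROR) {
 *         printf("\n[error]\n");
 *     }
 * }
 *
 * int main(void)
 * {
 *     RKLLMParam param = rkllm_createDefaultParam();
 *     param.model_path = "/path/to/model.rkllm";    // placeholder path
 *
 *     LLMHandle handle = NULL;
 *     if (rkllm_init(&handle, &param, on_result) != 0) {
 *         return 1;
 *     }
 *
 *     RKLLMInput input;
 *     input.input_type   = RKLLM_INPUT_PROMPT;
 *     input.prompt_input = "Hello";                 // placeholder prompt
 *
 *     RKLLMInferParam infer_param;
 *     infer_param.mode                = RKLLM_INFER_GENERATE;
 *     infer_param.lora_params         = NULL;
 *     infer_param.prompt_cache_params = NULL;
 *
 *     int ret = rkllm_run(handle, &input, &infer_param, NULL);
 *     rkllm_destroy(handle);
 *     return ret;
 * }
 * @endcode
 */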