/*
 * rkllm.h — RKLLM (Rockchip LLM runtime) public C API for RK3588.
 * Source: orangepi-xunlong/orangepi-build,
 * path: external/packages/bsp/rk3588/usr/include/rkllm.h
 */
1
#ifndef _RKLLM_H_
#define _RKLLM_H_

/*
 * This header uses int32_t/uint8_t, size_t, and bool, so it must pull in the
 * corresponding standard headers itself; otherwise it is not self-contained
 * and fails to compile when included first.
 */
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @typedef LLMHandle
 * @brief Opaque handle identifying a loaded large language model instance.
 *
 * Produced by rkllm_init() and passed to every other API function; the
 * pointed-to object is owned by the library.
 */
typedef void* LLMHandle;

/**
 * @enum LLMCallState
 * @brief State reported to the result callback during an LLM call.
 */
typedef enum {
    RKLLM_RUN_NORMAL  = 0,              /**< Inference is proceeding normally. */
    RKLLM_RUN_WAITING = 1,              /**< Waiting for a complete UTF-8 encoded character. */
    RKLLM_RUN_FINISH  = 2,              /**< Inference has finished. */
    RKLLM_RUN_ERROR   = 3,              /**< An error occurred during the call. */
    RKLLM_RUN_GET_LAST_HIDDEN_LAYER = 4 /**< The last hidden layer is being delivered. */
} LLMCallState;

/**
 * @enum RKLLMInputType
 * @brief Kind of payload carried by an RKLLMInput.
 */
typedef enum {
    RKLLM_INPUT_PROMPT     = 0, /**< Plain text prompt. */
    RKLLM_INPUT_TOKEN      = 1, /**< Pre-tokenized sequence of token IDs. */
    RKLLM_INPUT_EMBED      = 2, /**< Precomputed embedding vectors. */
    RKLLM_INPUT_MULTIMODAL = 3  /**< Mixed input (e.g. text plus image embedding). */
} RKLLMInputType;

/**
 * @enum RKLLMInferMode
 * @brief Selects what an inference call produces.
 */
typedef enum {
    RKLLM_INFER_GENERATE = 0,             /**< Generate text from the input. */
    RKLLM_INFER_GET_LAST_HIDDEN_LAYER = 1 /**< Return the last hidden layer instead of text. */
} RKLLMInferMode;

/**
 * @struct RKLLMExtendParam
 * @brief Extended parameters for configuring an LLM instance.
 */
typedef struct {
    int32_t base_domain_id; /**< Base domain id — semantics defined by the runtime; confirm with vendor docs. */
    uint8_t reserved[112];  /**< Reserved for future extension of this structure. */
} RKLLMExtendParam;

/**
56
* @struct RKLLMParam
57
* @brief Defines the parameters for configuring an LLM instance.
58
*/
59
typedef struct {
60
const char* model_path; /**< Path to the model file. */
61
int32_t max_context_len; /**< Maximum number of tokens in the context window. */
62
int32_t max_new_tokens; /**< Maximum number of new tokens to generate. */
63
int32_t top_k; /**< Top-K sampling parameter for token generation. */
64
float top_p; /**< Top-P (nucleus) sampling parameter. */
65
float temperature; /**< Sampling temperature, affecting the randomness of token selection. */
66
float repeat_penalty; /**< Penalty for repeating tokens in generation. */
67
float frequency_penalty; /**< Penalizes frequent tokens during generation. */
68
float presence_penalty; /**< Penalizes tokens based on their presence in the input. */
69
int32_t mirostat; /**< Mirostat sampling strategy flag (0 to disable). */
70
float mirostat_tau; /**< Tau parameter for Mirostat sampling. */
71
float mirostat_eta; /**< Eta parameter for Mirostat sampling. */
72
bool skip_special_token; /**< Whether to skip special tokens during generation. */
73
bool is_async; /**< Whether to run inference asynchronously. */
74
const char* img_start; /**< Starting position of an image in multimodal input. */
75
const char* img_end; /**< Ending position of an image in multimodal input. */
76
const char* img_content; /**< Pointer to the image content. */
77
RKLLMExtendParam extend_param; /**< Extend parameters. */
78
} RKLLMParam;
79
80
/**
 * @struct RKLLMLoraAdapter
 * @brief Describes a LoRA adapter to be loaded into the model.
 */
typedef struct {
    const char* lora_adapter_path; /**< Path to the LoRA adapter file. */
    const char* lora_adapter_name; /**< Name used to refer to this adapter later. */
    float scale;                   /**< Scaling factor applied when merging the adapter. */
} RKLLMLoraAdapter;

/**
 * @struct RKLLMEmbedInput
 * @brief Embedding input for the LLM.
 */
typedef struct {
    float* embed;    /**< Embedding data; length is n_tokens * n_embed floats. */
    size_t n_tokens; /**< Number of tokens the embedding represents. */
} RKLLMEmbedInput;

/**
 * @struct RKLLMTokenInput
 * @brief Pre-tokenized input for the LLM.
 */
typedef struct {
    int32_t* input_ids; /**< Token ID array. */
    size_t n_tokens;    /**< Number of entries in input_ids. */
} RKLLMTokenInput;

/**
 * @struct RKLLMMultiModelInput
 * @brief Multimodal input (text prompt plus image embedding).
 */
typedef struct {
    char* prompt;          /**< Text prompt. */
    float* image_embed;    /**< Image embedding; length is n_image_tokens * n_image_embed floats. */
    size_t n_image_tokens; /**< Number of image tokens. */
} RKLLMMultiModelInput;

/**
119
* @struct RKLLMInput
120
* @brief Represents different types of input to the LLM via a union.
121
*/
122
typedef struct {
123
RKLLMInputType input_type; /**< Specifies the type of input provided (e.g., prompt, token, embed, multimodal). */
124
union {
125
const char* prompt_input; /**< Text prompt input if input_type is RKLLM_INPUT_PROMPT. */
126
RKLLMEmbedInput embed_input; /**< Embedding input if input_type is RKLLM_INPUT_EMBED. */
127
RKLLMTokenInput token_input; /**< Token input if input_type is RKLLM_INPUT_TOKEN. */
128
RKLLMMultiModelInput multimodal_input; /**< Multimodal input if input_type is RKLLM_INPUT_MULTIMODAL. */
129
};
130
} RKLLMInput;
131
132
/**
 * @struct RKLLMLoraParam
 * @brief Selects a previously loaded LoRA adapter by name for an inference call.
 */
typedef struct {
    const char* lora_adapter_name; /**< Name of the LoRA adapter (as given at load time). */
} RKLLMLoraParam;

/**
 * @struct RKLLMPromptCacheParam
 * @brief Controls prompt-cache persistence for an inference call.
 */
typedef struct {
    int save_prompt_cache;         /**< 1 to save the prompt cache, 0 to skip saving. */
    const char* prompt_cache_path; /**< File path where the prompt cache is stored. */
} RKLLMPromptCacheParam;

/**
150
* @struct RKLLMInferParam
151
* @brief Structure for defining parameters during inference.
152
*/
153
typedef struct {
154
RKLLMInferMode mode; /**< Inference mode (e.g., generate or get last hidden layer). */
155
RKLLMLoraParam* lora_params; /**< Pointer to Lora adapter parameters. */
156
RKLLMPromptCacheParam* prompt_cache_params; /**< Pointer to prompt cache parameters. */
157
} RKLLMInferParam;
158
159
/**
 * @struct RKLLMResultLastHiddenLayer
 * @brief Hidden states from the model's last layer.
 */
typedef struct {
    const float* hidden_states; /**< Hidden-state data; length is num_tokens * embd_size floats. */
    int embd_size;              /**< Embedding vector size per token. */
    int num_tokens;             /**< Number of tokens with stored hidden states. */
} RKLLMResultLastHiddenLayer;

/**
170
* @struct RKLLMResult
171
* @brief Structure to represent the result of LLM inference.
172
*/
173
typedef struct {
174
const char* text; /**< Generated text result. */
175
int32_t token_id; /**< ID of the generated token. */
176
RKLLMResultLastHiddenLayer last_hidden_layer; /**< Hidden states of the last layer (if requested). */
177
} RKLLMResult;
178
179
/**
180
* @typedef LLMResultCallback
181
* @brief Callback function to handle LLM results.
182
* @param result Pointer to the LLM result.
183
* @param userdata Pointer to user data for the callback.
184
* @param state State of the LLM call (e.g., finished, error).
185
*/
186
typedef void(*LLMResultCallback)(RKLLMResult* result, void* userdata, LLMCallState state);
187
188
/**
189
* @brief Creates a default RKLLMParam structure with preset values.
190
* @return A default RKLLMParam structure.
191
*/
192
RKLLMParam rkllm_createDefaultParam();
193
194
/**
195
* @brief Initializes the LLM with the given parameters.
196
* @param handle Pointer to the LLM handle.
197
* @param param Configuration parameters for the LLM.
198
* @param callback Callback function to handle LLM results.
199
* @return Status code (0 for success, non-zero for failure).
200
*/
201
int rkllm_init(LLMHandle* handle, RKLLMParam* param, LLMResultCallback callback);
202
203
/**
204
* @brief Loads a Lora adapter into the LLM.
205
* @param handle LLM handle.
206
* @param lora_adapter Pointer to the Lora adapter structure.
207
* @return Status code (0 for success, non-zero for failure).
208
*/
209
int rkllm_load_lora(LLMHandle handle, RKLLMLoraAdapter* lora_adapter);
210
211
/**
212
* @brief Loads a prompt cache from a file.
213
* @param handle LLM handle.
214
* @param prompt_cache_path Path to the prompt cache file.
215
* @return Status code (0 for success, non-zero for failure).
216
*/
217
int rkllm_load_prompt_cache(LLMHandle handle, const char* prompt_cache_path);
218
219
/**
220
* @brief Releases the prompt cache from memory.
221
* @param handle LLM handle.
222
* @return Status code (0 for success, non-zero for failure).
223
*/
224
int rkllm_release_prompt_cache(LLMHandle handle);
225
226
/**
227
* @brief Destroys the LLM instance and releases resources.
228
* @param handle LLM handle.
229
* @return Status code (0 for success, non-zero for failure).
230
*/
231
int rkllm_destroy(LLMHandle handle);
232
233
/**
234
* @brief Runs an LLM inference task synchronously.
235
* @param handle LLM handle.
236
* @param rkllm_input Input data for the LLM.
237
* @param rkllm_infer_params Parameters for the inference task.
238
* @param userdata Pointer to user data for the callback.
239
* @return Status code (0 for success, non-zero for failure).
240
*/
241
int rkllm_run(LLMHandle handle, RKLLMInput* rkllm_input, RKLLMInferParam* rkllm_infer_params, void* userdata);
242
243
/**
244
* @brief Runs an LLM inference task asynchronously.
245
* @param handle LLM handle.
246
* @param rkllm_input Input data for the LLM.
247
* @param rkllm_infer_params Parameters for the inference task.
248
* @param userdata Pointer to user data for the callback.
249
* @return Status code (0 for success, non-zero for failure).
250
*/
251
int rkllm_run_async(LLMHandle handle, RKLLMInput* rkllm_input, RKLLMInferParam* rkllm_infer_params, void* userdata);
252
253
/**
254
* @brief Aborts an ongoing LLM task.
255
* @param handle LLM handle.
256
* @return Status code (0 for success, non-zero for failure).
257
*/
258
int rkllm_abort(LLMHandle handle);
259
260
/**
261
* @brief Checks if an LLM task is currently running.
262
* @param handle LLM handle.
263
* @return Status code (0 if a task is running, non-zero for otherwise).
264
*/
265
int rkllm_is_running(LLMHandle handle);
266
267
#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* _RKLLM_H_ */