// grafana/public/app/features/dashboard/components/GenAI/llms/openai.ts

/**
* OpenAI API client.
*
* This module contains functions used to make requests to the OpenAI API via
* the Grafana LLM app plugin. That plugin must be installed, enabled and configured
* in order for these functions to work.
*
* The {@link enabled} function can be used to check if the plugin is enabled and configured.
*/
import { pipe, Observable, UnaryFunction } from 'rxjs';
import { filter, map, scan, takeWhile, tap } from 'rxjs/operators';
import {
isLiveChannelMessageEvent,
LiveChannelAddress,
LiveChannelMessageEvent,
LiveChannelScope,
} from '@grafana/data';
import { getBackendSrv, getGrafanaLiveSrv, logDebug } from '@grafana/runtime';
import { LLM_PLUGIN_ID, LLM_PLUGIN_ROUTE, setLLMPluginVersion } from './constants';
import { LLMAppSettings } from './types';
const OPENAI_CHAT_COMPLETIONS_PATH = 'openai/v1/chat/completions';
/** The role of a message's author. */
export type Role = 'system' | 'user' | 'assistant' | 'function';
/** A message in a conversation. */
export interface Message {
/** The role of the message's author. */
role: Role;
/** The contents of the message. content is required for all messages, and may be null for assistant messages with function calls. */
content: string;
/**
* The name of the author of this message.
*
* This is required if role is 'function', and it should be the name of the function whose response is in the content.
*
* May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters.
*/
name?: string;
/**
* The name and arguments of a function that should be called, as generated by the model.
*/
function_call?: Object;
}
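/*
 * Illustrative example (not part of the original module): a short conversation,
 * including an assistant function call and the function's reply. The function
 * name and content strings are assumptions, shown only to demonstrate the shape
 * of the `Message` type.
 *
 * const messages: Message[] = [
 *   { role: 'system', content: 'You are a helpful assistant.' },
 *   { role: 'user', content: 'What is the weather in Oslo?' },
 *   { role: 'assistant', content: '', function_call: { name: 'get_weather', arguments: '{"city":"Oslo"}' } },
 *   { role: 'function', name: 'get_weather', content: '{"temperature_c": 12}' },
 * ];
 */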
/** A function the model may generate JSON inputs for. */
export interface Function {
/**
* The name of the function to be called.
*
* Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
*/
name: string;
/**
* A description of what the function does, used by the model to choose when and how to call the function.
*/
description?: string;
/**
* The parameters the functions accepts, described as a JSON Schema object. See the OpenAI guide for examples, and the JSON Schema reference for documentation about the format.
*
* To describe a function that accepts no parameters, provide the value {"type": "object", "properties": {}}.
*/
parameters: Object;
}
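/*
 * Illustrative example (not part of the original module): a `Function` definition
 * whose `parameters` field is a JSON Schema object, plus the documented shape for
 * a function that accepts no parameters. The names and schema are assumptions.
 *
 * const getWeather: Function = {
 *   name: 'get_weather',
 *   description: 'Get the current weather for a city.',
 *   parameters: {
 *     type: 'object',
 *     properties: { city: { type: 'string', description: 'The city to look up.' } },
 *     required: ['city'],
 *   },
 * };
 * const noParams: Function = { name: 'ping', parameters: { type: 'object', properties: {} } };
 */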
export interface ChatCompletionsRequest {
/**
* ID of the model to use.
*
* See the model endpoint compatibility table for details on which models work with the Chat Completions API.
*/
model: string;
/** A list of messages comprising the conversation so far. */
messages: Message[];
/** A list of functions the model may generate JSON inputs for. */
functions?: Function[];
/**
* Controls how the model responds to function calls.
*
* "none" means the model does not call a function, and responds to the end-user.
* "auto" means the model can pick between an end-user or calling a function.
* Specifying a particular function via {"name": "my_function"} forces the model to call that function.
*
* "none" is the default when no functions are present. "auto" is the default if functions are present.
*/
function_call?: 'none' | 'auto' | { name: string };
/**
* What sampling temperature to use, between 0 and 2.
* Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
*
* We generally recommend altering this or top_p but not both.
*/
temperature?: number;
/**
* An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
* So 0.1 means only the tokens comprising the top 10% probability mass are considered.
*
* We generally recommend altering this or temperature but not both.
*/
top_p?: number;
/**
* How many chat completion choices to generate for each input message.
*/
n?: number;
/**
* Up to 4 sequences where the API will stop generating further tokens.
*/
stop?: string | string[];
/**
* The maximum number of tokens to generate in the chat completion.
*
* The total length of input tokens and generated tokens is limited by the model's context length.
*/
max_tokens?: number;
/**
* Number between -2.0 and 2.0.
*
* Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
*/
presence_penalty?: number;
/**
* Number between -2.0 and 2.0.
*
* Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
*/
frequency_penalty?: number;
/**
* Modify the likelihood of specified tokens appearing in the completion.
*
* Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100.
* Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,
* but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban
* or exclusive selection of the relevant token.
*/
logit_bias?: { [key: string]: number };
/**
* A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
*/
user?: string;
}
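/*
 * Illustrative example (not part of the original module): a minimal request body
 * using a handful of the fields documented above. The model name, prompt, and
 * sampling settings are assumptions chosen only to show where each field goes.
 *
 * const request: ChatCompletionsRequest = {
 *   model: 'gpt-3.5-turbo',
 *   messages: [
 *     { role: 'system', content: 'You are a helpful assistant.' },
 *     { role: 'user', content: 'Suggest a title for this dashboard.' },
 *   ],
 *   temperature: 0.2,
 *   max_tokens: 50,
 * };
 */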
/** A completion object from an OpenAI model. */
export interface Choice {
/** The message object generated by the model. */
message: Message;
/**
* The reason the model stopped generating text.
*
* This may be one of:
* - stop: API returned complete message, or a message terminated by one of the stop sequences provided via the stop parameter
* - length: incomplete model output due to max_tokens parameter or token limit
* - function_call: the model decided to call a function
* - content_filter: omitted content due to a flag from the API's content filters
* - null: API response still in progress or incomplete
*/
finish_reason: string;
/** The index of the completion in the list of choices. */
index: number;
}
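/*
 * Illustrative example (not part of the original module): branching on the
 * documented `finish_reason` values of a choice. Only the value names come from
 * the docs above; the handling itself is an assumption.
 *
 * function describeFinish(choice: Choice): string {
 *   switch (choice.finish_reason) {
 *     case 'stop':
 *       return 'complete message';
 *     case 'length':
 *       return 'truncated by max_tokens or the context length';
 *     case 'function_call':
 *       return 'the model wants to call a function';
 *     case 'content_filter':
 *       return 'content was filtered';
 *     default:
 *       return 'response still in progress or incomplete';
 *   }
 * }
 */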
/** The usage statistics for a request to the OpenAI API. */
export interface Usage {
/** The number of tokens in the prompt. */
prompt_tokens: number;
/** The number of tokens in the completion. */
completion_tokens: number;
/** The total number of tokens. */
total_tokens: number;
}
/** The error response from the Grafana LLM app when trying to call the chat completions API. */
interface ChatCompletionsErrorResponse {
/** The error message. */
error: string;
}
/** A response from the OpenAI Chat Completions API. */
export interface ChatCompletionsResponse<T = Choice> {
/** The ID of the request. */
id: string;
/** The type of object returned (e.g. 'chat.completion'). */
object: string;
/** The timestamp of the request, as a UNIX timestamp. */
created: number;
/** The name of the model used to generate the response. */
model: string;
/** A list of completion objects (only one, unless `n > 1` in the request). */
choices: T[];
/** The number of tokens used to generate the replies, counting prompt, completion, and total. */
usage: Usage;
}
/** A content message returned from the model. */
export interface ContentMessage {
/** The content of the message. */
content: string;
}
/** A message returned from the model indicating that it is done. */
export interface DoneMessage {
done: boolean;
}
/** A function call message returned from the model. */
export interface FunctionCallMessage {
/** The name of the function to call. */
name: string;
/** JSON string for the arguments to the function call. */
arguments: string;
}
/**
* A delta returned from a stream of chat completion responses.
*
* In practice this will be either a content message or a function call;
* done messages are filtered out by the `streamChatCompletions` function.
*/
export type ChatCompletionsDelta = ContentMessage | FunctionCallMessage | DoneMessage;
/** A chunk included in a chat completion response. */
export interface ChatCompletionsChunk {
/** The delta since the previous chunk. */
delta: ChatCompletionsDelta;
}
/** Return true if the message is a 'content' message. */
export function isContentMessage(message: unknown): message is ContentMessage {
return typeof message === 'object' && message !== null && 'content' in message;
}
/** Return true if the message is a 'done' message. */
export function isDoneMessage(message: unknown): message is DoneMessage {
return typeof message === 'object' && message !== null && 'done' in message;
}
/** Return true if the response is an error response. */
export function isErrorResponse(response: unknown): response is ChatCompletionsErrorResponse {
return typeof response === 'object' && response !== null && 'error' in response;
}
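/*
 * Illustrative example (not part of the original module): narrowing a streamed
 * delta with the type guards above. `handleChunk` is a hypothetical helper that
 * would receive one emission from `streamChatCompletions`.
 *
 * function handleChunk(chunk: ChatCompletionsResponse<ChatCompletionsChunk>): void {
 *   const delta = chunk.choices[0].delta;
 *   if (isDoneMessage(delta)) {
 *     return; // the stream has finished
 *   }
 *   if (isContentMessage(delta)) {
 *     console.log('token:', delta.content);
 *   }
 * }
 */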
/**
* An rxjs operator that extracts the content messages from a stream of chat completion responses.
*
* @returns An observable that emits the content messages. Each emission will be a string containing the
* token emitted by the model.
* @example <caption>Example of reading all tokens in a stream.</caption>
* const stream = streamChatCompletions({ model: 'gpt-3.5-turbo', messages: [
* { role: 'system', content: 'You are a great bot.' },
* { role: 'user', content: 'Hello, bot.' },
* ]}).pipe(extractContent());
* stream.subscribe({ next: console.log, error: console.error });
* // Output:
* // ['Hello', '! ', 'How ', 'are ', 'you', '?']
*/
export function extractContent(): UnaryFunction<
Observable<ChatCompletionsResponse<ChatCompletionsChunk>>,
Observable<string>
> {
return pipe(
filter((response: ChatCompletionsResponse<ChatCompletionsChunk>) => isContentMessage(response.choices[0].delta)),
// The type assertion is needed here because the type predicate above doesn't seem to propagate.
map(
(response: ChatCompletionsResponse<ChatCompletionsChunk>) => (response.choices[0].delta as ContentMessage).content
)
);
}
/**
* An rxjs operator that accumulates the content messages from a stream of chat completion responses.
*
* @returns An observable that emits the accumulated content messages. Each emission will be a string containing the
* content of all messages received so far.
* @example
* const stream = streamChatCompletions({ model: 'gpt-3.5-turbo', messages: [
* { role: 'system', content: 'You are a great bot.' },
* { role: 'user', content: 'Hello, bot.' },
* ]}).pipe(accumulateContent());
* stream.subscribe({ next: console.log, error: console.error });
* // Output:
* // ['Hello', 'Hello! ', 'Hello! How ', 'Hello! How are ', 'Hello! How are you', 'Hello! How are you?']
*/
export function accumulateContent(): UnaryFunction<
Observable<ChatCompletionsResponse<ChatCompletionsChunk>>,
Observable<string>
> {
return pipe(
extractContent(),
scan((acc, curr) => acc + curr, '')
);
}
/**
* Make a request to OpenAI's chat-completions API via the Grafana LLM plugin proxy.
*/
export async function chatCompletions(request: ChatCompletionsRequest): Promise<ChatCompletionsResponse> {
const response = await getBackendSrv().post<ChatCompletionsResponse>(
'/api/plugins/grafana-llm-app/resources/openai/v1/chat/completions',
request,
{
headers: { 'Content-Type': 'application/json' },
}
);
return response;
}
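/*
 * Illustrative usage sketch (not part of the original module): awaiting a single,
 * non-streaming completion and reading the first choice. The model and prompt are
 * assumptions.
 *
 * const response = await chatCompletions({
 *   model: 'gpt-3.5-turbo',
 *   messages: [
 *     { role: 'system', content: 'You are a helpful assistant.' },
 *     { role: 'user', content: 'Say hello.' },
 *   ],
 * });
 * console.log(response.choices[0].message.content);
 */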
/**
* Make a streaming request to OpenAI's chat-completions API via the Grafana LLM plugin proxy.
*
* A stream of tokens will be returned as an `Observable<string>`. Use the `extractContent` operator to
* filter the stream to only content messages, or the `accumulateContent` operator to obtain a stream of
* accumulated content messages.
*
* The 'done' message will not be emitted; the stream will simply end when this message is encountered.
*
* @example <caption>Example of reading all tokens in a stream.</caption>
* const stream = streamChatCompletions({ model: 'gpt-3.5-turbo', messages: [
* { role: 'system', content: 'You are a great bot.' },
* { role: 'user', content: 'Hello, bot.' },
* ]}).pipe(extractContent());
* stream.subscribe({ next: console.log, error: console.error });
* // Output:
* // ['Hello', '! ', 'How ', 'are ', 'you', '?']
*
* @example <caption>Example of accumulating tokens in a stream.</caption>
* const stream = streamChatCompletions({ model: 'gpt-3.5-turbo', messages: [
* { role: 'system', content: 'You are a great bot.' },
* { role: 'user', content: 'Hello, bot.' },
* ]}).pipe(accumulateContent());
* stream.subscribe({ next: console.log, error: console.error });
* // Output:
* // ['Hello', 'Hello! ', 'Hello! How ', 'Hello! How are ', 'Hello! How are you', 'Hello! How are you?']
*/
export function streamChatCompletions(
request: ChatCompletionsRequest
): Observable<ChatCompletionsResponse<ChatCompletionsChunk>> {
const channel: LiveChannelAddress = {
scope: LiveChannelScope.Plugin,
namespace: LLM_PLUGIN_ID,
path: OPENAI_CHAT_COMPLETIONS_PATH + '/' + self.crypto.randomUUID(),
data: request,
};
const messages = getGrafanaLiveSrv()
.getStream(channel)
.pipe(filter((event) => isLiveChannelMessageEvent(event))) as Observable<
LiveChannelMessageEvent<ChatCompletionsResponse<ChatCompletionsChunk>>
>;
return messages.pipe(
tap((event) => {
if (isErrorResponse(event.message)) {
throw new Error(event.message.error);
}
}),
takeWhile((event) => isErrorResponse(event.message) || !isDoneMessage(event.message.choices[0].delta)),
map((event) => event.message)
);
}
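/*
 * Illustrative usage sketch (not part of the original module): subscribing to the
 * stream with explicit error and completion handling. `request` stands in for a
 * `ChatCompletionsRequest` like the one sketched earlier. Because the 'done'
 * message is excluded by `takeWhile`, `complete` fires once the model finishes.
 *
 * const subscription = streamChatCompletions(request)
 *   .pipe(accumulateContent())
 *   .subscribe({
 *     next: (text) => console.log(text),
 *     error: (err) => console.error(err),
 *     complete: () => console.log('stream finished'),
 *   });
 * // Call subscription.unsubscribe() to stop listening early.
 */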
let loggedWarning = false;
/** Check if the OpenAI API is enabled via the LLM plugin. */
export const enabled = async () => {
try {
const settings: LLMAppSettings = await getBackendSrv().get(`${LLM_PLUGIN_ROUTE}/settings`, undefined, undefined, {
showSuccessAlert: false,
showErrorAlert: false,
});
setLLMPluginVersion(settings.info.version);
return settings.enabled ?? false;
} catch (e) {
if (!loggedWarning) {
logDebug(String(e));
logDebug(
'Failed to check if OpenAI is enabled. This is expected if the Grafana LLM plugin is not installed, and the above error can be ignored.'
);
loggedWarning = true;
}
return false;
}
};
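/*
 * Illustrative usage sketch (not part of the original module): gating a request on
 * the plugin being installed, enabled and configured, as the module docs describe.
 * The prompt and fallback behaviour are assumptions.
 *
 * if (await enabled()) {
 *   const reply = await chatCompletions({
 *     model: 'gpt-3.5-turbo',
 *     messages: [{ role: 'user', content: 'Hello, bot.' }],
 *   });
 *   console.log(reply.choices[0].message.content);
 * } else {
 *   console.log('Grafana LLM plugin is not available; skipping OpenAI request.');
 * }
 */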