- Review OpenAI response.
All checks were successful
GBCI / build (push) Successful in 7m24s

This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-09-24 17:07:21 -03:00
parent a7bf8353f0
commit 51a2141a00
3 changed files with 62 additions and 62 deletions

View file

@@ -1,6 +1,7 @@
# Recreate the container from a backup so the storage changes apply cleanly.
lxc stop pragmatismo-alm-ci
lxc export pragmatismo-alm-ci backup.tar.gz
lxc delete pragmatismo-alm-ci
lxc import backup.tar.gz pragmatismo-alm-ci
# Raise the quota on the container's ZFS dataset and bring it back up.
zfs set refquota=15G default/containers/pragmatismo-alm-ci
lxc start pragmatismo-alm-ci
# Override the container's root disk device and cap its size.
lxc config device override $CONTAINER_NAME root
lxc config device set $CONTAINER_NAME root size 6GB
# Let the pool expand into the resized backing image, then verify.
zpool set autoexpand=on default
zpool online -e default /var/snap/lxd/common/lxd/disks/default.img
zpool list
zfs list

View file

@@ -41,7 +41,6 @@ fn clean_request_body(body: &str) -> String {
let re = Regex::new(r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#).unwrap();
re.replace_all(body, "").to_string()
}
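For reference, a minimal sketch of what this cleaner does; the request body and the test below are illustrative additions, not part of this commit:
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strips_unsupported_sampler_fields() {
        // Hypothetical body carrying fields the downstream provider rejects.
        let body = r#"{"messages":[{"role":"user","content":"hi"}],"top_p":0.9,"presence_penalty":0.5}"#;
        // The listed keys and their values are removed; supported fields stay untouched.
        assert_eq!(
            clean_request_body(body),
            r#"{"messages":[{"role":"user","content":"hi"}]}"#
        );
    }
}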
#[post("/v1/chat/completions")]
pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
// Log raw POST data
@@ -58,9 +57,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
let endpoint = env::var("AI_ENDPOINT")
.map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;
// Clean the request body (remove unsupported parameters)
let cleaned_body_str = clean_request_body(body_str);
info!("Cleaned POST Data: {}", cleaned_body_str);
// Parse and modify the request body
let mut json_value: serde_json::Value = serde_json::from_str(body_str)
.map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
// Add model parameter
if let Some(obj) = json_value.as_object_mut() {
obj.insert("model".to_string(), serde_json::Value::String(model));
}
let modified_body_str = serde_json::to_string(&json_value)
.map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
info!("Modified POST Data: {}", modified_body_str);
// Set up headers
let mut headers = reqwest::header::HeaderMap::new();
@@ -74,21 +83,7 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
reqwest::header::HeaderValue::from_static("application/json"),
);
// After cleaning the request body, add the unused parameter
let mut json_value: serde_json::Value = serde_json::from_str(&cleaned_body_str)
.map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
// Add the unused parameter
json_value
.as_object_mut()
.unwrap()
.insert("model".to_string(), serde_json::Value::String(model));
// Serialize the modified JSON
let modified_body_str = serde_json::to_string(&json_value)
.map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
// Send request to the OpenAI-compatible provider
// Send request to the AI provider
let client = Client::new();
let response = client
.post(&endpoint)
@@ -108,35 +103,50 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
info!("Provider response status: {}", status);
info!("Provider response body: {}", raw_response);
// Convert Llama response to OpenAI format if successful
// Convert response to OpenAI format if successful
if status.is_success() {
match convert_llama_to_openai_format(&raw_response) {
match convert_to_openai_format(&raw_response) {
Ok(openai_response) => Ok(HttpResponse::Ok()
.content_type("application/json")
.body(openai_response)),
Err(e) => {
error!("Failed to convert response format: {}", e);
Err(actix_web::error::ErrorInternalServerError(
"Response format conversion failed",
))
// Return the original response if conversion fails
Ok(HttpResponse::Ok()
.content_type("application/json")
.body(raw_response))
}
}
} else {
// Return error as-is
let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
.unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
Ok(HttpResponse::build(actix_status).body(raw_response))
Ok(HttpResponse::build(actix_status)
.content_type("application/json")
.body(raw_response))
}
}
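For context, a sketch of how a client could exercise this route; the localhost address, port, and the blocking reqwest client are assumptions for illustration, not taken from the commit:
// Assumed bind address; the real host/port is configured elsewhere in the server.
let resp = reqwest::blocking::Client::new()
    .post("http://localhost:8080/v1/chat/completions")
    .header("Content-Type", "application/json")
    .body(r#"{"messages":[{"role":"user","content":"ping"}]}"#)
    .send()
    .expect("request to the proxy failed");
// The proxy injects the configured model, forwards to AI_ENDPOINT, and returns
// an OpenAI-style chat.completion JSON body.
println!("{}", resp.text().expect("response had no body"));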
/// Converts Llama response format to OpenAI-compatible format
fn convert_llama_to_openai_format(
llama_response: &str,
) -> Result<String, Box<dyn std::error::Error>> {
/// Converts provider response to OpenAI-compatible format
fn convert_to_openai_format(provider_response: &str) -> Result<String, Box<dyn std::error::Error>> {
#[derive(serde::Deserialize)]
struct LlamaResponse {
struct ProviderResponse {
text: String,
sources: Option<Vec<serde_json::Value>>,
#[serde(default)]
generated_tokens: Option<u32>,
#[serde(default)]
input_tokens: Option<u32>,
}
#[derive(serde::Serialize)]
struct OpenAIResponse {
id: String,
object: String,
created: u64,
model: String,
choices: Vec<OpenAIChoice>,
usage: OpenAIUsage,
}
#[derive(serde::Serialize)]
@@ -159,47 +169,36 @@ fn convert_llama_to_openai_format(
total_tokens: u32,
}
#[derive(serde::Serialize)]
struct OpenAIResponse {
id: String,
object: String,
created: u64,
model: String,
choices: Vec<OpenAIChoice>,
usage: OpenAIUsage,
}
// Parse the provider response
let provider: ProviderResponse = serde_json::from_str(provider_response)?;
// Parse the Llama response
let llama: LlamaResponse = serde_json::from_str(llama_response)?;
let completion_tokens = provider
.generated_tokens
.unwrap_or_else(|| provider.text.split_whitespace().count() as u32);
// Prepare content and token counts without moving llama.text
let content = llama.text.clone();
let token_count = content.split_whitespace().count() as u32;
let prompt_tokens = provider.input_tokens.unwrap_or(0);
let total_tokens = prompt_tokens + completion_tokens;
// Create OpenAI-compatible response
let openai_response = OpenAIResponse {
id: format!(
"chatcmpl-{}",
uuid::Uuid::new_v4().to_string().replace("-", "")
),
id: format!("chatcmpl-{}", uuid::Uuid::new_v4().simple()),
object: "chat.completion".to_string(),
created: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs(),
model: "llama".to_string(), // You might want to make this configurable
model: "llama".to_string(),
choices: vec![OpenAIChoice {
index: 0,
message: OpenAIMessage {
role: "assistant".to_string(),
content,
content: provider.text,
},
finish_reason: "stop".to_string(),
}],
usage: OpenAIUsage {
prompt_tokens: 0, // You might need to extract these from the Llama response
completion_tokens: token_count,
total_tokens: token_count,
prompt_tokens,
completion_tokens,
total_tokens,
},
};
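A hedged illustration of the mapping above; the provider payload is invented to show the shape and is not taken from the repository:
// Hypothetical provider payload with explicit token counts.
let provider_json = r#"{"text":"Hello!","sources":null,"generated_tokens":3,"input_tokens":5}"#;
let openai_json = convert_to_openai_format(provider_json).expect("conversion failed");
// Yields a "chat.completion" object whose single choice carries "Hello!" as the
// assistant message and whose usage reports prompt_tokens=5, completion_tokens=3,
// total_tokens=8.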

View file

@@ -432,7 +432,7 @@ struct LlamaCppEmbeddingRequest {
#[derive(Debug, Deserialize)]
struct LlamaCppEmbeddingResponseItem {
pub index: usize,
pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
pub embedding: Vec<Vec<f32>>, // Note the odd shape: embedding is an array of arrays
}
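Callers typically want one flat vector per input; a minimal sketch of handling the nested shape, assuming the server returns a single inner row per input (the helper is illustrative, not part of this file):
// Hypothetical helper: pull a flat vector out of the nested llama.cpp payload.
fn flatten_embedding(item: &LlamaCppEmbeddingResponseItem) -> Vec<f32> {
    item.embedding.first().cloned().unwrap_or_default()
}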
// Proxy endpoint for embeddings