- Review OpenAI response.
All checks were successful: GBCI / build (push) succeeded in 7m24s

Rodrigo Rodriguez (Pragmatismo) 2025-09-24 17:07:21 -03:00
parent a7bf8353f0
commit 51a2141a00
3 changed files with 62 additions and 62 deletions


@@ -1,6 +1,7 @@
-lxc stop pragmatismo-alm-ci
-lxc export pragmatismo-alm-ci backup.tar.gz
-lxc delete pragmatismo-alm-ci
-lxc import backup.tar.gz pragmatismo-alm-ci
-zfs set refquota=15G default/containers/pragmatismo-alm-ci
-lxc start pragmatismo-alm-ci
+lxc config device override $CONTAINER_NAME root
+lxc config device set $CONTAINER_NAME root size 6GB
+zpool set autoexpand=on default
+zpool online -e default /var/snap/lxd/common/lxd/disks/default.img
+zpool list
+zfs list


@@ -41,7 +41,6 @@ fn clean_request_body(body: &str) -> String {
     let re = Regex::new(r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#).unwrap();
     re.replace_all(body, "").to_string()
 }

 #[post("/v1/chat/completions")]
 pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
     // Log raw POST data
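Reviewer note: the unchanged clean_request_body helper shown as context above is easier to follow with a concrete input. Below is a minimal standalone sketch, assuming the regex crate as a dependency and a made-up request body; it only illustrates which fields the existing pattern strips.

```rust
use regex::Regex;

fn main() {
    // Made-up OpenAI-style payload containing two of the unsupported parameters.
    let body = r#"{"messages":[{"role":"user","content":"hi"}],"top_p":0.9,"presence_penalty":0.5}"#;

    // Same pattern as clean_request_body: match ,"<param>": <value> and drop it.
    let re = Regex::new(
        r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#,
    )
    .unwrap();

    let cleaned = re.replace_all(body, "").to_string();
    // Prints: {"messages":[{"role":"user","content":"hi"}]}
    println!("{}", cleaned);
}
```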
@@ -58,9 +57,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
     let endpoint = env::var("AI_ENDPOINT")
         .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;

-    // Clean the request body (remove unsupported parameters)
-    let cleaned_body_str = clean_request_body(body_str);
-    info!("Cleaned POST Data: {}", cleaned_body_str);
+    // Parse and modify the request body
+    let mut json_value: serde_json::Value = serde_json::from_str(body_str)
+        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
+
+    // Add model parameter
+    if let Some(obj) = json_value.as_object_mut() {
+        obj.insert("model".to_string(), serde_json::Value::String(model));
+    }
+
+    let modified_body_str = serde_json::to_string(&json_value)
+        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
+    info!("Modified POST Data: {}", modified_body_str);

     // Set up headers
     let mut headers = reqwest::header::HeaderMap::new();
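As a quick illustration of the hunk above: the new body handling boils down to parse, inject "model", re-serialize. A minimal sketch assuming only serde_json; the request body and the "llama" value are made up, and in the real handler model is a variable set earlier, outside this hunk.

```rust
use serde_json::Value;

fn main() {
    // Assumption: in the handler this comes from configuration; hard-coded here for illustration.
    let model = "llama".to_string();
    let body_str = r#"{"messages":[{"role":"user","content":"hi"}],"stream":false}"#;

    // Parse the incoming body, add the "model" field, and serialize it back.
    let mut json_value: Value = serde_json::from_str(body_str).expect("valid JSON");
    if let Some(obj) = json_value.as_object_mut() {
        obj.insert("model".to_string(), Value::String(model));
    }
    let modified_body_str = serde_json::to_string(&json_value).expect("serializable");

    // Prints the same payload with "model":"llama" added (field order may differ).
    println!("{}", modified_body_str);
}
```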
@@ -74,21 +83,7 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
         reqwest::header::HeaderValue::from_static("application/json"),
     );

-    // After cleaning the request body, add the unused parameter
-    let mut json_value: serde_json::Value = serde_json::from_str(&cleaned_body_str)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
-
-    // Add the unused parameter
-    json_value
-        .as_object_mut()
-        .unwrap()
-        .insert("model".to_string(), serde_json::Value::String(model));
-
-    // Serialize the modified JSON
-    let modified_body_str = serde_json::to_string(&json_value)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
-
-    // Send request to the OpenAI-compatible provider
+    // Send request to the AI provider
     let client = Client::new();
     let response = client
         .post(&endpoint)
@@ -108,35 +103,50 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
     info!("Provider response status: {}", status);
     info!("Provider response body: {}", raw_response);

-    // Convert Llama response to OpenAI format if successful
+    // Convert response to OpenAI format if successful
     if status.is_success() {
-        match convert_llama_to_openai_format(&raw_response) {
+        match convert_to_openai_format(&raw_response) {
             Ok(openai_response) => Ok(HttpResponse::Ok()
                 .content_type("application/json")
                 .body(openai_response)),
             Err(e) => {
                 error!("Failed to convert response format: {}", e);
-                Err(actix_web::error::ErrorInternalServerError(
-                    "Response format conversion failed",
-                ))
+                // Return the original response if conversion fails
+                Ok(HttpResponse::Ok()
+                    .content_type("application/json")
+                    .body(raw_response))
             }
         }
     } else {
+        // Return error as-is
         let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
             .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
-        Ok(HttpResponse::build(actix_status).body(raw_response))
+        Ok(HttpResponse::build(actix_status)
+            .content_type("application/json")
+            .body(raw_response))
     }
 }

-/// Converts Llama response format to OpenAI-compatible format
-fn convert_llama_to_openai_format(
-    llama_response: &str,
-) -> Result<String, Box<dyn std::error::Error>> {
+/// Converts provider response to OpenAI-compatible format
+fn convert_to_openai_format(provider_response: &str) -> Result<String, Box<dyn std::error::Error>> {
     #[derive(serde::Deserialize)]
-    struct LlamaResponse {
+    struct ProviderResponse {
         text: String,
-        sources: Option<Vec<serde_json::Value>>,
+        #[serde(default)]
+        generated_tokens: Option<u32>,
+        #[serde(default)]
+        input_tokens: Option<u32>,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
     }

     #[derive(serde::Serialize)]
@@ -159,47 +169,36 @@ fn convert_llama_to_openai_format(
         total_tokens: u32,
     }

-    #[derive(serde::Serialize)]
-    struct OpenAIResponse {
-        id: String,
-        object: String,
-        created: u64,
-        model: String,
-        choices: Vec<OpenAIChoice>,
-        usage: OpenAIUsage,
-    }
-
-    // Parse the Llama response
-    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
-
-    // Prepare content and token counts without moving llama.text
-    let content = llama.text.clone();
-    let token_count = content.split_whitespace().count() as u32;
-
-    // Create OpenAI-compatible response
+    // Parse the provider response
+    let provider: ProviderResponse = serde_json::from_str(provider_response)?;
+
+    let completion_tokens = provider
+        .generated_tokens
+        .unwrap_or_else(|| provider.text.split_whitespace().count() as u32);
+    let prompt_tokens = provider.input_tokens.unwrap_or(0);
+    let total_tokens = prompt_tokens + completion_tokens;
+
     let openai_response = OpenAIResponse {
-        id: format!(
-            "chatcmpl-{}",
-            uuid::Uuid::new_v4().to_string().replace("-", "")
-        ),
+        id: format!("chatcmpl-{}", uuid::Uuid::new_v4().simple()),
         object: "chat.completion".to_string(),
         created: std::time::SystemTime::now()
             .duration_since(std::time::UNIX_EPOCH)
             .unwrap()
             .as_secs(),
-        model: "llama".to_string(), // You might want to make this configurable
+        model: "llama".to_string(),
         choices: vec![OpenAIChoice {
             index: 0,
             message: OpenAIMessage {
                 role: "assistant".to_string(),
-                content,
+                content: provider.text,
             },
             finish_reason: "stop".to_string(),
         }],
         usage: OpenAIUsage {
-            prompt_tokens: 0, // You might need to extract these from the Llama response
-            completion_tokens: token_count,
-            total_tokens: token_count,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
         },
     };
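To make the usage accounting in the new convert_to_openai_format concrete: it prefers the provider's reported token counts and only falls back to a whitespace word count. A minimal sketch of that fallback with made-up numbers, not the full conversion:

```rust
fn main() {
    // Made-up provider fields: no generated_tokens reported, 12 prompt tokens.
    let text = "Hello from the provider";
    let generated_tokens: Option<u32> = None;
    let input_tokens: Option<u32> = Some(12);

    // Same fallback as convert_to_openai_format: word count stands in for completion tokens.
    let completion_tokens =
        generated_tokens.unwrap_or_else(|| text.split_whitespace().count() as u32);
    let prompt_tokens = input_tokens.unwrap_or(0);
    let total_tokens = prompt_tokens + completion_tokens;

    // prompt_tokens = 12, completion_tokens = 4, total_tokens = 16
    println!("{} {} {}", prompt_tokens, completion_tokens, total_tokens);
}
```

The whitespace split is only a rough stand-in for real tokenization, so the reported usage is approximate whenever the provider omits counts.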


@@ -432,7 +432,7 @@ struct LlamaCppEmbeddingRequest {
 #[derive(Debug, Deserialize)]
 struct LlamaCppEmbeddingResponseItem {
     pub index: usize,
-    pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
+    pub embedding: Vec<Vec<f32>>, // This is the awkward part - embedding is an array of arrays
 }

 // Proxy endpoint for embeddings
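Side note on the struct above: OpenAI-style embedding clients expect a flat vector per item, so the nested Vec<Vec<f32>> has to be flattened somewhere downstream. A minimal sketch, assuming serde with derive plus serde_json and a made-up payload; the proxy's actual flattening code is not shown in this hunk.

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct LlamaCppEmbeddingResponseItem {
    pub index: usize,
    pub embedding: Vec<Vec<f32>>, // nested shape as returned by the provider
}

fn main() {
    // Made-up response item with two small inner vectors.
    let raw = r#"{"index":0,"embedding":[[0.1,0.2],[0.3,0.4]]}"#;
    let item: LlamaCppEmbeddingResponseItem = serde_json::from_str(raw).expect("valid JSON");

    // Flatten the array of arrays into the single Vec<f32> a client expects.
    let flat: Vec<f32> = item.embedding.into_iter().flatten().collect();
    assert_eq!(flat.len(), 4);
    println!("index {} -> {} dims", item.index, flat.len());
}
```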