Review response from LLM generic.
Some checks failed: GBCI / build (push) failing after 1m40s

Rodrigo Rodriguez (Pragmatismo) 2025-09-24 08:27:09 -03:00
parent 4e762a1a5d
commit a7bf8353f0
2 changed files with 96 additions and 13 deletions

View file

@@ -1,10 +1,6 @@
-export container="pragmatismo-alm-ci"
-lxc stop "$container"
-lxc config device override "$container" root size=15GB
-lxc config device set "$container" root size=15GB
-lxc start "$container"
-ROOT_DEV=$(lxc exec "$container" -- df / --output=source | tail -1)
-lxc exec "$container" -- growpart "$(dirname "$ROOT_DEV")" "$(basename "$ROOT_DEV")"
-lxc exec "$container" -- resize2fs "$ROOT_DEV"
+lxc stop pragmatismo-alm-ci
+lxc export pragmatismo-alm-ci backup.tar.gz
+lxc delete pragmatismo-alm-ci
+lxc import backup.tar.gz pragmatismo-alm-ci
+zfs set refquota=15G default/containers/pragmatismo-alm-ci
+lxc start pragmatismo-alm-ci

View file

@@ -1,4 +1,4 @@
-use log::info;
+use log::{error, info};
 use actix_web::{post, web, HttpRequest, HttpResponse, Result};
 use dotenv::dotenv;
@@ -108,9 +108,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     info!("Provider response status: {}", status);
     info!("Provider response body: {}", raw_response);

-    // Return the response with appropriate status code
+    // Convert Llama response to OpenAI format if successful
     if status.is_success() {
-        Ok(HttpResponse::Ok().body(raw_response))
+        match convert_llama_to_openai_format(&raw_response) {
+            Ok(openai_response) => Ok(HttpResponse::Ok()
+                .content_type("application/json")
+                .body(openai_response)),
+            Err(e) => {
+                error!("Failed to convert response format: {}", e);
+                Err(actix_web::error::ErrorInternalServerError(
+                    "Response format conversion failed",
+                ))
+            }
+        }
     } else {
         let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
             .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
@@ -118,3 +128,80 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
         Ok(HttpResponse::build(actix_status).body(raw_response))
     }
 }
+
+/// Converts Llama response format to OpenAI-compatible format
+fn convert_llama_to_openai_format(
+    llama_response: &str,
+) -> Result<String, Box<dyn std::error::Error>> {
+    #[derive(serde::Deserialize)]
+    struct LlamaResponse {
+        text: String,
+        sources: Option<Vec<serde_json::Value>>,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIChoice {
+        index: u32,
+        message: OpenAIMessage,
+        finish_reason: String,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIMessage {
+        role: String,
+        content: String,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIUsage {
+        prompt_tokens: u32,
+        completion_tokens: u32,
+        total_tokens: u32,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
+    }
+
+    // Parse the Llama response
+    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
+
+    // Prepare content and token counts without moving llama.text
+    let content = llama.text.clone();
+    let token_count = content.split_whitespace().count() as u32;
+
+    // Create OpenAI-compatible response
+    let openai_response = OpenAIResponse {
+        id: format!(
+            "chatcmpl-{}",
+            uuid::Uuid::new_v4().to_string().replace("-", "")
+        ),
+        object: "chat.completion".to_string(),
+        created: std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap()
+            .as_secs(),
+        model: "llama".to_string(), // You might want to make this configurable
+        choices: vec![OpenAIChoice {
+            index: 0,
+            message: OpenAIMessage {
+                role: "assistant".to_string(),
+                content,
+            },
+            finish_reason: "stop".to_string(),
+        }],
+        usage: OpenAIUsage {
+            prompt_tokens: 0, // You might need to extract these from the Llama response
+            completion_tokens: token_count,
+            total_tokens: token_count,
+        },
+    };
+
+    serde_json::to_string(&openai_response).map_err(|e| e.into())
+}
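
For reference, below is a minimal sketch of how the new conversion path could be exercised. The test module, its name, and the sample payload are illustrative assumptions rather than part of this commit; it presumes the test sits in the same file as convert_llama_to_openai_format (the function is private) and that serde_json is already a dependency, as the function itself requires.

#[cfg(test)]
mod llama_to_openai_tests {
    use super::convert_llama_to_openai_format;

    #[test]
    fn converts_minimal_llama_response() {
        // Shape expected by the converter: a "text" field plus an optional "sources" field.
        let llama_body = r#"{"text": "Hello from Llama", "sources": null}"#;

        let converted =
            convert_llama_to_openai_format(llama_body).expect("conversion should succeed");
        let value: serde_json::Value =
            serde_json::from_str(&converted).expect("output should be valid JSON");

        // Fields the converter sets unconditionally.
        assert_eq!(value["object"], "chat.completion");
        assert_eq!(value["choices"][0]["finish_reason"], "stop");
        assert_eq!(value["choices"][0]["message"]["role"], "assistant");

        // The Llama "text" becomes the assistant message content.
        assert_eq!(value["choices"][0]["message"]["content"], "Hello from Llama");

        // Token counts are approximated by whitespace-separated words (three here).
        assert_eq!(value["usage"]["completion_tokens"], 3);
        assert_eq!(value["usage"]["total_tokens"], 3);
    }
}

Asserting against the parsed JSON rather than the raw string keeps the check independent of field ordering in the serialized response.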