diff --git a/src/scripts/utils/set-size-5GB.sh b/src/scripts/utils/set-size-5GB.sh
index 0d9a5d6..6de4216 100644
--- a/src/scripts/utils/set-size-5GB.sh
+++ b/src/scripts/utils/set-size-5GB.sh
@@ -1,6 +1,7 @@
-lxc stop pragmatismo-alm-ci
-lxc export pragmatismo-alm-ci backup.tar.gz
-lxc delete pragmatismo-alm-ci
-lxc import backup.tar.gz pragmatismo-alm-ci
-zfs set refquota=15G default/containers/pragmatismo-alm-ci
-lxc start pragmatismo-alm-ci
+lxc config device override $CONTAINER_NAME root
+lxc config device set $CONTAINER_NAME root size 6GB
+
+zpool set autoexpand=on default
+zpool online -e default /var/snap/lxd/common/lxd/disks/default.img
+zpool list
+zfs list
diff --git a/src/services/llm_generic.rs b/src/services/llm_generic.rs
index b63df7c..6f802bf 100644
--- a/src/services/llm_generic.rs
+++ b/src/services/llm_generic.rs
@@ -41,7 +41,6 @@ fn clean_request_body(body: &str) -> String {
     let re = Regex::new(r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#).unwrap();
     re.replace_all(body, "").to_string()
 }
-
 #[post("/v1/chat/completions")]
 pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse, Error> {
     // Log raw POST data
@@ -58,9 +57,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     let endpoint = env::var("AI_ENDPOINT")
         .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;
 
-    // Clean the request body (remove unsupported parameters)
-    let cleaned_body_str = clean_request_body(body_str);
-    info!("Cleaned POST Data: {}", cleaned_body_str);
+    // Parse and modify the request body
+    let mut json_value: serde_json::Value = serde_json::from_str(body_str)
+        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
+
+    // Add model parameter
+    if let Some(obj) = json_value.as_object_mut() {
+        obj.insert("model".to_string(), serde_json::Value::String(model));
+    }
+
+    let modified_body_str = serde_json::to_string(&json_value)
+        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
+
+    info!("Modified POST Data: {}", modified_body_str);
 
     // Set up headers
     let mut headers = reqwest::header::HeaderMap::new();
@@ -74,21 +83,7 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
         reqwest::header::HeaderValue::from_static("application/json"),
     );
 
-    // After cleaning the request body, add the unused parameter
-    let mut json_value: serde_json::Value = serde_json::from_str(&cleaned_body_str)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
-
-    // Add the unused parameter
-    json_value
-        .as_object_mut()
-        .unwrap()
-        .insert("model".to_string(), serde_json::Value::String(model));
-
-    // Serialize the modified JSON
-    let modified_body_str = serde_json::to_string(&json_value)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
-
-    // Send request to the OpenAI-compatible provider
+    // Send request to the AI provider
     let client = Client::new();
     let response = client
         .post(&endpoint)
@@ -108,35 +103,50 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     info!("Provider response status: {}", status);
     info!("Provider response body: {}", raw_response);
 
-    // Convert Llama response to OpenAI format if successful
+    // Convert response to OpenAI format if successful
     if status.is_success() {
-        match convert_llama_to_openai_format(&raw_response) {
+        match convert_to_openai_format(&raw_response) {
            Ok(openai_response) => Ok(HttpResponse::Ok()
                .content_type("application/json")
                .body(openai_response)),
            Err(e) => {
                error!("Failed to convert response format: {}", e);
-                Err(actix_web::error::ErrorInternalServerError(
-                    "Response format conversion failed",
-                ))
+                // Return the original response if conversion fails
+                Ok(HttpResponse::Ok()
+                    .content_type("application/json")
+                    .body(raw_response))
            }
        }
    } else {
+        // Return error as-is
        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);
-        Ok(HttpResponse::build(actix_status).body(raw_response))
+        Ok(HttpResponse::build(actix_status)
+            .content_type("application/json")
+            .body(raw_response))
    }
 }
 
-/// Converts Llama response format to OpenAI-compatible format
-fn convert_llama_to_openai_format(
-    llama_response: &str,
-) -> Result<String, Box<dyn std::error::Error>> {
+/// Converts provider response to OpenAI-compatible format
+fn convert_to_openai_format(provider_response: &str) -> Result<String, Box<dyn std::error::Error>> {
    #[derive(serde::Deserialize)]
-    struct LlamaResponse {
+    struct ProviderResponse {
        text: String,
-        sources: Option<Vec<String>>,
+        #[serde(default)]
+        generated_tokens: Option<u32>,
+        #[serde(default)]
+        input_tokens: Option<u32>,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
    }
 
    #[derive(serde::Serialize)]
@@ -159,47 +169,36 @@ fn convert_llama_to_openai_format(
        total_tokens: u32,
    }
 
-    #[derive(serde::Serialize)]
-    struct OpenAIResponse {
-        id: String,
-        object: String,
-        created: u64,
-        model: String,
-        choices: Vec<OpenAIChoice>,
-        usage: OpenAIUsage,
-    }
+    // Parse the provider response
+    let provider: ProviderResponse = serde_json::from_str(provider_response)?;
 
-    // Parse the Llama response
-    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
+    let completion_tokens = provider
+        .generated_tokens
+        .unwrap_or_else(|| provider.text.split_whitespace().count() as u32);
 
-    // Prepare content and token counts without moving llama.text
-    let content = llama.text.clone();
-    let token_count = content.split_whitespace().count() as u32;
+    let prompt_tokens = provider.input_tokens.unwrap_or(0);
+    let total_tokens = prompt_tokens + completion_tokens;
 
-    // Create OpenAI-compatible response
    let openai_response = OpenAIResponse {
-        id: format!(
-            "chatcmpl-{}",
-            uuid::Uuid::new_v4().to_string().replace("-", "")
-        ),
+        id: format!("chatcmpl-{}", uuid::Uuid::new_v4().simple()),
        object: "chat.completion".to_string(),
        created: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs(),
-        model: "llama".to_string(), // You might want to make this configurable
+        model: "llama".to_string(),
        choices: vec![OpenAIChoice {
            index: 0,
            message: OpenAIMessage {
                role: "assistant".to_string(),
-                content,
+                content: provider.text,
            },
            finish_reason: "stop".to_string(),
        }],
        usage: OpenAIUsage {
-            prompt_tokens: 0, // You might need to extract these from the Llama response
-            completion_tokens: token_count,
-            total_tokens: token_count,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
        },
    };
 
diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
index 00ff628..c1e21cb 100644
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@@ -432,7 +432,7 @@ struct LlamaCppEmbeddingRequest {
 #[derive(Debug, Deserialize)]
 struct LlamaCppEmbeddingResponseItem {
    pub index: usize,
-    pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
+    pub embedding: Vec<Vec<f32>>, // Note: the embedding field is an array of arrays
 }
 
 // Proxy endpoint for embeddings
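
A quick way to smoke-test the llm_generic.rs change is to send an OpenAI-style request straight at the proxy. The sketch below is illustrative only: the localhost:8080 address and the sample message body are assumptions not taken from this patch, and AI_ENDPOINT (plus the model value the handler injects) must already be configured.

# Hypothetical smoke test; adjust host/port to wherever the actix service actually listens.
curl -s http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages":[{"role":"user","content":"Hello"}]}'
# The handler adds "model" to the body, forwards it to AI_ENDPOINT, and rewrites a
# {"text": ..., "generated_tokens": ..., "input_tokens": ...} provider reply into an
# OpenAI-style chat.completion response, returning the raw provider body if conversion fails.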