commit 51a2141a00
parent a7bf8353f0
3 changed files with 62 additions and 62 deletions

@@ -1,6 +1,7 @@
-lxc stop pragmatismo-alm-ci
-lxc export pragmatismo-alm-ci backup.tar.gz
-lxc delete pragmatismo-alm-ci
-lxc import backup.tar.gz pragmatismo-alm-ci
-zfs set refquota=15G default/containers/pragmatismo-alm-ci
-lxc start pragmatismo-alm-ci
+lxc config device override $CONTAINER_NAME root
+lxc config device set $CONTAINER_NAME root size 6GB
+
+zpool set autoexpand=on default
+zpool online -e default /var/snap/lxd/common/lxd/disks/default.img
+zpool list
+zfs list
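
Note: the replacement lines grow the container's root disk in place rather than round-tripping through export/import with a refquota. The root device size is overridden per container, the pool is told to autoexpand, and zpool online -e expands it into the grown backing image; zpool list and zfs list then confirm the new capacity. $CONTAINER_NAME is assumed to hold the container name (pragmatismo-alm-ci above).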

@@ -41,7 +41,6 @@ fn clean_request_body(body: &str) -> String {
    let re = Regex::new(r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#).unwrap();
    re.replace_all(body, "").to_string()
}

#[post("/v1/chat/completions")]
pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
    // Log raw POST data
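
Note: as a quick illustration of what clean_request_body strips, given the regex above (input is hypothetical):

    let cleaned = clean_request_body(r#"{"model":"x","top_p":0.9,"messages":[]}"#);
    assert_eq!(cleaned, r#"{"model":"x","messages":[]}"#);

Because the pattern only consumes an optional leading comma, a matched key in first position would leave a dangling comma behind; the stripped keys normally follow "model" or "messages" in practice, so this is a caveat rather than a bug here.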

@@ -58,9 +57,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
    let endpoint = env::var("AI_ENDPOINT")
        .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;

    // Clean the request body (remove unsupported parameters)
    let cleaned_body_str = clean_request_body(body_str);
    info!("Cleaned POST Data: {}", cleaned_body_str);

    // Parse and modify the cleaned request body (parsing body_str here would discard the cleaning)
    let mut json_value: serde_json::Value = serde_json::from_str(&cleaned_body_str)
        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;

    // Add model parameter
    if let Some(obj) = json_value.as_object_mut() {
        obj.insert("model".to_string(), serde_json::Value::String(model));
    }

    let modified_body_str = serde_json::to_string(&json_value)
        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;

    info!("Modified POST Data: {}", modified_body_str);

    // Set up headers
    let mut headers = reqwest::header::HeaderMap::new();

@@ -74,21 +83,7 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
        reqwest::header::HeaderValue::from_static("application/json"),
    );

-    // After cleaning the request body, add the unused parameter
-    let mut json_value: serde_json::Value = serde_json::from_str(&cleaned_body_str)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
-
-    // Add the unused parameter
-    json_value
-        .as_object_mut()
-        .unwrap()
-        .insert("model".to_string(), serde_json::Value::String(model));
-
-    // Serialize the modified JSON
-    let modified_body_str = serde_json::to_string(&json_value)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
-
-    // Send request to the OpenAI-compatible provider
+    // Send request to the AI provider
    let client = Client::new();
    let response = client
        .post(&endpoint)
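
Note: with this duplicate block removed, the body is parsed, given its model field, and serialized exactly once (in the earlier hunk), and the as_object_mut().unwrap() that could panic on a non-object body is gone in favor of the if let form above.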

@@ -108,35 +103,50 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
    info!("Provider response status: {}", status);
    info!("Provider response body: {}", raw_response);

-    // Convert Llama response to OpenAI format if successful
+    // Convert response to OpenAI format if successful
    if status.is_success() {
-        match convert_llama_to_openai_format(&raw_response) {
+        match convert_to_openai_format(&raw_response) {
            Ok(openai_response) => Ok(HttpResponse::Ok()
                .content_type("application/json")
                .body(openai_response)),
            Err(e) => {
                error!("Failed to convert response format: {}", e);
-                Err(actix_web::error::ErrorInternalServerError(
-                    "Response format conversion failed",
-                ))
+                // Return the original response if conversion fails
+                Ok(HttpResponse::Ok()
+                    .content_type("application/json")
+                    .body(raw_response))
            }
        }
    } else {
        // Return error as-is
        let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
            .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);

-        Ok(HttpResponse::build(actix_status).body(raw_response))
+        Ok(HttpResponse::build(actix_status)
+            .content_type("application/json")
+            .body(raw_response))
    }
}

-/// Converts Llama response format to OpenAI-compatible format
-fn convert_llama_to_openai_format(
-    llama_response: &str,
-) -> Result<String, Box<dyn std::error::Error>> {
+/// Converts provider response to OpenAI-compatible format
+fn convert_to_openai_format(provider_response: &str) -> Result<String, Box<dyn std::error::Error>> {
    #[derive(serde::Deserialize)]
-    struct LlamaResponse {
+    struct ProviderResponse {
        text: String,
        sources: Option<Vec<serde_json::Value>>,
+        #[serde(default)]
+        generated_tokens: Option<u32>,
+        #[serde(default)]
+        input_tokens: Option<u32>,
    }

+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
+    }
+
    #[derive(serde::Serialize)]

@@ -159,47 +169,36 @@ fn convert_llama_to_openai_format(
        total_tokens: u32,
    }

-    #[derive(serde::Serialize)]
-    struct OpenAIResponse {
-        id: String,
-        object: String,
-        created: u64,
-        model: String,
-        choices: Vec<OpenAIChoice>,
-        usage: OpenAIUsage,
-    }
-
-    // Parse the Llama response
-    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
+    // Parse the provider response
+    let provider: ProviderResponse = serde_json::from_str(provider_response)?;

-    // Prepare content and token counts without moving llama.text
-    let content = llama.text.clone();
-    let token_count = content.split_whitespace().count() as u32;
+    let completion_tokens = provider
+        .generated_tokens
+        .unwrap_or_else(|| provider.text.split_whitespace().count() as u32);
+
+    let prompt_tokens = provider.input_tokens.unwrap_or(0);
+    let total_tokens = prompt_tokens + completion_tokens;

    // Create OpenAI-compatible response
    let openai_response = OpenAIResponse {
-        id: format!(
-            "chatcmpl-{}",
-            uuid::Uuid::new_v4().to_string().replace("-", "")
-        ),
+        id: format!("chatcmpl-{}", uuid::Uuid::new_v4().simple()),
        object: "chat.completion".to_string(),
        created: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs(),
-        model: "llama".to_string(), // You might want to make this configurable
+        model: "llama".to_string(),
        choices: vec![OpenAIChoice {
            index: 0,
            message: OpenAIMessage {
                role: "assistant".to_string(),
-                content,
+                content: provider.text,
            },
            finish_reason: "stop".to_string(),
        }],
        usage: OpenAIUsage {
-            prompt_tokens: 0, // You might need to extract these from the Llama response
-            completion_tokens: token_count,
-            total_tokens: token_count,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
        },
    };
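
Note: two behavior changes here are easy to miss: a failed conversion now falls back to returning the raw provider body with a 200 instead of a 500, and pass-through error responses gain an explicit application/json content type. For illustration (values hypothetical), a provider body such as

    {"text":"Hello there","sources":null,"generated_tokens":2,"input_tokens":5}

would convert to an OpenAI-style response with choices[0].message.content == "Hello there" and usage == {"prompt_tokens":5,"completion_tokens":2,"total_tokens":7}; when generated_tokens or input_tokens are absent, completion tokens fall back to a whitespace word count and prompt_tokens to 0.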

@@ -432,7 +432,7 @@ struct LlamaCppEmbeddingRequest {
#[derive(Debug, Deserialize)]
struct LlamaCppEmbeddingResponseItem {
    pub index: usize,
-    pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
+    pub embedding: Vec<Vec<f32>>, // Note: embedding is an array of arrays, not a flat vector
}

// Proxy endpoint for embeddings
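
Note: a minimal sketch of consuming the nested shape, assuming one inner vector per input (the item binding is hypothetical):

    let flat: Vec<f32> = item.embedding.into_iter().flatten().collect();

This flattens the Vec<Vec<f32>> into the flat embedding array an OpenAI-style response expects.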