commit 51a2141a00
parent a7bf8353f0
3 changed files with 62 additions and 62 deletions
@@ -1,6 +1,7 @@
-lxc stop pragmatismo-alm-ci
-lxc export pragmatismo-alm-ci backup.tar.gz
-lxc delete pragmatismo-alm-ci
-lxc import backup.tar.gz pragmatismo-alm-ci
-zfs set refquota=15G default/containers/pragmatismo-alm-ci
-lxc start pragmatismo-alm-ci
+lxc config device override $CONTAINER_NAME root
+lxc config device set $CONTAINER_NAME root size 6GB
+
+zpool set autoexpand=on default
+zpool online -e default /var/snap/lxd/common/lxd/disks/default.img
+zpool list
+zfs list
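The old script grew the container by round-tripping it through a backup (export, delete, import) and raising a ZFS refquota; the new script resizes in place: it overrides the container's root disk device, sets it to 6GB, and lets the pool auto-expand into the grown backing image, with zpool list and zfs list as a final check.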
@@ -41,7 +41,6 @@ fn clean_request_body(body: &str) -> String {
     let re = Regex::new(r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#).unwrap();
     re.replace_all(body, "").to_string()
 }
-
 #[post("/v1/chat/completions")]
 pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Result<HttpResponse> {
     // Log raw POST data
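clean_request_body itself is unchanged here, although the hunks below stop calling it. For reference, a standalone sketch of what the regex strips; the sample body is an invented example:

use regex::Regex;

fn main() {
    // Same pattern as clean_request_body: drops the listed keys and their values.
    let re = Regex::new(r#","?\s*"(max_completion_tokens|parallel_tool_calls|top_p|frequency_penalty|presence_penalty)"\s*:\s*[^,}]*"#).unwrap();
    let body = r#"{"messages":[],"top_p":0.9,"presence_penalty":0}"#;
    assert_eq!(re.replace_all(body, ""), r#"{"messages":[]}"#);
}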
@@ -58,9 +57,19 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     let endpoint = env::var("AI_ENDPOINT")
         .map_err(|_| actix_web::error::ErrorInternalServerError("AI_ENDPOINT not set."))?;

-    // Clean the request body (remove unsupported parameters)
-    let cleaned_body_str = clean_request_body(body_str);
-    info!("Cleaned POST Data: {}", cleaned_body_str);
+    // Parse and modify the request body
+    let mut json_value: serde_json::Value = serde_json::from_str(body_str)
+        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;

+    // Add model parameter
+    if let Some(obj) = json_value.as_object_mut() {
+        obj.insert("model".to_string(), serde_json::Value::String(model));
+    }
+
+    let modified_body_str = serde_json::to_string(&json_value)
+        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
+
+    info!("Modified POST Data: {}", modified_body_str);
+
     // Set up headers
     let mut headers = reqwest::header::HeaderMap::new();
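A standalone sketch of the new rewrite path (the request body and model value are invented examples): parse the incoming JSON, force the server-chosen model, and re-serialize.

fn main() {
    let body_str = r#"{"messages":[{"role":"user","content":"hi"}]}"#;
    let model = "llama".to_string(); // stand-in for the model configured elsewhere

    let mut json_value: serde_json::Value = serde_json::from_str(body_str).unwrap();
    if let Some(obj) = json_value.as_object_mut() {
        // Map::insert replaces any client-supplied "model" key.
        obj.insert("model".to_string(), serde_json::Value::String(model));
    }
    let modified_body_str = serde_json::to_string(&json_value).unwrap();
    assert_eq!(
        modified_body_str,
        r#"{"messages":[{"role":"user","content":"hi"}],"model":"llama"}"#
    );
}

Unlike the duplicated block removed further down, this version avoids a bare .unwrap() on as_object_mut() and simply skips the insert when the body is not a JSON object.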
@@ -74,21 +83,7 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
         reqwest::header::HeaderValue::from_static("application/json"),
     );

-    // After cleaning the request body, add the unused parameter
-    let mut json_value: serde_json::Value = serde_json::from_str(&cleaned_body_str)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to parse JSON"))?;
-
-    // Add the unused parameter
-    json_value
-        .as_object_mut()
-        .unwrap()
-        .insert("model".to_string(), serde_json::Value::String(model));
-
-    // Serialize the modified JSON
-    let modified_body_str = serde_json::to_string(&json_value)
-        .map_err(|_| actix_web::error::ErrorInternalServerError("Failed to serialize JSON"))?;
-
-    // Send request to the OpenAI-compatible provider
+    // Send request to the AI provider
     let client = Client::new();
     let response = client
         .post(&endpoint)
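This hunk deletes the second, duplicated rewrite block: parsing, the model insert (previously through a bare .unwrap() on as_object_mut()), and re-serialization now happen once, earlier in the handler, before the headers are built.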
@@ -108,35 +103,50 @@ pub async fn generic_chat_completions(body: web::Bytes, _req: HttpRequest) -> Re
     info!("Provider response status: {}", status);
     info!("Provider response body: {}", raw_response);

-    // Convert Llama response to OpenAI format if successful
+    // Convert response to OpenAI format if successful
     if status.is_success() {
-        match convert_llama_to_openai_format(&raw_response) {
+        match convert_to_openai_format(&raw_response) {
             Ok(openai_response) => Ok(HttpResponse::Ok()
                 .content_type("application/json")
                 .body(openai_response)),
             Err(e) => {
                 error!("Failed to convert response format: {}", e);
-                Err(actix_web::error::ErrorInternalServerError(
-                    "Response format conversion failed",
-                ))
+                // Return the original response if conversion fails
+                Ok(HttpResponse::Ok()
+                    .content_type("application/json")
+                    .body(raw_response))
             }
         }
     } else {
+        // Return error as-is
         let actix_status = actix_web::http::StatusCode::from_u16(status.as_u16())
             .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR);

-        Ok(HttpResponse::build(actix_status).body(raw_response))
+        Ok(HttpResponse::build(actix_status)
+            .content_type("application/json")
+            .body(raw_response))
     }
 }

-/// Converts Llama response format to OpenAI-compatible format
-fn convert_llama_to_openai_format(
-    llama_response: &str,
-) -> Result<String, Box<dyn std::error::Error>> {
+/// Converts provider response to OpenAI-compatible format
+fn convert_to_openai_format(provider_response: &str) -> Result<String, Box<dyn std::error::Error>> {
     #[derive(serde::Deserialize)]
-    struct LlamaResponse {
+    struct ProviderResponse {
         text: String,
-        sources: Option<Vec<serde_json::Value>>,
+        #[serde(default)]
+        generated_tokens: Option<u32>,
+        #[serde(default)]
+        input_tokens: Option<u32>,
+    }
+
+    #[derive(serde::Serialize)]
+    struct OpenAIResponse {
+        id: String,
+        object: String,
+        created: u64,
+        model: String,
+        choices: Vec<OpenAIChoice>,
+        usage: OpenAIUsage,
     }

     #[derive(serde::Serialize)]
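Two behavioral changes sit in this hunk besides the Llama-to-provider renaming: a failed conversion now logs the error and returns the provider's raw body with a 200 instead of failing with a 500, and error responses passed through from the provider keep their status code but gain an explicit application/json content type. The deserialized response struct also drops the unused sources field in favor of optional generated_tokens and input_tokens counts.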
@@ -159,47 +169,36 @@ fn convert_llama_to_openai_format(
         total_tokens: u32,
     }

-    #[derive(serde::Serialize)]
-    struct OpenAIResponse {
-        id: String,
-        object: String,
-        created: u64,
-        model: String,
-        choices: Vec<OpenAIChoice>,
-        usage: OpenAIUsage,
-    }
+    // Parse the provider response
+    let provider: ProviderResponse = serde_json::from_str(provider_response)?;

-    // Parse the Llama response
-    let llama: LlamaResponse = serde_json::from_str(llama_response)?;
+    let completion_tokens = provider
+        .generated_tokens
+        .unwrap_or_else(|| provider.text.split_whitespace().count() as u32);

-    // Prepare content and token counts without moving llama.text
-    let content = llama.text.clone();
-    let token_count = content.split_whitespace().count() as u32;
+    let prompt_tokens = provider.input_tokens.unwrap_or(0);
+    let total_tokens = prompt_tokens + completion_tokens;

-    // Create OpenAI-compatible response
     let openai_response = OpenAIResponse {
-        id: format!(
-            "chatcmpl-{}",
-            uuid::Uuid::new_v4().to_string().replace("-", "")
-        ),
+        id: format!("chatcmpl-{}", uuid::Uuid::new_v4().simple()),
         object: "chat.completion".to_string(),
         created: std::time::SystemTime::now()
             .duration_since(std::time::UNIX_EPOCH)
             .unwrap()
             .as_secs(),
-        model: "llama".to_string(), // You might want to make this configurable
+        model: "llama".to_string(),
         choices: vec![OpenAIChoice {
             index: 0,
             message: OpenAIMessage {
                 role: "assistant".to_string(),
-                content,
+                content: provider.text,
             },
             finish_reason: "stop".to_string(),
         }],
         usage: OpenAIUsage {
-            prompt_tokens: 0, // You might need to extract these from the Llama response
-            completion_tokens: token_count,
-            total_tokens: token_count,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
         },
     };
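A small sketch of the new token accounting, with invented values: the provider's counts are used when present, and the completion count falls back to a whitespace word count of the generated text.

fn main() {
    let text = "Hello from the model";
    let generated_tokens: Option<u32> = None; // provider omitted the field
    let input_tokens: Option<u32> = Some(12);

    let completion_tokens =
        generated_tokens.unwrap_or_else(|| text.split_whitespace().count() as u32);
    let prompt_tokens = input_tokens.unwrap_or(0);

    assert_eq!(completion_tokens, 4); // word count stands in for real tokens
    assert_eq!(prompt_tokens + completion_tokens, 16);
}

A whitespace count only approximates real tokenization, which is presumably why the provider-reported counts are preferred when available.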
@@ -432,7 +432,7 @@ struct LlamaCppEmbeddingRequest {
 #[derive(Debug, Deserialize)]
 struct LlamaCppEmbeddingResponseItem {
     pub index: usize,
-    pub embedding: Vec<Vec<f32>>, // This is the fucked up part - embedding is an array of arrays
+    pub embedding: Vec<Vec<f32>>, // Note the quirk: embedding is an array of arrays
 }

 // Proxy endpoint for embeddings
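Given that nested shape, a consumer of LlamaCppEmbeddingResponseItem might flatten the outer vector; a minimal sketch with invented data:

fn main() {
    // llama.cpp returns the embedding one level deeper than expected.
    let embedding: Vec<Vec<f32>> = vec![vec![0.1, 0.2], vec![0.3]];
    let flat: Vec<f32> = embedding.into_iter().flatten().collect();
    assert_eq!(flat, vec![0.1, 0.2, 0.3]);
}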