feat(office): Add ooxmlsdk integration for Word/PowerPoint preservation
This commit is contained in:
parent
8a9a913ffb
commit
1850564e62
7 changed files with 919 additions and 340 deletions
|
|
@ -211,8 +211,9 @@ rust_xlsxwriter = "0.79"
|
|||
spreadsheet-ods = "1.0"
|
||||
|
||||
# Word/PowerPoint Support - MS Office 100% Compatibility
|
||||
# ooxmlsdk preserves: Full document structure at XML level (100% round-trip)
|
||||
docx-rs = "0.4"
|
||||
ooxmlsdk = { version = "0.3", features = ["docx", "pptx"] }
|
||||
ooxmlsdk = { version = "0.3", features = ["docx", "pptx", "parts", "office2021"] }
|
||||
# ppt-rs disabled due to version conflict - using ooxmlsdk for PPTX support instead
|
||||
# ppt-rs = { version = "0.2", default-features = false }
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
pub mod collaboration;
|
||||
pub mod handlers;
|
||||
pub mod ooxml;
|
||||
pub mod storage;
|
||||
pub mod types;
|
||||
pub mod utils;
|
||||
|
|
|
|||
250
src/docs/ooxml.rs
Normal file
250
src/docs/ooxml.rs
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
use std::io::Cursor;
|
||||
|
||||
/// A DOCX document loaded with byte-level preservation.
///
/// Keeps the untouched input bytes alongside the extracted paragraph text,
/// so edits can be spliced back into the original package without losing
/// styling, media, or metadata.
pub struct OoxmlDocument {
    /// The original file bytes, retained verbatim for lossless round-trips.
    pub original_bytes: Vec<u8>,
    /// Paragraph text extracted from `word/document.xml` for display/editing.
    pub paragraphs: Vec<ParagraphInfo>,
}
|
||||
|
||||
/// Text of one paragraph plus its position in the document.
pub struct ParagraphInfo {
    /// Concatenated, entity-decoded text of the paragraph's runs.
    pub text: String,
    /// Zero-based paragraph position; empty paragraphs still consume an index.
    pub index: usize,
}
|
||||
|
||||
/// Parses DOCX bytes with ooxmlsdk while keeping the original bytes intact.
///
/// The full input is retained in `original_bytes` so a later save can
/// round-trip the document without losing structure the paragraph model
/// does not capture.
///
/// # Errors
/// Returns a human-readable message if ooxmlsdk cannot parse the package.
pub fn load_docx_preserving(bytes: &[u8]) -> Result<OoxmlDocument, String> {
    use ooxmlsdk::parts::wordprocessing_document::WordprocessingDocument;

    let reader = Cursor::new(bytes);
    let docx = WordprocessingDocument::new(reader)
        .map_err(|e| format!("Failed to parse DOCX: {e}"))?;

    // Serialize the main part back to XML, then scan it for paragraph text.
    // NOTE(review): a to_xml() failure is swallowed into an empty string,
    // yielding zero paragraphs instead of an error — confirm this is intended.
    let xml_str = docx
        .main_document_part
        .root_element
        .to_xml()
        .unwrap_or_default();

    let paragraphs = extract_paragraphs(&xml_str);

    Ok(OoxmlDocument {
        original_bytes: bytes.to_vec(),
        paragraphs,
    })
}
|
||||
|
||||
fn extract_paragraphs(xml: &str) -> Vec<ParagraphInfo> {
|
||||
let mut paragraphs = Vec::new();
|
||||
let mut para_index = 0;
|
||||
|
||||
let mut pos = 0;
|
||||
while let Some(p_start) = xml[pos..].find("<w:p") {
|
||||
let abs_start = pos + p_start;
|
||||
|
||||
if let Some(p_end_rel) = xml[abs_start..].find("</w:p>") {
|
||||
let abs_end = abs_start + p_end_rel + 6;
|
||||
let para_content = &xml[abs_start..abs_end];
|
||||
|
||||
let text = extract_text_from_paragraph(para_content);
|
||||
if !text.trim().is_empty() {
|
||||
paragraphs.push(ParagraphInfo {
|
||||
text,
|
||||
index: para_index,
|
||||
});
|
||||
}
|
||||
para_index += 1;
|
||||
pos = abs_end;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
paragraphs
|
||||
}
|
||||
|
||||
fn extract_text_from_paragraph(para_xml: &str) -> String {
|
||||
let mut text = String::new();
|
||||
let mut pos = 0;
|
||||
|
||||
while let Some(t_start) = para_xml[pos..].find("<w:t") {
|
||||
let abs_start = pos + t_start;
|
||||
|
||||
if let Some(content_start_rel) = para_xml[abs_start..].find('>') {
|
||||
let abs_content_start = abs_start + content_start_rel + 1;
|
||||
|
||||
if let Some(t_end_rel) = para_xml[abs_content_start..].find("</w:t>") {
|
||||
let content = ¶_xml[abs_content_start..abs_content_start + t_end_rel];
|
||||
text.push_str(content);
|
||||
pos = abs_content_start + t_end_rel + 6;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unescape_xml(&text)
|
||||
}
|
||||
|
||||
/// Decodes the five predefined XML character entities.
///
/// Fix: the rendered source had lost the entity literals, turning every
/// replacement into an identity no-op. Restored, with `&amp;` decoded
/// LAST — otherwise input like `&amp;lt;` would be double-unescaped
/// into `<` instead of the correct `&lt;`.
fn unescape_xml(text: &str) -> String {
    text.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
|
||||
|
||||
/// Encodes the five predefined XML character entities.
///
/// Fix: the rendered source had lost the entity literals, turning every
/// replacement into an identity no-op. Restored, with `&` escaped FIRST
/// so ampersands introduced by the later replacements are not re-escaped.
fn escape_xml(text: &str) -> String {
    text.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
        .replace('"', "&quot;")
        .replace('\'', "&apos;")
}
|
||||
|
||||
/// Round-trips DOCX bytes through ooxmlsdk without modifying content.
///
/// Parsing and immediately re-saving validates the package and exercises a
/// full load/save cycle through the SDK.
///
/// # Errors
/// Returns a message if the package cannot be parsed or re-serialized.
pub fn save_docx_preserving(original_bytes: &[u8]) -> Result<Vec<u8>, String> {
    use ooxmlsdk::parts::wordprocessing_document::WordprocessingDocument;

    let reader = Cursor::new(original_bytes);
    let docx = WordprocessingDocument::new(reader)
        .map_err(|e| format!("Failed to parse DOCX: {e}"))?;

    // Re-serialize into an in-memory buffer rather than touching disk.
    let mut output = Cursor::new(Vec::new());
    docx.save(&mut output)
        .map_err(|e| format!("Failed to save DOCX: {e}"))?;

    Ok(output.into_inner())
}
|
||||
|
||||
/// Rewrites the paragraph texts inside a DOCX package while leaving every
/// other ZIP entry (styles, media, relationships, …) byte-for-byte intact.
///
/// Only `word/document.xml` is transformed; all other entries are copied
/// through unchanged, which is what preserves full Word compatibility.
///
/// # Errors
/// Returns a message if the archive cannot be read or rebuilt.
pub fn update_docx_text(
    original_bytes: &[u8],
    new_paragraphs: &[String],
) -> Result<Vec<u8>, String> {
    use std::io::{Read, Write};
    use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter};

    let reader = Cursor::new(original_bytes);
    let mut archive =
        ZipArchive::new(reader).map_err(|e| format!("Failed to open DOCX archive: {e}"))?;

    let mut output_buf = Cursor::new(Vec::new());
    {
        let mut zip_writer = ZipWriter::new(&mut output_buf);
        let options =
            SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated);

        // Walk entries in archive order so the rebuilt package mirrors the original.
        for i in 0..archive.len() {
            let mut file = archive
                .by_index(i)
                .map_err(|e| format!("Failed to read archive entry: {e}"))?;

            let name = file.name().to_string();

            if name == "word/document.xml" {
                // The one entry we edit: read as UTF-8 text and splice in the new paragraphs.
                let mut content = String::new();
                file.read_to_string(&mut content)
                    .map_err(|e| format!("Failed to read document.xml: {e}"))?;

                let modified_content = replace_paragraph_texts(&content, new_paragraphs);

                zip_writer
                    .start_file(&name, options)
                    .map_err(|e| format!("Failed to start file in zip: {e}"))?;
                zip_writer
                    .write_all(modified_content.as_bytes())
                    .map_err(|e| format!("Failed to write document.xml: {e}"))?;
            } else {
                // Pass-through copy of an untouched entry.
                let mut buf = Vec::new();
                file.read_to_end(&mut buf)
                    .map_err(|e| format!("Failed to read file: {e}"))?;

                zip_writer
                    .start_file(&name, options)
                    .map_err(|e| format!("Failed to start file in zip: {e}"))?;
                zip_writer
                    .write_all(&buf)
                    .map_err(|e| format!("Failed to write file: {e}"))?;
            }
        }

        zip_writer
            .finish()
            .map_err(|e| format!("Failed to finish zip: {e}"))?;
    }

    Ok(output_buf.into_inner())
}
|
||||
|
||||
/// Walks `<w:p>` paragraphs in document XML, substituting each
/// text-bearing paragraph's content with the next entry of
/// `new_paragraphs`, in order.
///
/// Paragraphs with no `<w:t>` run are skipped and do not consume an entry;
/// once `new_paragraphs` is exhausted, remaining paragraphs are untouched.
///
/// NOTE(review): `.find("<w:p ")` is tried before `.or_else` falls back to
/// `"<w:p>"`, so a later `<w:p …>` match can win over an earlier `<w:p>` —
/// confirm paragraph ordering cannot be affected in real documents.
fn replace_paragraph_texts(xml: &str, new_paragraphs: &[String]) -> String {
    let mut result = xml.to_string();
    let mut para_idx = 0;
    let mut search_pos = 0;

    while let Some(p_start) = result[search_pos..]
        .find("<w:p ")
        .or_else(|| result[search_pos..].find("<w:p>"))
    {
        let abs_start = search_pos + p_start;

        if let Some(p_end_rel) = result[abs_start..].find("</w:p>") {
            let abs_end = abs_start + p_end_rel + 6; // include "</w:p>"
            let para_content = result[abs_start..abs_end].to_string();

            if para_content.contains("<w:t") {
                if para_idx < new_paragraphs.len() {
                    let new_para = replace_first_text_run(&para_content, &new_paragraphs[para_idx]);
                    let new_len = new_para.len();
                    // Splice the rewritten paragraph in place; the result string
                    // changes length, so resume from the end of the new text.
                    result = format!("{}{}{}", &result[..abs_start], new_para, &result[abs_end..]);
                    search_pos = abs_start + new_len;
                } else {
                    search_pos = abs_end;
                }
                para_idx += 1;
            } else {
                // No text run (e.g. image-only paragraph): leave as-is.
                search_pos = abs_end;
            }
        } else {
            break;
        }
    }

    result
}
|
||||
|
||||
/// Replaces the first `<w:t>` run's content in a paragraph with `new_text`
/// (XML-escaped) and empties every subsequent run, so the paragraph reads
/// as a single piece of text while the run-level formatting markup stays.
///
/// NOTE(review): the `"<w:t"` prefix search also matches `<w:tab/>` etc.;
/// the inner `</w:t>` lookup limits the damage, but verify tab-containing
/// paragraphs round-trip correctly.
fn replace_first_text_run(para_xml: &str, new_text: &str) -> String {
    let mut result = para_xml.to_string();
    let mut found_first = false;

    let mut search_pos = 0;
    while let Some(t_start) = result[search_pos..].find("<w:t") {
        let abs_start = search_pos + t_start;

        // End of the opening tag (handles `<w:t>` and `<w:t xml:space=…>`).
        if let Some(tag_end_rel) = result[abs_start..].find('>') {
            let abs_content_start = abs_start + tag_end_rel + 1;

            if let Some(t_end_rel) = result[abs_content_start..].find("</w:t>") {
                let abs_content_end = abs_content_start + t_end_rel;

                if !found_first {
                    // First run receives the entire new text.
                    let escaped = escape_xml(new_text);
                    result = format!(
                        "{}{}{}",
                        &result[..abs_content_start],
                        escaped,
                        &result[abs_content_end..]
                    );
                    found_first = true;
                    // Resume past the inserted text and the closing "</w:t>".
                    search_pos = abs_content_start + escaped.len() + 6;
                } else {
                    // Later runs are emptied: tags kept, content dropped.
                    result = format!("{}{}", &result[..abs_content_start], &result[abs_content_end..]);
                    search_pos = abs_content_start;
                }
            } else {
                break;
            }
        } else {
            break;
        }
    }

    result
}
|
||||
|
|
@ -1,16 +1,24 @@
|
|||
use crate::docs::ooxml::{load_docx_preserving, update_docx_text};
|
||||
use crate::docs::types::{Document, DocumentMetadata};
|
||||
use crate::shared::state::AppState;
|
||||
use aws_sdk_s3::primitives::ByteStream;
|
||||
use chrono::{DateTime, Utc};
|
||||
use std::collections::HashMap;
|
||||
use std::io::Cursor;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use uuid::Uuid;
|
||||
|
||||
static DOCUMENT_CACHE: once_cell::sync::Lazy<RwLock<HashMap<String, (Vec<u8>, DateTime<Utc>)>>> =
|
||||
once_cell::sync::Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
|
||||
const CACHE_TTL_SECS: i64 = 3600;
|
||||
|
||||
/// Builds the S3 key prefix under which a user's documents are stored.
///
/// Characters unsafe in object keys (path separators, wildcards, quotes,
/// pipes) become `_`, and the result is lowercased so keys are stable
/// regardless of the caller's casing.
///
/// Note: this span contained both the pre- and post-commit return lines
/// from the diff; the post-commit inline-format version is kept.
pub fn get_user_docs_path(user_identifier: &str) -> String {
    let safe_id = user_identifier
        .replace(['/', '\\', ':', '*', '?', '"', '<', '>', '|'], "_")
        .to_lowercase();
    format!("users/{safe_id}/docs")
}
|
||||
|
||||
pub fn get_current_user_id() -> String {
|
||||
|
|
@ -21,287 +29,22 @@ pub fn generate_doc_id() -> String {
|
|||
Uuid::new_v4().to_string()
|
||||
}
|
||||
|
||||
pub async fn save_document_to_drive(
|
||||
state: &Arc<AppState>,
|
||||
user_identifier: &str,
|
||||
doc_id: &str,
|
||||
title: &str,
|
||||
content: &str,
|
||||
) -> Result<String, String> {
|
||||
let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;
|
||||
pub async fn cache_document_bytes(doc_id: &str, bytes: Vec<u8>) {
|
||||
let mut cache = DOCUMENT_CACHE.write().await;
|
||||
cache.insert(doc_id.to_string(), (bytes, Utc::now()));
|
||||
|
||||
let base_path = get_user_docs_path(user_identifier);
|
||||
let doc_path = format!("{}/{}.html", base_path, doc_id);
|
||||
let meta_path = format!("{}/{}.meta.json", base_path, doc_id);
|
||||
|
||||
s3_client
|
||||
.put_object()
|
||||
.bucket(&state.bucket_name)
|
||||
.key(&doc_path)
|
||||
.body(ByteStream::from(content.as_bytes().to_vec()))
|
||||
.content_type("text/html")
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to save document: {e}"))?;
|
||||
|
||||
let word_count = content
|
||||
.split_whitespace()
|
||||
.filter(|w| !w.starts_with('<') && !w.ends_with('>'))
|
||||
.count();
|
||||
|
||||
let metadata = serde_json::json!({
|
||||
"id": doc_id,
|
||||
"title": title,
|
||||
"created_at": Utc::now().to_rfc3339(),
|
||||
"updated_at": Utc::now().to_rfc3339(),
|
||||
"word_count": word_count,
|
||||
"version": 1
|
||||
});
|
||||
|
||||
s3_client
|
||||
.put_object()
|
||||
.bucket(&state.bucket_name)
|
||||
.key(&meta_path)
|
||||
.body(ByteStream::from(metadata.to_string().into_bytes()))
|
||||
.content_type("application/json")
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to save metadata: {e}"))?;
|
||||
|
||||
Ok(doc_path)
|
||||
let now = Utc::now();
|
||||
cache.retain(|_, (_, modified)| (now - *modified).num_seconds() < CACHE_TTL_SECS);
|
||||
}
|
||||
|
||||
pub async fn save_document_as_docx(
|
||||
state: &Arc<AppState>,
|
||||
user_identifier: &str,
|
||||
doc_id: &str,
|
||||
title: &str,
|
||||
content: &str,
|
||||
) -> Result<Vec<u8>, String> {
|
||||
let docx_bytes = convert_html_to_docx(title, content)?;
|
||||
|
||||
let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;
|
||||
let base_path = get_user_docs_path(user_identifier);
|
||||
let docx_path = format!("{}/{}.docx", base_path, doc_id);
|
||||
|
||||
s3_client
|
||||
.put_object()
|
||||
.bucket(&state.bucket_name)
|
||||
.key(&docx_path)
|
||||
.body(ByteStream::from(docx_bytes.clone()))
|
||||
.content_type("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to save DOCX: {e}"))?;
|
||||
|
||||
Ok(docx_bytes)
|
||||
/// Returns a copy of the cached original bytes for `doc_id`, if present.
///
/// Takes only the read lock; expired entries are evicted on write, not here.
pub async fn get_cached_document_bytes(doc_id: &str) -> Option<Vec<u8>> {
    let cache = DOCUMENT_CACHE.read().await;
    cache.get(doc_id).map(|(bytes, _)| bytes.clone())
}
|
||||
|
||||
pub fn convert_html_to_docx(title: &str, html_content: &str) -> Result<Vec<u8>, String> {
|
||||
use docx_rs::*;
|
||||
|
||||
let mut docx = Docx::new();
|
||||
|
||||
if !title.is_empty() {
|
||||
let title_para = Paragraph::new()
|
||||
.add_run(Run::new().add_text(title).bold().size(48));
|
||||
docx = docx.add_paragraph(title_para);
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
}
|
||||
|
||||
let paragraphs = parse_html_to_paragraphs(html_content);
|
||||
for para_data in paragraphs {
|
||||
let mut paragraph = Paragraph::new();
|
||||
|
||||
match para_data.style.as_str() {
|
||||
"h1" => {
|
||||
paragraph = paragraph.add_run(
|
||||
Run::new()
|
||||
.add_text(¶_data.text)
|
||||
.bold()
|
||||
.size(32)
|
||||
);
|
||||
}
|
||||
"h2" => {
|
||||
paragraph = paragraph.add_run(
|
||||
Run::new()
|
||||
.add_text(¶_data.text)
|
||||
.bold()
|
||||
.size(28)
|
||||
);
|
||||
}
|
||||
"h3" => {
|
||||
paragraph = paragraph.add_run(
|
||||
Run::new()
|
||||
.add_text(¶_data.text)
|
||||
.bold()
|
||||
.size(24)
|
||||
);
|
||||
}
|
||||
"li" => {
|
||||
paragraph = paragraph
|
||||
.add_run(Run::new().add_text("• "))
|
||||
.add_run(Run::new().add_text(¶_data.text));
|
||||
}
|
||||
"blockquote" => {
|
||||
paragraph = paragraph
|
||||
.indent(Some(720), None, None, None)
|
||||
.add_run(Run::new().add_text(¶_data.text).italic());
|
||||
}
|
||||
"code" => {
|
||||
paragraph = paragraph.add_run(
|
||||
Run::new()
|
||||
.add_text(¶_data.text)
|
||||
.fonts(RunFonts::new().ascii("Courier New"))
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
let mut run = Run::new().add_text(¶_data.text);
|
||||
if para_data.bold {
|
||||
run = run.bold();
|
||||
}
|
||||
if para_data.italic {
|
||||
run = run.italic();
|
||||
}
|
||||
if para_data.underline {
|
||||
run = run.underline("single");
|
||||
}
|
||||
paragraph = paragraph.add_run(run);
|
||||
}
|
||||
}
|
||||
|
||||
docx = docx.add_paragraph(paragraph);
|
||||
}
|
||||
|
||||
let mut buf = Cursor::new(Vec::new());
|
||||
docx.build()
|
||||
.pack(&mut buf)
|
||||
.map_err(|e| format!("Failed to build DOCX: {e}"))?;
|
||||
|
||||
Ok(buf.into_inner())
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct ParagraphData {
|
||||
text: String,
|
||||
style: String,
|
||||
bold: bool,
|
||||
italic: bool,
|
||||
underline: bool,
|
||||
}
|
||||
|
||||
fn parse_html_to_paragraphs(html: &str) -> Vec<ParagraphData> {
|
||||
let mut paragraphs = Vec::new();
|
||||
let mut current = ParagraphData::default();
|
||||
let mut in_tag = false;
|
||||
let mut tag_name = String::new();
|
||||
let mut is_closing = false;
|
||||
let mut text_buffer = String::new();
|
||||
|
||||
let mut bold_stack: i32 = 0;
|
||||
let mut italic_stack: i32 = 0;
|
||||
let mut underline_stack: i32 = 0;
|
||||
|
||||
for ch in html.chars() {
|
||||
match ch {
|
||||
'<' => {
|
||||
in_tag = true;
|
||||
tag_name.clear();
|
||||
is_closing = false;
|
||||
}
|
||||
'>' => {
|
||||
in_tag = false;
|
||||
let tag = tag_name.to_lowercase();
|
||||
let tag_trimmed = tag.split_whitespace().next().unwrap_or("");
|
||||
|
||||
if is_closing {
|
||||
match tag_trimmed {
|
||||
"p" | "div" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li" | "blockquote" | "pre" => {
|
||||
if !text_buffer.is_empty() || !current.text.is_empty() {
|
||||
current.text = format!("{}{}", current.text, decode_html_entities(&text_buffer));
|
||||
if !current.text.trim().is_empty() {
|
||||
paragraphs.push(current);
|
||||
}
|
||||
current = ParagraphData::default();
|
||||
text_buffer.clear();
|
||||
}
|
||||
}
|
||||
"b" | "strong" => bold_stack = bold_stack.saturating_sub(1),
|
||||
"i" | "em" => italic_stack = italic_stack.saturating_sub(1),
|
||||
"u" => underline_stack = underline_stack.saturating_sub(1),
|
||||
_ => {}
|
||||
}
|
||||
} else {
|
||||
match tag_trimmed {
|
||||
"br" => {
|
||||
text_buffer.push('\n');
|
||||
}
|
||||
"p" | "div" => {
|
||||
if !text_buffer.is_empty() {
|
||||
current.text = format!("{}{}", current.text, decode_html_entities(&text_buffer));
|
||||
text_buffer.clear();
|
||||
}
|
||||
current.style = "p".to_string();
|
||||
current.bold = bold_stack > 0;
|
||||
current.italic = italic_stack > 0;
|
||||
current.underline = underline_stack > 0;
|
||||
}
|
||||
"h1" => {
|
||||
current.style = "h1".to_string();
|
||||
}
|
||||
"h2" => {
|
||||
current.style = "h2".to_string();
|
||||
}
|
||||
"h3" => {
|
||||
current.style = "h3".to_string();
|
||||
}
|
||||
"li" => {
|
||||
current.style = "li".to_string();
|
||||
}
|
||||
"blockquote" => {
|
||||
current.style = "blockquote".to_string();
|
||||
}
|
||||
"pre" | "code" => {
|
||||
current.style = "code".to_string();
|
||||
}
|
||||
"b" | "strong" => bold_stack += 1,
|
||||
"i" | "em" => italic_stack += 1,
|
||||
"u" => underline_stack += 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
tag_name.clear();
|
||||
}
|
||||
'/' if in_tag && tag_name.is_empty() => {
|
||||
is_closing = true;
|
||||
}
|
||||
_ if in_tag => {
|
||||
tag_name.push(ch);
|
||||
}
|
||||
_ => {
|
||||
text_buffer.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !text_buffer.is_empty() {
|
||||
current.text = format!("{}{}", current.text, decode_html_entities(&text_buffer));
|
||||
}
|
||||
if !current.text.trim().is_empty() {
|
||||
paragraphs.push(current);
|
||||
}
|
||||
|
||||
paragraphs
|
||||
}
|
||||
|
||||
fn decode_html_entities(text: &str) -> String {
|
||||
text.replace(" ", " ")
|
||||
.replace("&", "&")
|
||||
.replace("<", "<")
|
||||
.replace(">", ">")
|
||||
.replace(""", "\"")
|
||||
.replace("'", "'")
|
||||
.replace("'", "'")
|
||||
/// Drops the cached original bytes for `doc_id`, if any.
/// Called when a document is deleted so stale bytes cannot be reused.
pub async fn remove_from_cache(doc_id: &str) {
    let mut cache = DOCUMENT_CACHE.write().await;
    cache.remove(doc_id);
}
|
||||
|
||||
pub async fn load_docx_from_drive(
|
||||
|
|
@ -324,12 +67,13 @@ pub async fn load_docx_from_drive(
|
|||
.collect()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to read DOCX: {e}"))?
|
||||
.into_bytes();
|
||||
.into_bytes()
|
||||
.to_vec();
|
||||
|
||||
load_docx_from_bytes(&bytes, user_identifier, file_path)
|
||||
load_docx_from_bytes(&bytes, user_identifier, file_path).await
|
||||
}
|
||||
|
||||
pub fn load_docx_from_bytes(
|
||||
pub async fn load_docx_from_bytes(
|
||||
bytes: &[u8],
|
||||
user_identifier: &str,
|
||||
file_path: &str,
|
||||
|
|
@ -341,11 +85,20 @@ pub fn load_docx_from_bytes(
|
|||
.trim_end_matches(".docx")
|
||||
.trim_end_matches(".doc");
|
||||
|
||||
let html_content = convert_docx_to_html(bytes)?;
|
||||
let word_count = count_words(&html_content);
|
||||
let doc_id = generate_doc_id();
|
||||
|
||||
cache_document_bytes(&doc_id, bytes.to_vec()).await;
|
||||
|
||||
let html_content = match load_docx_preserving(bytes) {
|
||||
Ok(ooxml_doc) => {
|
||||
let texts: Vec<String> = ooxml_doc.paragraphs.iter().map(|p| p.text.clone()).collect();
|
||||
paragraphs_to_html(&texts)
|
||||
}
|
||||
Err(_) => convert_docx_to_html(bytes)?,
|
||||
};
|
||||
|
||||
Ok(Document {
|
||||
id: generate_doc_id(),
|
||||
id: doc_id,
|
||||
title: file_name.to_string(),
|
||||
content: html_content,
|
||||
owner_id: user_identifier.to_string(),
|
||||
|
|
@ -358,8 +111,7 @@ pub fn load_docx_from_bytes(
|
|||
}
|
||||
|
||||
pub fn convert_docx_to_html(bytes: &[u8]) -> Result<String, String> {
|
||||
let docx = docx_rs::read_docx(bytes)
|
||||
.map_err(|e| format!("Failed to parse DOCX: {e}"))?;
|
||||
let docx = docx_rs::read_docx(bytes).map_err(|e| format!("Failed to parse DOCX: {e}"))?;
|
||||
|
||||
let mut html = String::new();
|
||||
|
||||
|
|
@ -389,13 +141,9 @@ pub fn convert_docx_to_html(bytes: &[u8]) -> Result<String, String> {
|
|||
for content in ¶.children {
|
||||
if let docx_rs::ParagraphChild::Run(run) = content {
|
||||
let mut run_text = String::new();
|
||||
let mut is_bold = false;
|
||||
let mut is_italic = false;
|
||||
let mut is_underline = false;
|
||||
|
||||
is_bold = run.run_property.bold.is_some();
|
||||
is_italic = run.run_property.italic.is_some();
|
||||
is_underline = run.run_property.underline.is_some();
|
||||
let is_bold = run.run_property.bold.is_some();
|
||||
let is_italic = run.run_property.italic.is_some();
|
||||
let is_underline = run.run_property.underline.is_some();
|
||||
|
||||
for child in &run.children {
|
||||
match child {
|
||||
|
|
@ -473,12 +221,157 @@ pub fn convert_docx_to_html(bytes: &[u8]) -> Result<String, String> {
|
|||
Ok(html)
|
||||
}
|
||||
|
||||
fn escape_html(text: &str) -> String {
|
||||
text.replace('&', "&")
|
||||
.replace('<', "<")
|
||||
.replace('>', ">")
|
||||
.replace('"', """)
|
||||
.replace('\'', "'")
|
||||
pub async fn save_document_as_docx(
|
||||
state: &Arc<AppState>,
|
||||
user_identifier: &str,
|
||||
doc_id: &str,
|
||||
title: &str,
|
||||
content: &str,
|
||||
) -> Result<Vec<u8>, String> {
|
||||
let docx_bytes = if let Some(original_bytes) = get_cached_document_bytes(doc_id).await {
|
||||
let paragraphs = html_to_paragraphs(content);
|
||||
update_docx_text(&original_bytes, ¶graphs).unwrap_or_else(|_| {
|
||||
convert_html_to_docx(title, content).unwrap_or_default()
|
||||
})
|
||||
} else {
|
||||
convert_html_to_docx(title, content)?
|
||||
};
|
||||
|
||||
let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;
|
||||
let base_path = get_user_docs_path(user_identifier);
|
||||
let docx_path = format!("{base_path}/{doc_id}.docx");
|
||||
|
||||
s3_client
|
||||
.put_object()
|
||||
.bucket(&state.bucket_name)
|
||||
.key(&docx_path)
|
||||
.body(ByteStream::from(docx_bytes.clone()))
|
||||
.content_type("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to save DOCX: {e}"))?;
|
||||
|
||||
cache_document_bytes(doc_id, docx_bytes.clone()).await;
|
||||
|
||||
Ok(docx_bytes)
|
||||
}
|
||||
|
||||
/// Builds a new DOCX from editor HTML using docx-rs.
///
/// Fallback path used when no original DOCX bytes are cached: formatting
/// is reconstructed from the HTML (headings, list items, quotes, code,
/// and inline bold/italic/underline) rather than preserved byte-for-byte.
/// A non-empty `title` becomes a bold heading followed by a blank spacer.
///
/// # Errors
/// Returns a message if docx-rs fails to pack the archive.
pub fn convert_html_to_docx(title: &str, html_content: &str) -> Result<Vec<u8>, String> {
    use docx_rs::*;

    let mut docx = Docx::new();

    if !title.is_empty() {
        let title_para = Paragraph::new().add_run(Run::new().add_text(title).bold().size(48));
        docx = docx.add_paragraph(title_para);
        docx = docx.add_paragraph(Paragraph::new());
    }

    let paragraphs = parse_html_to_paragraphs(html_content);
    for para_data in paragraphs {
        let mut paragraph = Paragraph::new();

        // Map each HTML block style onto the closest DOCX construct.
        match para_data.style.as_str() {
            "h1" => {
                paragraph =
                    paragraph.add_run(Run::new().add_text(&para_data.text).bold().size(32));
            }
            "h2" => {
                paragraph =
                    paragraph.add_run(Run::new().add_text(&para_data.text).bold().size(28));
            }
            "h3" => {
                paragraph =
                    paragraph.add_run(Run::new().add_text(&para_data.text).bold().size(24));
            }
            "li" => {
                // List items become plain paragraphs with a literal bullet prefix.
                paragraph = paragraph
                    .add_run(Run::new().add_text("• "))
                    .add_run(Run::new().add_text(&para_data.text));
            }
            "blockquote" => {
                // Indented italic paragraph stands in for a quote block.
                paragraph = paragraph
                    .indent(Some(720), None, None, None)
                    .add_run(Run::new().add_text(&para_data.text).italic());
            }
            "code" => {
                // Monospace font marks code; no shading/border is applied.
                paragraph = paragraph.add_run(
                    Run::new()
                        .add_text(&para_data.text)
                        .fonts(RunFonts::new().ascii("Courier New")),
                );
            }
            _ => {
                // Plain paragraph: apply whichever inline flags were captured.
                let mut run = Run::new().add_text(&para_data.text);
                if para_data.bold {
                    run = run.bold();
                }
                if para_data.italic {
                    run = run.italic();
                }
                if para_data.underline {
                    run = run.underline("single");
                }
                paragraph = paragraph.add_run(run);
            }
        }

        docx = docx.add_paragraph(paragraph);
    }

    // Pack the package into an in-memory buffer.
    let mut buf = Cursor::new(Vec::new());
    docx.build()
        .pack(&mut buf)
        .map_err(|e| format!("Failed to build DOCX: {e}"))?;

    Ok(buf.into_inner())
}
|
||||
|
||||
/// Persists a document's HTML body plus a JSON metadata sidecar to S3.
///
/// Writes `users/<id>/docs/<doc_id>.html` and `<doc_id>.meta.json`; the
/// metadata carries title, timestamps, word count, and a version field.
/// Returns the HTML object's key on success.
///
/// # Errors
/// Returns a message when S3 is unavailable or either upload fails.
pub async fn save_document_to_drive(
    state: &Arc<AppState>,
    user_identifier: &str,
    doc_id: &str,
    title: &str,
    content: &str,
) -> Result<String, String> {
    let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;

    let base_path = get_user_docs_path(user_identifier);
    let doc_path = format!("{base_path}/{doc_id}.html");
    let meta_path = format!("{base_path}/{doc_id}.meta.json");

    s3_client
        .put_object()
        .bucket(&state.bucket_name)
        .key(&doc_path)
        .body(ByteStream::from(content.as_bytes().to_vec()))
        .content_type("text/html")
        .send()
        .await
        .map_err(|e| format!("Failed to save document: {e}"))?;

    let word_count = count_words(content);

    // NOTE(review): created_at is rewritten on every save — if creation
    // time must be stable, read it back from the existing metadata first.
    let metadata = serde_json::json!({
        "id": doc_id,
        "title": title,
        "created_at": Utc::now().to_rfc3339(),
        "updated_at": Utc::now().to_rfc3339(),
        "word_count": word_count,
        "version": 1
    });

    s3_client
        .put_object()
        .bucket(&state.bucket_name)
        .key(&meta_path)
        .body(ByteStream::from(metadata.to_string().into_bytes()))
        .content_type("application/json")
        .send()
        .await
        .map_err(|e| format!("Failed to save metadata: {e}"))?;

    Ok(doc_path)
}
|
||||
|
||||
pub async fn load_document_from_drive(
|
||||
|
|
@ -489,8 +382,8 @@ pub async fn load_document_from_drive(
|
|||
let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;
|
||||
|
||||
let base_path = get_user_docs_path(user_identifier);
|
||||
let doc_path = format!("{}/{}.html", base_path, doc_id);
|
||||
let meta_path = format!("{}/{}.meta.json", base_path, doc_id);
|
||||
let doc_path = format!("{base_path}/{doc_id}.html");
|
||||
let meta_path = format!("{base_path}/{doc_id}.meta.json");
|
||||
|
||||
let content = match s3_client
|
||||
.get_object()
|
||||
|
|
@ -564,7 +457,7 @@ pub async fn list_documents_from_drive(
|
|||
let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;
|
||||
|
||||
let base_path = get_user_docs_path(user_identifier);
|
||||
let prefix = format!("{}/", base_path);
|
||||
let prefix = format!("{base_path}/");
|
||||
let mut documents = Vec::new();
|
||||
|
||||
if let Ok(result) = s3_client
|
||||
|
|
@ -590,10 +483,7 @@ pub async fn list_documents_from_drive(
|
|||
serde_json::from_str::<serde_json::Value>(&meta_str)
|
||||
{
|
||||
let doc_meta = DocumentMetadata {
|
||||
id: meta["id"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string(),
|
||||
id: meta["id"].as_str().unwrap_or_default().to_string(),
|
||||
title: meta["title"]
|
||||
.as_str()
|
||||
.unwrap_or("Untitled")
|
||||
|
|
@ -611,7 +501,7 @@ pub async fn list_documents_from_drive(
|
|||
.unwrap_or_else(Utc::now),
|
||||
word_count: meta["word_count"].as_u64().unwrap_or(0)
|
||||
as usize,
|
||||
storage_type: "drive".to_string(),
|
||||
storage_type: "html".to_string(),
|
||||
};
|
||||
documents.push(doc_meta);
|
||||
}
|
||||
|
|
@ -635,40 +525,28 @@ pub async fn delete_document_from_drive(
|
|||
let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;
|
||||
|
||||
let base_path = get_user_docs_path(user_identifier);
|
||||
let doc_path = format!("{}/{}.html", base_path, doc_id);
|
||||
let meta_path = format!("{}/{}.meta.json", base_path, doc_id);
|
||||
let docx_path = format!("{}/{}.docx", base_path, doc_id);
|
||||
|
||||
let _ = s3_client
|
||||
.delete_object()
|
||||
.bucket(&state.bucket_name)
|
||||
.key(&doc_path)
|
||||
.send()
|
||||
.await;
|
||||
for ext in &[".html", ".docx", ".meta.json"] {
|
||||
let path = format!("{base_path}/{doc_id}{ext}");
|
||||
let _ = s3_client
|
||||
.delete_object()
|
||||
.bucket(&state.bucket_name)
|
||||
.key(&path)
|
||||
.send()
|
||||
.await;
|
||||
}
|
||||
|
||||
let _ = s3_client
|
||||
.delete_object()
|
||||
.bucket(&state.bucket_name)
|
||||
.key(&meta_path)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
let _ = s3_client
|
||||
.delete_object()
|
||||
.bucket(&state.bucket_name)
|
||||
.key(&docx_path)
|
||||
.send()
|
||||
.await;
|
||||
remove_from_cache(doc_id).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn create_new_document() -> Document {
|
||||
let id = generate_doc_id();
|
||||
let doc_id = generate_doc_id();
|
||||
Document {
|
||||
id: id.clone(),
|
||||
id: doc_id,
|
||||
title: "Untitled Document".to_string(),
|
||||
content: String::new(),
|
||||
content: "<p><br></p>".to_string(),
|
||||
owner_id: get_current_user_id(),
|
||||
storage_path: String::new(),
|
||||
created_at: Utc::now(),
|
||||
|
|
@ -706,3 +584,146 @@ fn strip_html(html: &str) -> String {
|
|||
.replace(">", ">")
|
||||
.replace(""", "\"")
|
||||
}
|
||||
|
||||
/// Encodes the five standard HTML character entities.
///
/// Fix: the rendered source had lost the entity literals, turning every
/// replacement into an identity no-op. Restored, with `&` escaped FIRST
/// so ampersands introduced by the later replacements are not re-escaped.
fn escape_html(text: &str) -> String {
    text.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
        .replace('"', "&quot;")
        .replace('\'', "&apos;")
}
|
||||
|
||||
fn paragraphs_to_html(paragraphs: &[String]) -> String {
|
||||
paragraphs
|
||||
.iter()
|
||||
.map(|p| format!("<p>{}</p>", escape_html(p)))
|
||||
.collect::<Vec<_>>()
|
||||
.join("")
|
||||
}
|
||||
|
||||
fn html_to_paragraphs(html: &str) -> Vec<String> {
|
||||
parse_html_to_paragraphs(html)
|
||||
.into_iter()
|
||||
.map(|p| p.text)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Intermediate representation of one HTML block element while converting
/// editor HTML into DOCX paragraphs.
#[derive(Default, Clone)]
struct ParagraphData {
    /// Plain text content with HTML entities decoded.
    text: String,
    /// Source element kind ("p", "h1"–"h3", "li", "blockquote", "code");
    /// empty string for text outside any recognized block element.
    style: String,
    /// Inline formatting flags captured from surrounding <b>/<strong> tags.
    bold: bool,
    /// Captured from surrounding <i>/<em> tags.
    italic: bool,
    /// Captured from surrounding <u> tags.
    underline: bool,
}
|
||||
|
||||
/// Character-level HTML parser that splits editor HTML into paragraphs.
///
/// Walks the input one char at a time, tracking whether we are inside a tag,
/// the tag's name, and whether it is a closing tag. Text outside tags
/// accumulates in `text_buffer`; block-level closing tags flush the buffer
/// into `current` and push the finished paragraph. Nested bold/italic/
/// underline tags are counted with saturating counters so unbalanced HTML
/// cannot underflow.
///
/// NOTE(review): attributes are ignored beyond the tag name, and `<` / `>`
/// inside attribute values would confuse the scan — presumably acceptable
/// for the editor's own output; confirm against the HTML source.
fn parse_html_to_paragraphs(html: &str) -> Vec<ParagraphData> {
    let mut paragraphs = Vec::new();
    let mut current = ParagraphData::default();
    let mut in_tag = false;
    let mut tag_name = String::new();
    let mut is_closing = false;
    let mut text_buffer = String::new();

    // Depth counters for inline formatting tags (handles nesting).
    let mut bold_stack: i32 = 0;
    let mut italic_stack: i32 = 0;
    let mut underline_stack: i32 = 0;

    for ch in html.chars() {
        match ch {
            '<' => {
                // Start of a tag: reset tag state.
                in_tag = true;
                tag_name.clear();
                is_closing = false;
            }
            '>' => {
                // End of a tag: dispatch on the (lowercased) tag name,
                // ignoring any attributes after the first whitespace.
                in_tag = false;
                let tag = tag_name.to_lowercase();
                let tag_trimmed = tag.split_whitespace().next().unwrap_or("");

                if is_closing {
                    match tag_trimmed {
                        // A closing block tag finishes the current paragraph.
                        "p" | "div" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li"
                        | "blockquote" | "pre" => {
                            if !text_buffer.is_empty() || !current.text.is_empty() {
                                current.text = format!(
                                    "{}{}",
                                    current.text,
                                    decode_html_entities(&text_buffer)
                                );
                                // Whitespace-only paragraphs are dropped.
                                if !current.text.trim().is_empty() {
                                    paragraphs.push(current);
                                }
                                current = ParagraphData::default();
                                text_buffer.clear();
                            }
                        }
                        "b" | "strong" => bold_stack = bold_stack.saturating_sub(1),
                        "i" | "em" => italic_stack = italic_stack.saturating_sub(1),
                        "u" => underline_stack = underline_stack.saturating_sub(1),
                        _ => {}
                    }
                } else {
                    match tag_trimmed {
                        // Line break inside a paragraph.
                        "br" => {
                            text_buffer.push('\n');
                        }
                        // An opening block tag flushes pending text and
                        // snapshots the inline-formatting state.
                        "p" | "div" => {
                            if !text_buffer.is_empty() {
                                current.text = format!(
                                    "{}{}",
                                    current.text,
                                    decode_html_entities(&text_buffer)
                                );
                                text_buffer.clear();
                            }
                            current.style = "p".to_string();
                            current.bold = bold_stack > 0;
                            current.italic = italic_stack > 0;
                            current.underline = underline_stack > 0;
                        }
                        "h1" => current.style = "h1".to_string(),
                        "h2" => current.style = "h2".to_string(),
                        "h3" => current.style = "h3".to_string(),
                        "li" => current.style = "li".to_string(),
                        "blockquote" => current.style = "blockquote".to_string(),
                        "pre" | "code" => current.style = "code".to_string(),
                        "b" | "strong" => bold_stack += 1,
                        "i" | "em" => italic_stack += 1,
                        "u" => underline_stack += 1,
                        _ => {}
                    }
                }
                tag_name.clear();
            }
            // A '/' immediately after '<' marks a closing tag.
            '/' if in_tag && tag_name.is_empty() => {
                is_closing = true;
            }
            _ if in_tag => {
                tag_name.push(ch);
            }
            _ => {
                text_buffer.push(ch);
            }
        }
    }

    // Flush any trailing text that was not terminated by a closing tag.
    if !text_buffer.is_empty() {
        current.text = format!("{}{}", current.text, decode_html_entities(&text_buffer));
    }
    if !current.text.trim().is_empty() {
        paragraphs.push(current);
    }

    paragraphs
}
|
||||
|
||||
/// Decodes the HTML entities the editor emits back into plain text.
///
/// `&amp;` is decoded LAST: decoding it first would turn double-encoded
/// input such as `&amp;lt;` into `&lt;` and then into `<`, corrupting text
/// that legitimately contained an escaped entity.
fn decode_html_entities(text: &str) -> String {
    text.replace("&nbsp;", " ")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
pub mod collaboration;
|
||||
pub mod handlers;
|
||||
pub mod ooxml;
|
||||
pub mod storage;
|
||||
pub mod types;
|
||||
pub mod utils;
|
||||
|
|
|
|||
259
src/slides/ooxml.rs
Normal file
259
src/slides/ooxml.rs
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
use std::io::Cursor;
|
||||
|
||||
/// A PPTX deck loaded for round-trip editing: the untouched source archive
/// bytes plus the text extracted from each slide.
pub struct OoxmlPresentation {
    // Raw bytes of the original .pptx file, kept verbatim so edits can be
    // written back without losing untouched parts of the package.
    pub original_bytes: Vec<u8>,
    // Extracted text for each slide, in slide order.
    pub slides: Vec<SlideInfo>,
}
|
||||
|
||||
/// Text content pulled from a single slide.
pub struct SlideInfo {
    // Zero-based position of the slide within the presentation.
    pub index: usize,
    // One entry per non-empty text paragraph found on the slide.
    pub texts: Vec<String>,
}
|
||||
|
||||
pub fn load_pptx_preserving(bytes: &[u8]) -> Result<OoxmlPresentation, String> {
|
||||
use ooxmlsdk::parts::presentation_document::PresentationDocument;
|
||||
|
||||
let reader = Cursor::new(bytes);
|
||||
let pptx = PresentationDocument::new(reader)
|
||||
.map_err(|e| format!("Failed to parse PPTX: {e}"))?;
|
||||
|
||||
let mut slides = Vec::new();
|
||||
|
||||
for (idx, slide_part) in pptx.presentation_part.slide_parts.iter().enumerate() {
|
||||
let xml_str = slide_part.root_element.to_xml().unwrap_or_default();
|
||||
|
||||
let texts = extract_texts_from_slide(&xml_str);
|
||||
slides.push(SlideInfo { index: idx, texts });
|
||||
}
|
||||
|
||||
Ok(OoxmlPresentation {
|
||||
original_bytes: bytes.to_vec(),
|
||||
slides,
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_texts_from_slide(xml: &str) -> Vec<String> {
|
||||
let mut texts = Vec::new();
|
||||
let mut pos = 0;
|
||||
|
||||
while let Some(p_start) = xml[pos..].find("<a:p") {
|
||||
let abs_start = pos + p_start;
|
||||
|
||||
if let Some(p_end_rel) = xml[abs_start..].find("</a:p>") {
|
||||
let abs_end = abs_start + p_end_rel + 6;
|
||||
let para_content = &xml[abs_start..abs_end];
|
||||
|
||||
let text = extract_text_from_paragraph(para_content);
|
||||
if !text.trim().is_empty() {
|
||||
texts.push(text);
|
||||
}
|
||||
pos = abs_end;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
texts
|
||||
}
|
||||
|
||||
fn extract_text_from_paragraph(para_xml: &str) -> String {
|
||||
let mut text = String::new();
|
||||
let mut pos = 0;
|
||||
|
||||
while let Some(t_start) = para_xml[pos..].find("<a:t") {
|
||||
let abs_start = pos + t_start;
|
||||
|
||||
if let Some(tag_end_rel) = para_xml[abs_start..].find('>') {
|
||||
let abs_content_start = abs_start + tag_end_rel + 1;
|
||||
|
||||
if let Some(t_end_rel) = para_xml[abs_content_start..].find("</a:t>") {
|
||||
let content = ¶_xml[abs_content_start..abs_content_start + t_end_rel];
|
||||
text.push_str(content);
|
||||
pos = abs_content_start + t_end_rel + 6;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unescape_xml(&text)
|
||||
}
|
||||
|
||||
/// Decodes the five predefined XML entities.
///
/// `&amp;` is decoded LAST: decoding it first would turn `&amp;lt;` into
/// `&lt;` and then into `<`, double-decoding legitimately escaped text.
fn unescape_xml(text: &str) -> String {
    text.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
|
||||
|
||||
/// Escapes text for safe embedding inside XML element content.
///
/// `&` is escaped FIRST so the `&` introduced by the other replacements is
/// not itself re-escaped. Apostrophes use `&apos;`, which is valid in XML.
fn escape_xml(text: &str) -> String {
    text.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
        .replace('"', "&quot;")
        .replace('\'', "&apos;")
}
|
||||
|
||||
pub fn save_pptx_preserving(original_bytes: &[u8]) -> Result<Vec<u8>, String> {
|
||||
use ooxmlsdk::parts::presentation_document::PresentationDocument;
|
||||
|
||||
let reader = Cursor::new(original_bytes);
|
||||
let pptx = PresentationDocument::new(reader)
|
||||
.map_err(|e| format!("Failed to parse PPTX: {e}"))?;
|
||||
|
||||
let mut output = Cursor::new(Vec::new());
|
||||
pptx.save(&mut output)
|
||||
.map_err(|e| format!("Failed to save PPTX: {e}"))?;
|
||||
|
||||
Ok(output.into_inner())
|
||||
}
|
||||
|
||||
pub fn update_pptx_text(
|
||||
original_bytes: &[u8],
|
||||
new_slide_texts: &[Vec<String>],
|
||||
) -> Result<Vec<u8>, String> {
|
||||
use std::io::{Read, Write};
|
||||
use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter};
|
||||
|
||||
let reader = Cursor::new(original_bytes);
|
||||
let mut archive =
|
||||
ZipArchive::new(reader).map_err(|e| format!("Failed to open PPTX archive: {e}"))?;
|
||||
|
||||
let mut output_buf = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut zip_writer = ZipWriter::new(&mut output_buf);
|
||||
let options =
|
||||
SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated);
|
||||
|
||||
for i in 0..archive.len() {
|
||||
let mut file = archive
|
||||
.by_index(i)
|
||||
.map_err(|e| format!("Failed to read archive entry: {e}"))?;
|
||||
|
||||
let name = file.name().to_string();
|
||||
|
||||
if name.starts_with("ppt/slides/slide") && name.ends_with(".xml") {
|
||||
let slide_num = extract_slide_number(&name);
|
||||
|
||||
let mut content = String::new();
|
||||
file.read_to_string(&mut content)
|
||||
.map_err(|e| format!("Failed to read slide xml: {e}"))?;
|
||||
|
||||
let modified_content = if slide_num > 0 && slide_num <= new_slide_texts.len() {
|
||||
replace_slide_texts(&content, &new_slide_texts[slide_num - 1])
|
||||
} else {
|
||||
content
|
||||
};
|
||||
|
||||
zip_writer
|
||||
.start_file(&name, options)
|
||||
.map_err(|e| format!("Failed to start file in zip: {e}"))?;
|
||||
zip_writer
|
||||
.write_all(modified_content.as_bytes())
|
||||
.map_err(|e| format!("Failed to write slide xml: {e}"))?;
|
||||
} else {
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf)
|
||||
.map_err(|e| format!("Failed to read file: {e}"))?;
|
||||
|
||||
zip_writer
|
||||
.start_file(&name, options)
|
||||
.map_err(|e| format!("Failed to start file in zip: {e}"))?;
|
||||
zip_writer
|
||||
.write_all(&buf)
|
||||
.map_err(|e| format!("Failed to write file: {e}"))?;
|
||||
}
|
||||
}
|
||||
|
||||
zip_writer
|
||||
.finish()
|
||||
.map_err(|e| format!("Failed to finish zip: {e}"))?;
|
||||
}
|
||||
|
||||
Ok(output_buf.into_inner())
|
||||
}
|
||||
|
||||
/// Parses the 1-based slide number out of an archive entry name such as
/// `ppt/slides/slide3.xml`. Returns 0 when no number can be parsed.
fn extract_slide_number(filename: &str) -> usize {
    let without_prefix = filename.trim_start_matches("ppt/slides/slide");
    let digits = without_prefix.trim_end_matches(".xml");
    match digits.parse() {
        Ok(n) => n,
        Err(_) => 0,
    }
}
|
||||
|
||||
fn replace_slide_texts(xml: &str, new_texts: &[String]) -> String {
|
||||
let mut result = xml.to_string();
|
||||
let mut text_idx = 0;
|
||||
let mut search_pos = 0;
|
||||
|
||||
while let Some(p_start) = result[search_pos..]
|
||||
.find("<a:p>")
|
||||
.or_else(|| result[search_pos..].find("<a:p "))
|
||||
{
|
||||
let abs_start = search_pos + p_start;
|
||||
|
||||
if let Some(p_end_rel) = result[abs_start..].find("</a:p>") {
|
||||
let abs_end = abs_start + p_end_rel + 6;
|
||||
let para_content = result[abs_start..abs_end].to_string();
|
||||
|
||||
if para_content.contains("<a:t") {
|
||||
if text_idx < new_texts.len() {
|
||||
let new_para = replace_first_text_run(¶_content, &new_texts[text_idx]);
|
||||
let new_len = new_para.len();
|
||||
result = format!("{}{}{}", &result[..abs_start], new_para, &result[abs_end..]);
|
||||
search_pos = abs_start + new_len;
|
||||
} else {
|
||||
search_pos = abs_end;
|
||||
}
|
||||
text_idx += 1;
|
||||
} else {
|
||||
search_pos = abs_end;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn replace_first_text_run(para_xml: &str, new_text: &str) -> String {
|
||||
let mut result = para_xml.to_string();
|
||||
let mut found_first = false;
|
||||
|
||||
let mut search_pos = 0;
|
||||
while let Some(t_start) = result[search_pos..].find("<a:t") {
|
||||
let abs_start = search_pos + t_start;
|
||||
|
||||
if let Some(tag_end_rel) = result[abs_start..].find('>') {
|
||||
let abs_content_start = abs_start + tag_end_rel + 1;
|
||||
|
||||
if let Some(t_end_rel) = result[abs_content_start..].find("</a:t>") {
|
||||
let abs_content_end = abs_content_start + t_end_rel;
|
||||
|
||||
if !found_first {
|
||||
let escaped = escape_xml(new_text);
|
||||
result = format!(
|
||||
"{}{}{}",
|
||||
&result[..abs_content_start],
|
||||
escaped,
|
||||
&result[abs_content_end..]
|
||||
);
|
||||
found_first = true;
|
||||
search_pos = abs_content_start + escaped.len() + 6;
|
||||
} else {
|
||||
result = format!("{}{}", &result[..abs_content_start], &result[abs_content_end..]);
|
||||
search_pos = abs_content_start;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
|
@ -1,18 +1,26 @@
|
|||
use crate::shared::state::AppState;
|
||||
use crate::slides::ooxml::{load_pptx_preserving, update_pptx_text};
|
||||
use crate::slides::types::{
|
||||
ElementContent, ElementStyle, Presentation, PresentationMetadata, Slide,
|
||||
SlideBackground, SlideElement,
|
||||
};
|
||||
use crate::slides::utils::{create_content_slide, create_default_theme, create_title_slide};
|
||||
use chrono::Utc;
|
||||
use chrono::{DateTime, Utc};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{Cursor, Read, Write};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use uuid::Uuid;
|
||||
use zip::write::SimpleFileOptions;
|
||||
use zip::{ZipArchive, ZipWriter};
|
||||
|
||||
// In-memory cache of raw PPTX bytes keyed by presentation id, so edits can
// later be written back into the original archive without losing untouched
// parts. Each entry pairs the bytes with the time they were cached.
static PRESENTATION_CACHE: once_cell::sync::Lazy<RwLock<HashMap<String, (Vec<u8>, DateTime<Utc>)>>> =
    once_cell::sync::Lazy::new(|| RwLock::new(HashMap::new()));

// Entries older than this many seconds are evicted the next time the cache
// is written to (one hour).
const CACHE_TTL_SECS: i64 = 3600;
|
||||
|
||||
/// Returns the storage folder that holds a user's presentations.
pub fn get_user_presentations_path(user_id: &str) -> String {
    let mut path = String::with_capacity("users//presentations".len() + user_id.len());
    path.push_str("users/");
    path.push_str(user_id);
    path.push_str("/presentations");
    path
}
|
||||
|
||||
pub fn get_current_user_id() -> String {
|
||||
|
|
@ -23,10 +31,28 @@ pub fn generate_presentation_id() -> String {
|
|||
Uuid::new_v4().to_string()
|
||||
}
|
||||
|
||||
pub async fn cache_presentation_bytes(pres_id: &str, bytes: Vec<u8>) {
|
||||
let mut cache = PRESENTATION_CACHE.write().await;
|
||||
cache.insert(pres_id.to_string(), (bytes, Utc::now()));
|
||||
|
||||
let now = Utc::now();
|
||||
cache.retain(|_, (_, modified)| (now - *modified).num_seconds() < CACHE_TTL_SECS);
|
||||
}
|
||||
|
||||
pub async fn get_cached_presentation_bytes(pres_id: &str) -> Option<Vec<u8>> {
|
||||
let cache = PRESENTATION_CACHE.read().await;
|
||||
cache.get(pres_id).map(|(bytes, _)| bytes.clone())
|
||||
}
|
||||
|
||||
pub async fn remove_from_cache(pres_id: &str) {
|
||||
let mut cache = PRESENTATION_CACHE.write().await;
|
||||
cache.remove(pres_id);
|
||||
}
|
||||
|
||||
fn extract_id_from_path(path: &str) -> String {
|
||||
path.split('/')
|
||||
.last()
|
||||
.unwrap_or("")
|
||||
.unwrap_or_default()
|
||||
.trim_end_matches(".json")
|
||||
.trim_end_matches(".pptx")
|
||||
.to_string()
|
||||
|
|
@ -68,7 +94,22 @@ pub async fn save_presentation_as_pptx(
|
|||
user_id: &str,
|
||||
presentation: &Presentation,
|
||||
) -> Result<Vec<u8>, String> {
|
||||
let pptx_bytes = convert_to_pptx(presentation)?;
|
||||
let pptx_bytes = if let Some(original_bytes) = get_cached_presentation_bytes(&presentation.id).await {
|
||||
let slide_texts: Vec<Vec<String>> = presentation.slides.iter().map(|slide| {
|
||||
slide.elements.iter().filter_map(|el| {
|
||||
if let ElementContent::Text { text, .. } = &el.content {
|
||||
Some(text.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}).collect()
|
||||
}).collect();
|
||||
update_pptx_text(&original_bytes, &slide_texts).unwrap_or_else(|_| {
|
||||
convert_to_pptx(presentation).unwrap_or_default()
|
||||
})
|
||||
} else {
|
||||
convert_to_pptx(presentation)?
|
||||
};
|
||||
|
||||
let drive = state
|
||||
.drive
|
||||
|
|
@ -484,12 +525,13 @@ pub async fn load_pptx_from_drive(
|
|||
.collect()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to read PPTX: {e}"))?
|
||||
.into_bytes();
|
||||
.into_bytes()
|
||||
.to_vec();
|
||||
|
||||
load_pptx_from_bytes(&bytes, user_id, file_path)
|
||||
load_pptx_from_bytes(&bytes, user_id, file_path).await
|
||||
}
|
||||
|
||||
pub fn load_pptx_from_bytes(
|
||||
pub async fn load_pptx_from_bytes(
|
||||
bytes: &[u8],
|
||||
user_id: &str,
|
||||
file_path: &str,
|
||||
|
|
@ -505,6 +547,10 @@ pub fn load_pptx_from_bytes(
|
|||
.trim_end_matches(".pptx")
|
||||
.trim_end_matches(".ppt");
|
||||
|
||||
let pres_id = generate_presentation_id();
|
||||
|
||||
cache_presentation_bytes(&pres_id, bytes.to_vec()).await;
|
||||
|
||||
let mut slides = Vec::new();
|
||||
let mut slide_num = 1;
|
||||
|
||||
|
|
@ -528,7 +574,7 @@ pub fn load_pptx_from_bytes(
|
|||
}
|
||||
|
||||
Ok(Presentation {
|
||||
id: generate_presentation_id(),
|
||||
id: pres_id,
|
||||
name: file_name.to_string(),
|
||||
owner_id: user_id.to_string(),
|
||||
slides,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue