//! Document storage helpers: S3-backed persistence, an in-memory DOCX byte
//! cache, and DOCX <-> HTML conversion utilities.
use crate::docs::ooxml::{load_docx_preserving, update_docx_text};
|
|
use crate::docs::types::{Document, DocumentMetadata};
|
|
use crate::shared::state::AppState;
|
|
use aws_sdk_s3::primitives::ByteStream;
|
|
use chrono::{DateTime, Utc};
|
|
use std::collections::HashMap;
|
|
use std::io::Cursor;
|
|
use std::sync::Arc;
|
|
use tokio::sync::RwLock;
|
|
use uuid::Uuid;
|
|
|
|
// In-memory cache of raw DOCX bytes keyed by document id, paired with the
// time the entry was stored. Keeping the original bytes lets edits be patched
// into the original OOXML file instead of regenerating a DOCX from HTML.
static DOCUMENT_CACHE: once_cell::sync::Lazy<RwLock<HashMap<String, (Vec<u8>, DateTime<Utc>)>>> =
    once_cell::sync::Lazy::new(|| RwLock::new(HashMap::new()));

// Maximum age (seconds) of a cache entry; expired entries are pruned on insert.
const CACHE_TTL_SECS: i64 = 3600;
|
|
|
|
/// Builds the object-key prefix under which a user's documents live.
///
/// Characters that are problematic in object keys (`/ \ : * ? " < > |`) are
/// replaced with `_`, and the identifier is lowercased, yielding
/// `users/<safe-id>/docs`.
pub fn get_user_docs_path(user_identifier: &str) -> String {
    let sanitized: String = user_identifier
        .chars()
        .map(|c| match c {
            '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '_',
            other => other,
        })
        .collect();
    format!("users/{}/docs", sanitized.to_lowercase())
}
|
|
|
|
/// Returns the identifier of the current user.
///
/// Authentication is not wired in yet, so this is a fixed placeholder
/// identity shared by all callers.
pub fn get_current_user_id() -> String {
    String::from("default-user")
}
|
|
|
|
/// Generates a fresh, globally unique document id (random UUID v4 as a string).
pub fn generate_doc_id() -> String {
    Uuid::new_v4().to_string()
}
|
|
|
|
/// Stores the raw DOCX bytes for `doc_id` in the in-memory cache, stamped
/// with the current time, then prunes every entry older than
/// `CACHE_TTL_SECS`.
///
/// Expiry is enforced lazily on each insert rather than by a background task.
pub async fn cache_document_bytes(doc_id: &str, bytes: Vec<u8>) {
    let mut cache = DOCUMENT_CACHE.write().await;
    cache.insert(doc_id.to_string(), (bytes, Utc::now()));

    // Evict stale entries while we already hold the write lock.
    let now = Utc::now();
    cache.retain(|_, (_, modified)| (now - *modified).num_seconds() < CACHE_TTL_SECS);
}
|
|
|
|
pub async fn get_cached_document_bytes(doc_id: &str) -> Option<Vec<u8>> {
|
|
let cache = DOCUMENT_CACHE.read().await;
|
|
cache.get(doc_id).map(|(bytes, _)| bytes.clone())
|
|
}
|
|
|
|
pub async fn remove_from_cache(doc_id: &str) {
|
|
let mut cache = DOCUMENT_CACHE.write().await;
|
|
cache.remove(doc_id);
|
|
}
|
|
|
|
/// Downloads the DOCX object at `file_path` from the configured S3 bucket
/// and converts it into an in-memory `Document` owned by `user_identifier`.
///
/// # Errors
/// Returns a human-readable error string when the S3 service is not
/// configured, the object cannot be fetched, or the response body cannot be
/// read.
pub async fn load_docx_from_drive(
    state: &Arc<AppState>,
    user_identifier: &str,
    file_path: &str,
) -> Result<Document, String> {
    let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;

    let result = s3_client
        .get_object()
        .bucket(&state.bucket_name)
        .key(file_path)
        .send()
        .await
        .map_err(|e| format!("Failed to load DOCX: {e}"))?;

    // Drain the streaming body into a contiguous byte buffer.
    let bytes = result
        .body
        .collect()
        .await
        .map_err(|e| format!("Failed to read DOCX: {e}"))?
        .into_bytes()
        .to_vec();

    load_docx_from_bytes(&bytes, user_identifier, file_path).await
}
|
|
|
|
/// Builds a `Document` from raw DOCX bytes.
///
/// The title is derived from the last path segment of `file_path` with any
/// `.docx`/`.doc` suffix removed. A fresh document id is generated and the
/// original bytes are cached under it so later saves can patch the original
/// file instead of regenerating it. HTML content is produced by the
/// formatting-preserving OOXML loader when possible, falling back to the
/// simpler `convert_docx_to_html` parser.
pub async fn load_docx_from_bytes(
    bytes: &[u8],
    user_identifier: &str,
    file_path: &str,
) -> Result<Document, String> {
    let file_name = file_path
        .split('/')
        .last()
        .unwrap_or("Untitled")
        .trim_end_matches(".docx")
        .trim_end_matches(".doc");

    let doc_id = generate_doc_id();

    // Keep the pristine bytes around for round-trip edits.
    cache_document_bytes(&doc_id, bytes.to_vec()).await;

    let html_content = match load_docx_preserving(bytes) {
        Ok(ooxml_doc) => {
            let texts: Vec<String> = ooxml_doc.paragraphs.iter().map(|p| p.text.clone()).collect();
            paragraphs_to_html(&texts)
        }
        // Fallback parser; propagates its own error if it also fails.
        Err(_) => convert_docx_to_html(bytes)?,
    };

    Ok(Document {
        id: doc_id,
        title: file_name.to_string(),
        content: html_content,
        owner_id: user_identifier.to_string(),
        storage_path: file_path.to_string(),
        created_at: Utc::now(),
        updated_at: Utc::now(),
        collaborators: Vec::new(),
        version: 1,
        track_changes: None,
        comments: None,
        footnotes: None,
        endnotes: None,
        styles: None,
        toc: None,
        track_changes_enabled: false,
    })
}
|
|
|
|
pub fn convert_docx_to_html(bytes: &[u8]) -> Result<String, String> {
|
|
let docx = docx_rs::read_docx(bytes).map_err(|e| format!("Failed to parse DOCX: {e}"))?;
|
|
|
|
let mut html = String::new();
|
|
|
|
for child in docx.document.children {
|
|
match child {
|
|
docx_rs::DocumentChild::Paragraph(para) => {
|
|
let mut para_html = String::new();
|
|
let mut is_heading = false;
|
|
let mut heading_level = 0u8;
|
|
|
|
if let Some(style) = ¶.property.style {
|
|
let style_id = style.val.to_lowercase();
|
|
if style_id.starts_with("heading") || style_id.starts_with("title") {
|
|
is_heading = true;
|
|
heading_level = style_id
|
|
.chars()
|
|
.filter(|c| c.is_ascii_digit())
|
|
.collect::<String>()
|
|
.parse()
|
|
.unwrap_or(1);
|
|
if heading_level == 0 {
|
|
heading_level = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
for content in ¶.children {
|
|
if let docx_rs::ParagraphChild::Run(run) = content {
|
|
let mut run_text = String::new();
|
|
let is_bold = run.run_property.bold.is_some();
|
|
let is_italic = run.run_property.italic.is_some();
|
|
let is_underline = run.run_property.underline.is_some();
|
|
|
|
for child in &run.children {
|
|
match child {
|
|
docx_rs::RunChild::Text(text) => {
|
|
run_text.push_str(&escape_html(&text.text));
|
|
}
|
|
docx_rs::RunChild::Break(_) => {
|
|
run_text.push_str("<br>");
|
|
}
|
|
docx_rs::RunChild::Tab(_) => {
|
|
run_text.push_str(" ");
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
if !run_text.is_empty() {
|
|
if is_bold {
|
|
run_text = format!("<strong>{run_text}</strong>");
|
|
}
|
|
if is_italic {
|
|
run_text = format!("<em>{run_text}</em>");
|
|
}
|
|
if is_underline {
|
|
run_text = format!("<u>{run_text}</u>");
|
|
}
|
|
para_html.push_str(&run_text);
|
|
}
|
|
}
|
|
}
|
|
|
|
if !para_html.is_empty() {
|
|
if is_heading && heading_level > 0 && heading_level <= 6 {
|
|
html.push_str(&format!("<h{heading_level}>{para_html}</h{heading_level}>"));
|
|
} else {
|
|
html.push_str(&format!("<p>{para_html}</p>"));
|
|
}
|
|
} else {
|
|
html.push_str("<p><br></p>");
|
|
}
|
|
}
|
|
docx_rs::DocumentChild::Table(table) => {
|
|
html.push_str("<table style=\"border-collapse:collapse;width:100%\">");
|
|
for row in &table.rows {
|
|
let docx_rs::TableChild::TableRow(tr) = row;
|
|
html.push_str("<tr>");
|
|
for cell in &tr.cells {
|
|
let docx_rs::TableRowChild::TableCell(tc) = cell;
|
|
html.push_str("<td style=\"border:1px solid #ccc;padding:8px\">");
|
|
for para in &tc.children {
|
|
if let docx_rs::TableCellContent::Paragraph(p) = para {
|
|
for content in &p.children {
|
|
if let docx_rs::ParagraphChild::Run(run) = content {
|
|
for child in &run.children {
|
|
if let docx_rs::RunChild::Text(text) = child {
|
|
html.push_str(&escape_html(&text.text));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
html.push_str("</td>");
|
|
}
|
|
html.push_str("</tr>");
|
|
}
|
|
html.push_str("</table>");
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
Ok(html)
|
|
}
|
|
|
|
pub async fn save_document_as_docx(
|
|
state: &Arc<AppState>,
|
|
user_identifier: &str,
|
|
doc_id: &str,
|
|
title: &str,
|
|
content: &str,
|
|
) -> Result<Vec<u8>, String> {
|
|
let docx_bytes = if let Some(original_bytes) = get_cached_document_bytes(doc_id).await {
|
|
let paragraphs = html_to_paragraphs(content);
|
|
update_docx_text(&original_bytes, ¶graphs).unwrap_or_else(|_| {
|
|
convert_html_to_docx(title, content).unwrap_or_default()
|
|
})
|
|
} else {
|
|
convert_html_to_docx(title, content)?
|
|
};
|
|
|
|
let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;
|
|
let base_path = get_user_docs_path(user_identifier);
|
|
let docx_path = format!("{base_path}/{doc_id}.docx");
|
|
|
|
s3_client
|
|
.put_object()
|
|
.bucket(&state.bucket_name)
|
|
.key(&docx_path)
|
|
.body(ByteStream::from(docx_bytes.clone()))
|
|
.content_type("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
|
|
.send()
|
|
.await
|
|
.map_err(|e| format!("Failed to save DOCX: {e}"))?;
|
|
|
|
cache_document_bytes(doc_id, docx_bytes.clone()).await;
|
|
|
|
Ok(docx_bytes)
|
|
}
|
|
|
|
pub fn convert_html_to_docx(title: &str, html_content: &str) -> Result<Vec<u8>, String> {
|
|
use docx_rs::*;
|
|
|
|
let mut docx = Docx::new();
|
|
|
|
if !title.is_empty() {
|
|
let title_para = Paragraph::new().add_run(Run::new().add_text(title).bold().size(48));
|
|
docx = docx.add_paragraph(title_para);
|
|
docx = docx.add_paragraph(Paragraph::new());
|
|
}
|
|
|
|
let paragraphs = parse_html_to_paragraphs(html_content);
|
|
for para_data in paragraphs {
|
|
let mut paragraph = Paragraph::new();
|
|
|
|
match para_data.style.as_str() {
|
|
"h1" => {
|
|
paragraph =
|
|
paragraph.add_run(Run::new().add_text(¶_data.text).bold().size(32));
|
|
}
|
|
"h2" => {
|
|
paragraph =
|
|
paragraph.add_run(Run::new().add_text(¶_data.text).bold().size(28));
|
|
}
|
|
"h3" => {
|
|
paragraph =
|
|
paragraph.add_run(Run::new().add_text(¶_data.text).bold().size(24));
|
|
}
|
|
"li" => {
|
|
paragraph = paragraph
|
|
.add_run(Run::new().add_text("• "))
|
|
.add_run(Run::new().add_text(¶_data.text));
|
|
}
|
|
"blockquote" => {
|
|
paragraph = paragraph
|
|
.indent(Some(720), None, None, None)
|
|
.add_run(Run::new().add_text(¶_data.text).italic());
|
|
}
|
|
"code" => {
|
|
paragraph = paragraph.add_run(
|
|
Run::new()
|
|
.add_text(¶_data.text)
|
|
.fonts(RunFonts::new().ascii("Courier New")),
|
|
);
|
|
}
|
|
_ => {
|
|
let mut run = Run::new().add_text(¶_data.text);
|
|
if para_data.bold {
|
|
run = run.bold();
|
|
}
|
|
if para_data.italic {
|
|
run = run.italic();
|
|
}
|
|
if para_data.underline {
|
|
run = run.underline("single");
|
|
}
|
|
paragraph = paragraph.add_run(run);
|
|
}
|
|
}
|
|
|
|
docx = docx.add_paragraph(paragraph);
|
|
}
|
|
|
|
let mut buf = Cursor::new(Vec::new());
|
|
docx.build()
|
|
.pack(&mut buf)
|
|
.map_err(|e| format!("Failed to build DOCX: {e}"))?;
|
|
|
|
Ok(buf.into_inner())
|
|
}
|
|
|
|
/// Persists a document as two S3 objects under the user's docs prefix: the
/// HTML body at `<base>/<doc_id>.html` and a JSON metadata sidecar at
/// `<base>/<doc_id>.meta.json` (title, timestamps, word count, version).
///
/// Returns the key of the stored HTML object.
///
/// # Errors
/// Returns an error string when S3 is unavailable or either upload fails.
/// If the metadata upload fails after the HTML upload succeeded, the HTML
/// object is left in place.
pub async fn save_document_to_drive(
    state: &Arc<AppState>,
    user_identifier: &str,
    doc_id: &str,
    title: &str,
    content: &str,
) -> Result<String, String> {
    let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;

    let base_path = get_user_docs_path(user_identifier);
    let doc_path = format!("{base_path}/{doc_id}.html");
    let meta_path = format!("{base_path}/{doc_id}.meta.json");

    // Upload the HTML body first.
    s3_client
        .put_object()
        .bucket(&state.bucket_name)
        .key(&doc_path)
        .body(ByteStream::from(content.as_bytes().to_vec()))
        .content_type("text/html")
        .send()
        .await
        .map_err(|e| format!("Failed to save document: {e}"))?;

    let word_count = count_words(content);

    // NOTE(review): created_at is rewritten to "now" on every save, so the
    // sidecar does not preserve the original creation time — confirm intent.
    let metadata = serde_json::json!({
        "id": doc_id,
        "title": title,
        "created_at": Utc::now().to_rfc3339(),
        "updated_at": Utc::now().to_rfc3339(),
        "word_count": word_count,
        "version": 1
    });

    s3_client
        .put_object()
        .bucket(&state.bucket_name)
        .key(&meta_path)
        .body(ByteStream::from(metadata.to_string().into_bytes()))
        .content_type("application/json")
        .send()
        .await
        .map_err(|e| format!("Failed to save metadata: {e}"))?;

    Ok(doc_path)
}
|
|
|
|
/// Convenience wrapper: persists `doc`'s HTML and metadata via
/// `save_document_to_drive`, returning the stored HTML object key.
pub async fn save_document(
    state: &Arc<AppState>,
    user_identifier: &str,
    doc: &Document,
) -> Result<String, String> {
    save_document_to_drive(state, user_identifier, &doc.id, &doc.title, &doc.content).await
}
|
|
|
|
/// Loads a previously saved HTML document and its metadata sidecar from S3.
///
/// Returns `Ok(None)` when the HTML object does not exist (any fetch error
/// is treated as "not found"). A missing or unparseable metadata sidecar is
/// tolerated: title falls back to "Untitled" and both timestamps fall back
/// to the current time.
///
/// # Errors
/// Returns an error string when S3 is unavailable, the body cannot be read,
/// or the stored bytes are not valid UTF-8.
pub async fn load_document_from_drive(
    state: &Arc<AppState>,
    user_identifier: &str,
    doc_id: &str,
) -> Result<Option<Document>, String> {
    let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;

    let base_path = get_user_docs_path(user_identifier);
    let doc_path = format!("{base_path}/{doc_id}.html");
    let meta_path = format!("{base_path}/{doc_id}.meta.json");

    // Fetch the HTML body; absence (or any fetch error) means "no document".
    let content = match s3_client
        .get_object()
        .bucket(&state.bucket_name)
        .key(&doc_path)
        .send()
        .await
    {
        Ok(result) => {
            let bytes = result
                .body
                .collect()
                .await
                .map_err(|e| e.to_string())?
                .into_bytes();
            String::from_utf8(bytes.to_vec()).map_err(|e| e.to_string())?
        }
        Err(_) => return Ok(None),
    };

    // Fetch the metadata sidecar; fall back to defaults if it is missing.
    let (title, created_at, updated_at) = match s3_client
        .get_object()
        .bucket(&state.bucket_name)
        .key(&meta_path)
        .send()
        .await
    {
        Ok(result) => {
            let bytes = result
                .body
                .collect()
                .await
                .map_err(|e| e.to_string())?
                .into_bytes();
            let meta_str = String::from_utf8(bytes.to_vec()).map_err(|e| e.to_string())?;
            // Unparseable JSON degrades to Value::Null, so the field lookups
            // below simply hit their defaults.
            let meta: serde_json::Value = serde_json::from_str(&meta_str).unwrap_or_default();
            (
                meta["title"].as_str().unwrap_or("Untitled").to_string(),
                meta["created_at"]
                    .as_str()
                    .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                    .map(|d| d.with_timezone(&Utc))
                    .unwrap_or_else(Utc::now),
                meta["updated_at"]
                    .as_str()
                    .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                    .map(|d| d.with_timezone(&Utc))
                    .unwrap_or_else(Utc::now),
            )
        }
        Err(_) => ("Untitled".to_string(), Utc::now(), Utc::now()),
    };

    Ok(Some(Document {
        id: doc_id.to_string(),
        title,
        content,
        owner_id: user_identifier.to_string(),
        storage_path: doc_path,
        created_at,
        updated_at,
        collaborators: Vec::new(),
        version: 1,
        track_changes: None,
        comments: None,
        footnotes: None,
        endnotes: None,
        styles: None,
        toc: None,
        track_changes_enabled: false,
    }))
}
|
|
|
|
/// Lists a user's documents by scanning their docs prefix for `.meta.json`
/// sidecars and parsing each one into a `DocumentMetadata`.
///
/// Any failure along the way (listing, fetching, UTF-8 decode, JSON parse)
/// silently skips that entry — the function is deliberately best-effort and
/// always returns `Ok` with whatever it could read, sorted newest-first by
/// `updated_at`.
///
/// NOTE(review): a single `list_objects_v2` page is consumed; users with
/// more keys than one page returns would have documents silently missing —
/// confirm whether pagination is needed here.
pub async fn list_documents_from_drive(
    state: &Arc<AppState>,
    user_identifier: &str,
) -> Result<Vec<DocumentMetadata>, String> {
    let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;

    let base_path = get_user_docs_path(user_identifier);
    let prefix = format!("{base_path}/");
    let mut documents = Vec::new();

    if let Ok(result) = s3_client
        .list_objects_v2()
        .bucket(&state.bucket_name)
        .prefix(&prefix)
        .send()
        .await
    {
        for obj in result.contents() {
            if let Some(key) = obj.key() {
                // Only metadata sidecars drive the listing; the .html/.docx
                // bodies are ignored here.
                if key.ends_with(".meta.json") {
                    if let Ok(meta_result) = s3_client
                        .get_object()
                        .bucket(&state.bucket_name)
                        .key(key)
                        .send()
                        .await
                    {
                        if let Ok(bytes) = meta_result.body.collect().await {
                            if let Ok(meta_str) = String::from_utf8(bytes.into_bytes().to_vec()) {
                                if let Ok(meta) =
                                    serde_json::from_str::<serde_json::Value>(&meta_str)
                                {
                                    let doc_meta = DocumentMetadata {
                                        id: meta["id"].as_str().unwrap_or_default().to_string(),
                                        title: meta["title"]
                                            .as_str()
                                            .unwrap_or("Untitled")
                                            .to_string(),
                                        owner_id: user_identifier.to_string(),
                                        created_at: meta["created_at"]
                                            .as_str()
                                            .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                                            .map(|d| d.with_timezone(&Utc))
                                            .unwrap_or_else(Utc::now),
                                        updated_at: meta["updated_at"]
                                            .as_str()
                                            .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                                            .map(|d| d.with_timezone(&Utc))
                                            .unwrap_or_else(Utc::now),
                                        word_count: meta["word_count"].as_u64().unwrap_or(0)
                                            as usize,
                                        storage_type: "html".to_string(),
                                    };
                                    documents.push(doc_meta);
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    // Newest first.
    documents.sort_by(|a, b| b.updated_at.cmp(&a.updated_at));
    Ok(documents)
}
|
|
|
|
/// Deletes every stored representation of a document (`.html`, `.docx`,
/// `.meta.json`) from the user's docs prefix and evicts it from the
/// in-memory cache.
///
/// Individual delete failures are ignored (best-effort cleanup); the only
/// error returned is S3 being unavailable.
pub async fn delete_document_from_drive(
    state: &Arc<AppState>,
    user_identifier: &str,
    doc_id: &str,
) -> Result<(), String> {
    let s3_client = state.drive.as_ref().ok_or("S3 service not available")?;

    let base_path = get_user_docs_path(user_identifier);

    for ext in &[".html", ".docx", ".meta.json"] {
        let path = format!("{base_path}/{doc_id}{ext}");
        // Ignore per-object failures; missing objects are fine.
        let _ = s3_client
            .delete_object()
            .bucket(&state.bucket_name)
            .key(&path)
            .send()
            .await;
    }

    remove_from_cache(doc_id).await;

    Ok(())
}
|
|
|
|
/// Creates a blank, unsaved `Document` with a fresh id, owned by the current
/// user. `storage_path` stays empty until the document is first persisted;
/// the content is a single empty paragraph so editors render a cursor line.
pub fn create_new_document() -> Document {
    let doc_id = generate_doc_id();
    Document {
        id: doc_id,
        title: "Untitled Document".to_string(),
        content: "<p><br></p>".to_string(),
        owner_id: get_current_user_id(),
        storage_path: String::new(),
        created_at: Utc::now(),
        updated_at: Utc::now(),
        collaborators: Vec::new(),
        version: 1,
        track_changes: None,
        comments: None,
        footnotes: None,
        endnotes: None,
        styles: None,
        toc: None,
        track_changes_enabled: false,
    }
}
|
|
|
|
pub fn count_words(content: &str) -> usize {
|
|
let plain_text = strip_html(content);
|
|
plain_text
|
|
.split_whitespace()
|
|
.filter(|s| !s.is_empty())
|
|
.count()
|
|
}
|
|
|
|
/// Reduces an HTML string to plain text: drops everything between `<` and
/// `>`, then decodes the handful of entities this module emits.
///
/// Fix: the entity literals had been decoded in place by an extraction pass
/// (leaving e.g. `.replace(""", …)`, which is not valid Rust); they are
/// restored here. `&amp;` is decoded LAST so input like `&amp;lt;` becomes
/// `&lt;` rather than being double-decoded to `<`.
///
/// Note: this is a naive scanner — a literal `<` in text (not markup) would
/// also start a "tag" and swallow text until the next `>`.
fn strip_html(html: &str) -> String {
    let mut result = String::new();
    let mut in_tag = false;

    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            '>' => in_tag = false,
            _ if !in_tag => result.push(ch),
            _ => {}
        }
    }

    result
        .replace("&nbsp;", " ")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&amp;", "&")
}
|
|
|
|
/// Escapes the five HTML-special characters so arbitrary text can be
/// embedded safely in the generated markup. `&` is escaped first so the
/// later replacements do not double-escape their own output.
///
/// Fix: the entity literals had been decoded in place by an extraction pass
/// (e.g. `.replace('&', "&")`); the intended `&amp;`/`&lt;`/`&gt;`/
/// `&quot;`/`&#39;` strings are restored.
fn escape_html(text: &str) -> String {
    text.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
        .replace('"', "&quot;")
        .replace('\'', "&#39;")
}
|
|
|
|
fn paragraphs_to_html(paragraphs: &[String]) -> String {
|
|
paragraphs
|
|
.iter()
|
|
.map(|p| format!("<p>{}</p>", escape_html(p)))
|
|
.collect::<Vec<_>>()
|
|
.join("")
|
|
}
|
|
|
|
fn html_to_paragraphs(html: &str) -> Vec<String> {
|
|
parse_html_to_paragraphs(html)
|
|
.into_iter()
|
|
.map(|p| p.text)
|
|
.collect()
|
|
}
|
|
|
|
/// Intermediate representation of one block-level element extracted from
/// editor HTML, consumed by `convert_html_to_docx`.
#[derive(Default, Clone)]
struct ParagraphData {
    // Entity-decoded plain text of the block.
    text: String,
    // Block style tag as parsed: "p", "h1".."h3", "li", "blockquote", "code";
    // empty (Default) when no block tag was seen.
    style: String,
    // Inline formatting flags captured when the block was opened.
    bold: bool,
    italic: bool,
    underline: bool,
}
|
|
|
|
/// Single-pass, character-level HTML parser that splits markup into
/// `ParagraphData` blocks.
///
/// Text between tags accumulates in `text_buffer`; a closing block tag
/// (`</p>`, `</h1>`, `</li>`, …) flushes the buffer (entity-decoded) into
/// `current` and pushes it if non-blank. Nested `<b>/<i>/<u>` (and
/// `strong`/`em`) are tracked with counters so the flags survive nesting;
/// `saturating_sub` guards against unbalanced closing tags. `<br>` becomes a
/// newline in the text. Attributes are ignored (only the first token of the
/// tag is inspected). A trailing unflushed buffer is emitted as a final
/// block.
fn parse_html_to_paragraphs(html: &str) -> Vec<ParagraphData> {
    let mut paragraphs = Vec::new();
    let mut current = ParagraphData::default();
    let mut in_tag = false;
    let mut tag_name = String::new();
    let mut is_closing = false;
    let mut text_buffer = String::new();

    // Nesting depth of each inline-formatting tag currently open.
    let mut bold_stack: i32 = 0;
    let mut italic_stack: i32 = 0;
    let mut underline_stack: i32 = 0;

    for ch in html.chars() {
        match ch {
            '<' => {
                in_tag = true;
                tag_name.clear();
                is_closing = false;
            }
            '>' => {
                in_tag = false;
                let tag = tag_name.to_lowercase();
                // Drop attributes: keep only the tag's first token.
                let tag_trimmed = tag.split_whitespace().next().unwrap_or("");

                if is_closing {
                    match tag_trimmed {
                        // Closing a block tag finalizes the current block.
                        "p" | "div" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li"
                        | "blockquote" | "pre" => {
                            if !text_buffer.is_empty() || !current.text.is_empty() {
                                current.text = format!(
                                    "{}{}",
                                    current.text,
                                    decode_html_entities(&text_buffer)
                                );
                                // Whitespace-only blocks are dropped.
                                if !current.text.trim().is_empty() {
                                    paragraphs.push(current);
                                }
                                current = ParagraphData::default();
                                text_buffer.clear();
                            }
                        }
                        "b" | "strong" => bold_stack = bold_stack.saturating_sub(1),
                        "i" | "em" => italic_stack = italic_stack.saturating_sub(1),
                        "u" => underline_stack = underline_stack.saturating_sub(1),
                        _ => {}
                    }
                } else {
                    match tag_trimmed {
                        "br" => {
                            text_buffer.push('\n');
                        }
                        "p" | "div" => {
                            // Flush any text collected before this block
                            // opened, then snapshot the formatting flags.
                            if !text_buffer.is_empty() {
                                current.text = format!(
                                    "{}{}",
                                    current.text,
                                    decode_html_entities(&text_buffer)
                                );
                                text_buffer.clear();
                            }
                            current.style = "p".to_string();
                            current.bold = bold_stack > 0;
                            current.italic = italic_stack > 0;
                            current.underline = underline_stack > 0;
                        }
                        "h1" => current.style = "h1".to_string(),
                        "h2" => current.style = "h2".to_string(),
                        "h3" => current.style = "h3".to_string(),
                        "li" => current.style = "li".to_string(),
                        "blockquote" => current.style = "blockquote".to_string(),
                        "pre" | "code" => current.style = "code".to_string(),
                        "b" | "strong" => bold_stack += 1,
                        "i" | "em" => italic_stack += 1,
                        "u" => underline_stack += 1,
                        _ => {}
                    }
                }
                tag_name.clear();
            }
            // A '/' immediately after '<' marks a closing tag.
            '/' if in_tag && tag_name.is_empty() => {
                is_closing = true;
            }
            _ if in_tag => {
                tag_name.push(ch);
            }
            _ => {
                text_buffer.push(ch);
            }
        }
    }

    // Emit any trailing text that was never closed by a block tag.
    if !text_buffer.is_empty() {
        current.text = format!("{}{}", current.text, decode_html_entities(&text_buffer));
    }
    if !current.text.trim().is_empty() {
        paragraphs.push(current);
    }

    paragraphs
}
|
|
|
|
/// Decodes the small set of HTML entities this module produces back into
/// plain characters.
///
/// Fix: the entity literals had been decoded in place by an extraction pass
/// (leaving no-op `.replace(" ", " ")`-style calls); they are restored here.
/// `&amp;` is decoded LAST so `&amp;lt;` yields `&lt;` instead of being
/// double-decoded to `<`.
fn decode_html_entities(text: &str) -> String {
    text.replace("&nbsp;", " ")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
|