From 73f1898b629b141b64508ee1dfbd9d82d5b3b7f0 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Sun, 12 Apr 2026 09:36:39 -0300 Subject: [PATCH] Add fail_count and last_failed_at to kb_documents Simplified KB indexing state tracking - added columns directly to kb_documents instead of separate table. This enables per-file backoff retry logic. --- migrations/6.1.8-01-research/up.sql | 3 +++ src/core/shared/schema/research.rs | 16 ++++++++++++++++ src/drive/drive_monitor/mod.rs | 11 ++++++++++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/migrations/6.1.8-01-research/up.sql b/migrations/6.1.8-01-research/up.sql index a8a6b34b..c296eb07 100644 --- a/migrations/6.1.8-01-research/up.sql +++ b/migrations/6.1.8-01-research/up.sql @@ -11,6 +11,8 @@ CREATE TABLE IF NOT EXISTS kb_documents ( first_published_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), last_modified_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), indexed_at TIMESTAMPTZ, + fail_count INT NOT NULL DEFAULT 0, + last_failed_at TIMESTAMPTZ, metadata JSONB DEFAULT '{}'::jsonb, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), @@ -21,6 +23,7 @@ CREATE INDEX IF NOT EXISTS idx_kb_documents_bot_id ON kb_documents(bot_id); CREATE INDEX IF NOT EXISTS idx_kb_documents_collection ON kb_documents(collection_name); CREATE INDEX IF NOT EXISTS idx_kb_documents_hash ON kb_documents(file_hash); CREATE INDEX IF NOT EXISTS idx_kb_documents_indexed_at ON kb_documents(indexed_at); +CREATE INDEX IF NOT EXISTS idx_kb_documents_fail ON kb_documents(bot_id, collection_name, fail_count) WHERE fail_count > 0; -- Knowledge Base Collections CREATE TABLE IF NOT EXISTS kb_collections ( diff --git a/src/core/shared/schema/research.rs b/src/core/shared/schema/research.rs index 2d7d07a3..8b23df21 100644 --- a/src/core/shared/schema/research.rs +++ b/src/core/shared/schema/research.rs @@ -12,6 +12,8 @@ diesel::table! 
{ first_published_at -> Timestamptz, last_modified_at -> Timestamptz, indexed_at -> Nullable<Timestamptz>, + fail_count -> Int4, + last_failed_at -> Nullable<Timestamptz>, metadata -> Nullable<Jsonb>, created_at -> Timestamptz, updated_at -> Timestamptz, @@ -171,3 +173,17 @@ diesel::joinable!(research_findings -> research_projects (project_id)); diesel::joinable!(research_citations -> research_sources (source_id)); diesel::joinable!(research_collaborators -> research_projects (project_id)); diesel::joinable!(research_exports -> research_projects (project_id)); + +diesel::allow_tables_to_appear_in_same_query!( + kb_documents, + kb_collections, + kb_group_associations, + user_kb_associations, + research_projects, + research_sources, + research_notes, + research_findings, + research_citations, + research_collaborators, + research_exports, +); diff --git a/src/drive/drive_monitor/mod.rs b/src/drive/drive_monitor/mod.rs index 00a9e203..c38e1948 100644 --- a/src/drive/drive_monitor/mod.rs +++ b/src/drive/drive_monitor/mod.rs @@ -565,10 +565,19 @@ impl DriveMonitor { file_states.remove(&path); } } - for (path, mut state) in current_files { + for (path, new_state) in current_files { + let mut state = new_state; if path.ends_with(".bas") { state.indexed = true; } + // Preserve fail_count and last_failed_at for existing files that weren't modified + if let Some(prev_state) = file_states.get(&path) { + if prev_state.etag == state.etag { + // File wasn't modified - preserve fail_count and last_failed_at + state.fail_count = prev_state.fail_count; + state.last_failed_at = prev_state.last_failed_at; + } + } file_states.insert(path, state); } // Save file states to disk in background to avoid blocking