fix: CI failures - shutdown hang, bottest compile errors, botui deploy
- Add shutdown tracing and 15s forced exit to prevent SIGTERM hangs
- Fix E0583: remove self-referential mod declarations in bottest integration files
- Fix E0599: correct .status() call on Result in performance.rs
- Fix botui CI deploy: use systemctl stop/start instead of pkill+nohup
- Update PROD.md with DB-driven CI log retrieval method
This commit is contained in:
parent
1b25559a1b
commit
eea9b24ef0
9 changed files with 111 additions and 33 deletions
|
|
@ -23,12 +23,12 @@ jobs:
|
|||
run: |
|
||||
cd /opt/gbo/work/generalbots
|
||||
CARGO_BUILD_JOBS=4 cargo build -p botui --bin botui
|
||||
- name: Deploy
|
||||
run: |
|
||||
BINARY=/opt/gbo/work/target/debug/botui
|
||||
ssh -o StrictHostKeyChecking=no $SYSTEM_HOST "pkill -x botui || true"
|
||||
sleep 2
|
||||
scp -o StrictHostKeyChecking=no "$BINARY" $SYSTEM_HOST:/opt/gbo/bin/botui
|
||||
ssh -o StrictHostKeyChecking=no $SYSTEM_HOST "cd /opt/gbo/bin && RUST_LOG=info nohup ./botui >> /opt/gbo/logs/stdout.log 2>> /opt/gbo/logs/stderr.log &"
|
||||
sleep 3
|
||||
ssh -o StrictHostKeyChecking=no $SYSTEM_HOST "pgrep -x botui && echo 'BotUI Deployed' || echo 'Failed'"
|
||||
- name: Deploy
|
||||
run: |
|
||||
BINARY=/opt/gbo/work/target/debug/botui
|
||||
ssh -o StrictHostKeyChecking=no $SYSTEM_HOST "sudo systemctl stop botui"
|
||||
sleep 2
|
||||
scp -o StrictHostKeyChecking=no "$BINARY" $SYSTEM_HOST:/opt/gbo/bin/botui
|
||||
ssh -o StrictHostKeyChecking=no $SYSTEM_HOST "sudo systemctl start botui"
|
||||
sleep 3
|
||||
ssh -o StrictHostKeyChecking=no $SYSTEM_HOST "pgrep -x botui && echo 'BotUI Deployed' || echo 'Failed'"
|
||||
|
|
|
|||
82
PROD.md
82
PROD.md
|
|
@ -385,15 +385,85 @@ curl -X DELETE "http://<directory-ip>:8080/v2/users/<user-id>" \
|
|||
| List users | `POST /v2/users` |
|
||||
| Update password | `POST /v2/users/{id}/password` |
|
||||
|
||||
# /tmp permission denied for build.log
|
||||
sudo incus exec alm-ci -- chmod 1777 /tmp
|
||||
sudo incus exec alm-ci -- touch /tmp/build.log && chmod 666 /tmp/build.log
|
||||
### CI/CD Log Retrieval from Database (PREFERRED METHOD)
|
||||
|
||||
# Clean old CI runs (keep recent)
|
||||
sudo incus exec tables -- bash -c 'export PGPASSWORD=<postgres-password>; psql -h localhost -U postgres -d PROD-ALM -c "DELETE FROM action_run WHERE id < <RECENT_ID>;"'
|
||||
sudo incus exec tables -- bash -c 'export PGPASSWORD=<postgres-password>; psql -h localhost -U postgres -d PROD-ALM -c "DELETE FROM action_run_job WHERE run_id < <RECENT_ID>;"'
|
||||
The most reliable way to get CI build logs — including compiler errors — is from the Forgejo ALM database and compressed log files. The runner logs (`/opt/gbo/logs/forgejo-runner.log`) show live activity but scroll away quickly. The database retains everything.
|
||||
|
||||
**Status codes:** 0=unknown, 1=success, 2=failure, 3=cancelled, 4=skipped, 5=waiting (pending), 6=running, 7=blocked
|
||||
|
||||
**Step 1 — List recent runs with workflow name and status:**
|
||||
```sql
|
||||
-- Connect to ALM database
|
||||
-- (run from the host shell, not inside psql): sudo incus exec tables -- psql -h localhost -U postgres -d PROD-ALM
|
||||
|
||||
SELECT ar.id, ar.title, ar.workflow_id, ar.status,
|
||||
to_timestamp(ar.created) AS created_at
|
||||
FROM action_run ar
|
||||
ORDER BY ar.id DESC LIMIT 10;
|
||||
```
|
||||
|
||||
**Step 2 — Get job/task IDs for a failed run:**
|
||||
```sql
|
||||
SELECT arj.id AS job_id, arj.name, arj.status, arj.task_id
|
||||
FROM action_run_job arj
|
||||
WHERE arj.run_id = <FAILED_RUN_ID>;
|
||||
```
|
||||
|
||||
**Step 3 — Get step-level status (which step failed):**
|
||||
```sql
|
||||
SELECT ats.name, ats.status, ats.log_index, ats.log_length
|
||||
FROM action_task_step ats
|
||||
WHERE ats.task_id = <TASK_ID>
|
||||
ORDER BY ats.index;
|
||||
```
|
||||
|
||||
**Step 4 — Read the full build log (contains compiler errors):**
|
||||
```bash
|
||||
# 1. Get the log filename from action_task
|
||||
sudo incus exec tables -- psql -h localhost -U postgres -d PROD-ALM \
|
||||
-c "SELECT log_filename FROM action_task WHERE id = <TASK_ID>;"
|
||||
|
||||
# 2. Pull and decompress the log from alm container
|
||||
# Log files are zstd-compressed at: /opt/gbo/data/data/actions_log/<repo-path>/<task_id>.log.zst
|
||||
sudo incus file pull alm/opt/gbo/data/data/actions_log/<LOG_FILENAME> /tmp/ci-log.log.zst
|
||||
zstd -d /tmp/ci-log.log.zst -o /tmp/ci-log.log
|
||||
cat /tmp/ci-log.log
|
||||
```
|
||||
|
||||
**One-liner to read latest failed run log:**
|
||||
```bash
|
||||
TASK_ID=$(sudo incus exec tables -- psql -h localhost -U postgres -d PROD-ALM -t -c \
|
||||
"SELECT at.id FROM action_task at JOIN action_run_job arj ON at.job_id = arj.id \
|
||||
JOIN action_run ar ON arj.run_id = ar.id \
|
||||
WHERE ar.status = 2 ORDER BY at.id DESC LIMIT 1;" | tr -d ' ')
|
||||
LOG_FILE=$(sudo incus exec tables -- psql -h localhost -U postgres -d PROD-ALM -t -c \
|
||||
"SELECT log_filename FROM action_task WHERE id = $TASK_ID;" | tr -d ' ')
|
||||
sudo incus file pull "alm/opt/gbo/data/data/actions_log/$LOG_FILE" /tmp/ci-log.log.zst
|
||||
zstd -d /tmp/ci-log.log.zst -o /tmp/ci-log.log 2>/dev/null && cat /tmp/ci-log.log
|
||||
```
|
||||
|
||||
**Watch CI in real-time (supplementary):**
|
||||
```bash
|
||||
# Tail runner logs (live but ephemeral)
|
||||
sudo incus exec alm-ci -- tail -f /opt/gbo/logs/forgejo-runner.log
|
||||
|
||||
# Watch for new runs
|
||||
sudo incus exec tables -- psql -h localhost -U postgres -d PROD-ALM \
|
||||
-c "SELECT id, title, workflow_id, status FROM action_run ORDER BY id DESC LIMIT 5;"
|
||||
```
|
||||
|
||||
**Verify binary was updated after deploy:**
|
||||
```bash
|
||||
sudo incus exec system -- stat -c '%y' /opt/gbo/bin/botserver
|
||||
sudo incus exec system -- systemctl status botserver --no-pager
|
||||
curl -sf https://<system-domain>/api/health && echo "OK" || echo "FAILED"
|
||||
```
|
||||
|
||||
**Understand build timing:**
|
||||
- **Rust compilation**: 2-5 minutes (cold build), 30-60 seconds (incremental)
|
||||
- **Deploy step**: ~5 seconds
|
||||
- **Total CI time**: 2-6 minutes depending on cache
|
||||
|
||||
**Watch CI in real-time:**
|
||||
```bash
|
||||
# Tail runner logs
|
||||
|
|
|
|||
|
|
@ -626,8 +626,9 @@ pub async fn run_axum_server(
|
|||
|
||||
tokio::spawn(async move {
|
||||
shutdown_signal().await;
|
||||
info!("Shutting down HTTPS server...");
|
||||
info!("Shutting down HTTPS server - draining active connections (10s timeout)...");
|
||||
handle_clone.graceful_shutdown(Some(std::time::Duration::from_secs(10)));
|
||||
info!("HTTPS graceful shutdown initiated, waiting for connections to drain...");
|
||||
});
|
||||
|
||||
axum_server::bind_rustls(addr, tls_config)
|
||||
|
|
@ -656,9 +657,14 @@ pub async fn run_axum_server(
|
|||
}
|
||||
};
|
||||
info!("HTTP server listening on {}", addr);
|
||||
axum::serve(listener, app.into_make_service())
|
||||
info!("Server ready - shutdown via SIGINT (Ctrl+C) or SIGTERM (systemctl stop)");
|
||||
let result = axum::serve(listener, app.into_make_service())
|
||||
.with_graceful_shutdown(shutdown_signal())
|
||||
.await
|
||||
.map_err(std::io::Error::other)
|
||||
.await;
|
||||
match &result {
|
||||
Ok(()) => info!("HTTP server shut down gracefully"),
|
||||
Err(e) => error!("HTTP server shutdown with error: {}", e),
|
||||
}
|
||||
result.map_err(std::io::Error::other)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
//! Shutdown signal handling
|
||||
|
||||
use log::{error, info};
|
||||
use log::{error, info, warn};
|
||||
|
||||
pub fn print_shutdown_message() {
|
||||
println!();
|
||||
|
|
@ -9,6 +9,8 @@ pub fn print_shutdown_message() {
|
|||
}
|
||||
|
||||
pub async fn shutdown_signal() {
|
||||
info!("Shutdown signal handler installed, waiting for SIGINT or SIGTERM...");
|
||||
|
||||
let ctrl_c = async {
|
||||
if let Err(e) = tokio::signal::ctrl_c().await {
|
||||
error!("Failed to install Ctrl+C handler: {}", e);
|
||||
|
|
@ -19,6 +21,7 @@ pub async fn shutdown_signal() {
|
|||
let terminate = async {
|
||||
match tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) {
|
||||
Ok(mut signal) => {
|
||||
info!("SIGTERM handler installed successfully");
|
||||
signal.recv().await;
|
||||
}
|
||||
Err(e) => {
|
||||
|
|
@ -32,12 +35,21 @@ pub async fn shutdown_signal() {
|
|||
|
||||
tokio::select! {
|
||||
_ = ctrl_c => {
|
||||
info!("Received Ctrl+C, initiating graceful shutdown...");
|
||||
info!("Received SIGINT (Ctrl+C), initiating graceful shutdown...");
|
||||
}
|
||||
_ = terminate => {
|
||||
info!("Received SIGTERM, initiating graceful shutdown...");
|
||||
info!("Received SIGTERM (systemctl stop), initiating graceful shutdown...");
|
||||
}
|
||||
}
|
||||
|
||||
info!("Shutdown signal received - server will stop accepting new connections");
|
||||
warn!("Graceful shutdown timeout is 10s for HTTPS, after which process will exit");
|
||||
|
||||
print_shutdown_message();
|
||||
|
||||
tokio::spawn(async {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(15)).await;
|
||||
warn!("Graceful shutdown exceeded 15s - forcing process exit to prevent hang");
|
||||
std::process::exit(0);
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
mod accessibility;
|
||||
|
||||
use bottest::prelude::*;
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
mod compliance;
|
||||
|
||||
use bottest::prelude::*;
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
mod internationalization;
|
||||
|
||||
use bottest::prelude::*;
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
mod performance;
|
||||
|
||||
use bottest::prelude::*;
|
||||
use reqwest::Client;
|
||||
use std::time::{Duration, Instant};
|
||||
|
|
@ -71,7 +69,7 @@ async fn test_concurrent_requests_handled() {
|
|||
|
||||
let successes = results
|
||||
.iter()
|
||||
.filter(|r| r.as_ref().map(|resp| resp.status().is_success()).unwrap_or(false))
|
||||
.filter(|r| r.as_ref().is_ok_and(|resp| resp.status().is_success()))
|
||||
.count();
|
||||
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
mod security;
|
||||
|
||||
use bottest::prelude::*;
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue