Merge pull request #3015 from aronprins/feature/backups-configuration

feat(backups): gzip compression and tiered retention with UI controls
This commit is contained in:
Dotta
2026-04-10 11:56:12 -05:00
committed by GitHub
15 changed files with 291 additions and 34 deletions

View File

@@ -73,7 +73,7 @@ export async function dbBackupCommand(opts: DbBackupOptions): Promise<void> {
const result = await runDatabaseBackup({
connectionString: connection.value,
backupDir,
retentionDays,
retention: { dailyDays: retentionDays, weeklyWeeks: 4, monthlyMonths: 1 },
filenamePrefix,
});
spinner.stop(`Backup saved: ${formatDatabaseBackupResult(result)}`);

View File

@@ -959,7 +959,7 @@ async function seedWorktreeDatabase(input: {
const backup = await runDatabaseBackup({
connectionString: sourceConnectionString,
backupDir: path.resolve(input.targetPaths.backupDir, "seed"),
retentionDays: 7,
retention: { dailyDays: 7, weeklyWeeks: 4, monthlyMonths: 1 },
filenamePrefix: `${input.instanceId}-seed`,
includeMigrationJournal: true,
excludeTables: seedPlan.excludedTables,

View File

@@ -125,12 +125,12 @@ describeEmbeddedPostgres("runDatabaseBackup", () => {
const result = await runDatabaseBackup({
connectionString: sourceConnectionString,
backupDir,
retentionDays: 7,
retention: { dailyDays: 7, weeklyWeeks: 4, monthlyMonths: 1 },
filenamePrefix: "paperclip-test",
});
expect(result.backupFile).toMatch(/paperclip-test-.*\.sql$/);
expect(result.sizeBytes).toBeGreaterThan(1024 * 1024);
expect(result.backupFile).toMatch(/paperclip-test-.*\.sql\.gz$/);
expect(result.sizeBytes).toBeGreaterThan(0);
expect(fs.existsSync(result.backupFile)).toBe(true);
await runDatabaseRestore({

View File

@@ -1,12 +1,20 @@
import { createReadStream, createWriteStream, existsSync, mkdirSync, readdirSync, statSync, unlinkSync } from "node:fs";
import { basename, resolve } from "node:path";
import { createInterface } from "node:readline";
import { pipeline } from "node:stream/promises";
import { createGunzip, createGzip } from "node:zlib";
import postgres from "postgres";
/**
 * Tiered retention windows consumed by `pruneOldBackups` when deciding which
 * backup files to keep. Each window is measured back from the current time.
 */
export type BackupRetentionPolicy = {
// Every backup modified within this many days is kept.
dailyDays: number;
// Within this many weeks, the newest backup of each ISO week is kept.
weeklyWeeks: number;
// Within this many months (the pruner counts a month as 30 days), the newest
// backup of each calendar month is kept.
monthlyMonths: number;
};
export type RunDatabaseBackupOptions = {
connectionString: string;
backupDir: string;
retentionDays: number;
retention: BackupRetentionPolicy;
filenamePrefix?: string;
connectTimeoutSeconds?: number;
includeMigrationJournal?: boolean;
@@ -75,23 +83,91 @@ function timestamp(date: Date = new Date()): string {
return `${date.getFullYear()}${pad(date.getMonth() + 1)}${pad(date.getDate())}-${pad(date.getHours())}${pad(date.getMinutes())}${pad(date.getSeconds())}`;
}
function pruneOldBackups(backupDir: string, retentionDays: number, filenamePrefix: string): number {
/**
 * Builds the ISO 8601 week bucket key (for example `2026-W15`) for a date,
 * based on the date's local calendar day.
 */
function isoWeekKey(date: Date): string {
  const msPerDay = 86400000;
  // Re-anchor the local calendar day at UTC midnight so the day arithmetic
  // below is unaffected by daylight-saving shifts.
  const thursday = new Date(Date.UTC(date.getFullYear(), date.getMonth(), date.getDate()));
  // ISO weekdays run Monday=1 … Sunday=7, whereas getUTCDay() reports Sunday as 0.
  const utcDay = thursday.getUTCDay();
  const isoWeekday = utcDay === 0 ? 7 : utcDay;
  // Move to the Thursday of the same ISO week; its calendar year is the ISO week-year.
  thursday.setUTCDate(thursday.getUTCDate() + 4 - isoWeekday);
  const isoYear = thursday.getUTCFullYear();
  const januaryFirst = new Date(Date.UTC(isoYear, 0, 1));
  const dayOfYear = (thursday.getTime() - januaryFirst.getTime()) / msPerDay + 1;
  const weekNumber = Math.ceil(dayOfYear / 7);
  const paddedWeek = String(weekNumber).padStart(2, "0");
  return `${isoYear}-W${paddedWeek}`;
}
/** Builds the calendar-month bucket key (for example `2026-04`) from a date's local year and month. */
function monthKey(date: Date): string {
  const year = date.getFullYear();
  // getMonth() is zero-based, so January is 0.
  const month = date.getMonth() + 1;
  const paddedMonth = month < 10 ? `0${month}` : `${month}`;
  return `${year}-${paddedMonth}`;
}
/** True when `error` carries the filesystem error code for a path that no longer exists. */
function isMissingFileError(error: unknown): boolean {
  return typeof error === "object" && error !== null && (error as { code?: unknown }).code === "ENOENT";
}

/**
 * Coerces one retention setting to a usable window length (at least 1).
 * A missing or non-finite value yields `fallback`; without this, a NaN cutoff
 * makes every comparison false and every backup would be classified as expired.
 */
function sanitizeRetentionValue(value: unknown, fallback: number): number {
  return typeof value === "number" && Number.isFinite(value) ? Math.max(1, value) : fallback;
}

/**
 * Tiered backup pruning:
 * - Daily tier: keep ALL backups from the last `dailyDays` days
 * - Weekly tier: keep the NEWEST backup per ISO calendar week for `weeklyWeeks` weeks
 * - Monthly tier: keep the NEWEST backup per calendar month for `monthlyMonths` months
 *   (a month is counted as 30 days)
 * - Everything else is deleted
 *
 * Only files named `<filenamePrefix>-*.sql` or `<filenamePrefix>-*.sql.gz` are considered;
 * age is taken from each file's modification time. All three windows are measured back
 * from the current time.
 *
 * @param backupDir Directory to scan; a missing directory is a no-op.
 * @param retention Retention windows. Invalid (missing / non-finite) fields fall back to
 *   7 daily days, 4 weekly weeks and 1 monthly month instead of expiring every backup.
 * @param filenamePrefix Prefix that identifies backups belonging to this job.
 * @returns Number of backup files actually deleted.
 * @throws Any filesystem error other than "file no longer exists".
 */
function pruneOldBackups(backupDir: string, retention: BackupRetentionPolicy, filenamePrefix: string): number {
  if (!existsSync(backupDir)) return 0;

  const dayMs = 24 * 60 * 60 * 1000;
  const now = Date.now();
  const dailyCutoff = now - sanitizeRetentionValue(retention?.dailyDays, 7) * dayMs;
  const weeklyCutoff = now - sanitizeRetentionValue(retention?.weeklyWeeks, 4) * 7 * dayMs;
  const monthlyCutoff = now - sanitizeRetentionValue(retention?.monthlyMonths, 1) * 30 * dayMs;

  type BackupEntry = { name: string; fullPath: string; mtimeMs: number };
  const entries: BackupEntry[] = [];

  for (const name of readdirSync(backupDir)) {
    if (!name.startsWith(`${filenamePrefix}-`)) continue;
    if (!name.endsWith(".sql") && !name.endsWith(".sql.gz")) continue;
    const fullPath = resolve(backupDir, name);
    try {
      const stat = statSync(fullPath);
      // Never try to unlink a directory that happens to match the naming pattern.
      if (!stat.isFile()) continue;
      entries.push({ name, fullPath, mtimeMs: stat.mtimeMs });
    } catch (error) {
      // The file vanished between the directory listing and the stat; nothing to prune.
      if (!isMissingFileError(error)) throw error;
    }
  }

  // Sort newest first so the first entry seen per week/month bucket is the one we keep.
  entries.sort((a, b) => b.mtimeMs - a.mtimeMs);

  const keepWeekBuckets = new Set<string>();
  const keepMonthBuckets = new Set<string>();
  const toDelete: string[] = [];

  for (const entry of entries) {
    // Daily tier — keep everything within dailyDays.
    if (entry.mtimeMs >= dailyCutoff) continue;

    const date = new Date(entry.mtimeMs);

    // Weekly tier — keep newest per calendar week.
    if (entry.mtimeMs >= weeklyCutoff) {
      const week = isoWeekKey(date);
      if (keepWeekBuckets.has(week)) {
        toDelete.push(entry.fullPath);
      } else {
        keepWeekBuckets.add(week);
      }
      continue;
    }

    // Monthly tier — keep newest per calendar month.
    if (entry.mtimeMs >= monthlyCutoff) {
      const month = monthKey(date);
      if (keepMonthBuckets.has(month)) {
        toDelete.push(entry.fullPath);
      } else {
        keepMonthBuckets.add(month);
      }
      continue;
    }

    // Beyond all retention tiers — delete.
    toDelete.push(entry.fullPath);
  }

  let pruned = 0;
  for (const filePath of toDelete) {
    try {
      unlinkSync(filePath);
      pruned++;
    } catch (error) {
      // Already removed by someone else: the desired end state holds, so keep going.
      if (!isMissingFileError(error)) throw error;
    }
  }
  return pruned;
}
function formatBackupSize(sizeBytes: number): string {
@@ -148,7 +224,9 @@ function tableKey(schemaName: string, tableName: string): string {
}
async function* readRestoreStatements(backupFile: string): AsyncGenerator<string> {
const stream = createReadStream(backupFile, { encoding: "utf8" });
const raw = createReadStream(backupFile);
const stream = backupFile.endsWith(".gz") ? raw.pipe(createGunzip()) : raw;
stream.setEncoding("utf8");
const reader = createInterface({
input: stream,
crlfDelay: Infinity,
@@ -180,6 +258,7 @@ async function* readRestoreStatements(backupFile: string): AsyncGenerator<string
} finally {
reader.close();
stream.destroy();
raw.destroy();
}
}
@@ -281,15 +360,16 @@ export function createBufferedTextFileWriter(filePath: string, maxBufferedBytes
export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise<RunDatabaseBackupResult> {
const filenamePrefix = opts.filenamePrefix ?? "paperclip";
const retentionDays = Math.max(1, Math.trunc(opts.retentionDays));
const retention = opts.retention;
const connectTimeout = Math.max(1, Math.trunc(opts.connectTimeoutSeconds ?? 5));
const includeMigrationJournal = opts.includeMigrationJournal === true;
const excludedTableNames = normalizeTableNameSet(opts.excludeTables);
const nullifiedColumnsByTable = normalizeNullifyColumnMap(opts.nullifyColumns);
const sql = postgres(opts.connectionString, { max: 1, connect_timeout: connectTimeout });
mkdirSync(opts.backupDir, { recursive: true });
const backupFile = resolve(opts.backupDir, `${filenamePrefix}-${timestamp()}.sql`);
const writer = createBufferedTextFileWriter(backupFile);
const sqlFile = resolve(opts.backupDir, `${filenamePrefix}-${timestamp()}.sql`);
const backupFile = `${sqlFile}.gz`;
const writer = createBufferedTextFileWriter(sqlFile);
try {
await sql`SELECT 1`;
@@ -664,8 +744,14 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
await writer.close();
// Compress the SQL file with gzip
const sqlReadStream = createReadStream(sqlFile);
const gzWriteStream = createWriteStream(backupFile);
await pipeline(sqlReadStream, createGzip(), gzWriteStream);
unlinkSync(sqlFile);
const sizeBytes = statSync(backupFile).size;
const prunedCount = pruneOldBackups(opts.backupDir, retentionDays, filenamePrefix);
const prunedCount = pruneOldBackups(opts.backupDir, retention, filenamePrefix);
return {
backupFile,
@@ -674,6 +760,12 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise
};
} catch (error) {
await writer.abort();
if (existsSync(backupFile)) {
try { unlinkSync(backupFile); } catch { /* ignore */ }
}
if (existsSync(sqlFile)) {
try { unlinkSync(sqlFile); } catch { /* ignore */ }
}
throw error;
} finally {
await sql.end();

View File

@@ -85,7 +85,7 @@ function resolveBackupDir(config: PartialConfig | null): string {
}
function resolveRetentionDays(config: PartialConfig | null): number {
return asPositiveInt(config?.database?.backup?.retentionDays) ?? 30;
return asPositiveInt(config?.database?.backup?.retentionDays) ?? 7;
}
async function main() {
@@ -103,7 +103,7 @@ async function main() {
const result = await runDatabaseBackup({
connectionString,
backupDir,
retentionDays,
retention: { dailyDays: retentionDays, weeklyWeeks: 4, monthlyMonths: 1 },
filenamePrefix: "paperclip",
});

View File

@@ -21,6 +21,7 @@ export {
runDatabaseBackup,
runDatabaseRestore,
formatDatabaseBackupResult,
type BackupRetentionPolicy,
type RunDatabaseBackupOptions,
type RunDatabaseBackupResult,
type RunDatabaseRestoreOptions,

View File

@@ -21,7 +21,7 @@ export const llmConfigSchema = z.object({
export const databaseBackupConfigSchema = z.object({
enabled: z.boolean().default(true),
intervalMinutes: z.number().int().min(1).max(7 * 24 * 60).default(60),
retentionDays: z.number().int().min(1).max(3650).default(30),
retentionDays: z.number().int().min(1).max(3650).default(7),
dir: z.string().default("~/.paperclip/instances/default/data/backups"),
});
@@ -33,7 +33,7 @@ export const databaseConfigSchema = z.object({
backup: databaseBackupConfigSchema.default({
enabled: true,
intervalMinutes: 60,
retentionDays: 30,
retentionDays: 7,
dir: "~/.paperclip/instances/default/data/backups",
}),
});

View File

@@ -189,6 +189,7 @@ export type {
InstanceExperimentalSettings,
InstanceGeneralSettings,
InstanceSettings,
BackupRetentionPolicy,
Agent,
AgentAccessState,
AgentChainOfCommandEntry,
@@ -370,6 +371,13 @@ export {
DEFAULT_FEEDBACK_DATA_SHARING_TERMS_VERSION,
} from "./types/feedback.js";
export {
DAILY_RETENTION_PRESETS,
WEEKLY_RETENTION_PRESETS,
MONTHLY_RETENTION_PRESETS,
DEFAULT_BACKUP_RETENTION,
} from "./types/instance.js";
export {
getClosedIsolatedExecutionWorkspaceMessage,
isClosedIsolatedExecutionWorkspace,

View File

@@ -11,7 +11,8 @@ export type {
FeedbackTraceBundleFile,
FeedbackTraceBundle,
} from "./feedback.js";
export type { InstanceExperimentalSettings, InstanceGeneralSettings, InstanceSettings } from "./instance.js";
export type { InstanceExperimentalSettings, InstanceGeneralSettings, InstanceSettings, BackupRetentionPolicy } from "./instance.js";
export { DAILY_RETENTION_PRESETS, WEEKLY_RETENTION_PRESETS, MONTHLY_RETENTION_PRESETS, DEFAULT_BACKUP_RETENTION } from "./instance.js";
export type {
CompanySkillSourceType,
CompanySkillTrustLevel,

View File

@@ -1,9 +1,26 @@
import type { FeedbackDataSharingPreference } from "./feedback.js";
// Selectable values for the daily retention tier, in days.
export const DAILY_RETENTION_PRESETS = [3, 7, 14] as const;
// Selectable values for the weekly retention tier, in weeks.
export const WEEKLY_RETENTION_PRESETS = [1, 2, 4] as const;
// Selectable values for the monthly retention tier, in months.
export const MONTHLY_RETENTION_PRESETS = [1, 3, 6] as const;
/**
 * Backup retention policy whose fields are restricted, at the type level,
 * to the preset values declared above.
 */
export interface BackupRetentionPolicy {
dailyDays: (typeof DAILY_RETENTION_PRESETS)[number];
weeklyWeeks: (typeof WEEKLY_RETENTION_PRESETS)[number];
monthlyMonths: (typeof MONTHLY_RETENTION_PRESETS)[number];
}
// Policy used when no retention has been stored: 7 days / 4 weeks / 1 month.
export const DEFAULT_BACKUP_RETENTION: BackupRetentionPolicy = {
dailyDays: 7,
weeklyWeeks: 4,
monthlyMonths: 1,
};
/** General instance settings. */
export interface InstanceGeneralSettings {
censorUsernameInLogs: boolean;
keyboardShortcuts: boolean;
feedbackDataSharingPreference: FeedbackDataSharingPreference;
// Tiered retention policy for database backups (daily / weekly / monthly windows).
backupRetention: BackupRetentionPolicy;
}
export interface InstanceExperimentalSettings {

View File

@@ -1,13 +1,33 @@
import { z } from "zod";
import { DEFAULT_FEEDBACK_DATA_SHARING_PREFERENCE } from "../types/feedback.js";
import {
DAILY_RETENTION_PRESETS,
WEEKLY_RETENTION_PRESETS,
MONTHLY_RETENTION_PRESETS,
DEFAULT_BACKUP_RETENTION,
} from "../types/instance.js";
import { feedbackDataSharingPreferenceSchema } from "./feedback.js";
/**
 * Builds a zod number schema that accepts only the values listed in `presets`
 * and narrows the parsed type to that union.
 *
 * @param presets Allowed numeric values.
 * @param label Field name used as the prefix of the validation message.
 */
function presetSchema<T extends readonly number[]>(presets: T, label: string) {
  const allowed: readonly number[] = presets;
  const isPreset = (candidate: number): candidate is T[number] => allowed.includes(candidate);
  const message = `${label} must be one of: ${presets.join(", ")}`;
  return z.number().refine(isPreset, { message });
}
/**
 * Validates a backup retention policy. Each field must be one of its preset
 * values; an omitted field takes the value from `DEFAULT_BACKUP_RETENTION`.
 */
export const backupRetentionPolicySchema = z.object({
dailyDays: presetSchema(DAILY_RETENTION_PRESETS, "dailyDays").default(DEFAULT_BACKUP_RETENTION.dailyDays),
weeklyWeeks: presetSchema(WEEKLY_RETENTION_PRESETS, "weeklyWeeks").default(DEFAULT_BACKUP_RETENTION.weeklyWeeks),
monthlyMonths: presetSchema(MONTHLY_RETENTION_PRESETS, "monthlyMonths").default(DEFAULT_BACKUP_RETENTION.monthlyMonths),
});
/**
 * Schema for the general instance settings object. Every field has a default,
 * and `.strict()` rejects keys that are not declared here.
 */
export const instanceGeneralSettingsSchema = z.object({
censorUsernameInLogs: z.boolean().default(false),
keyboardShortcuts: z.boolean().default(false),
feedbackDataSharingPreference: feedbackDataSharingPreferenceSchema.default(
DEFAULT_FEEDBACK_DATA_SHARING_PREFERENCE,
),
backupRetention: backupRetentionPolicySchema.default(DEFAULT_BACKUP_RETENTION),
}).strict();
// Patch variant: every top-level key becomes optional.
// NOTE(review): `.partial()` is shallow, so a patch carrying a partial `backupRetention`
// object presumably has its missing tiers filled with defaults rather than left
// untouched — confirm callers always send the full policy.
export const patchInstanceGeneralSettingsSchema = instanceGeneralSettingsSchema.partial();

View File

@@ -216,7 +216,7 @@ export function loadConfig(): Config {
1,
Number(process.env.PAPERCLIP_DB_BACKUP_RETENTION_DAYS) ||
fileDatabaseBackup?.retentionDays ||
30,
7,
);
const databaseBackupDir = resolveHomeAwarePath(
process.env.PAPERCLIP_DB_BACKUP_DIR ??

View File

@@ -31,6 +31,7 @@ import { setupLiveEventsWebSocketServer } from "./realtime/live-events-ws.js";
import {
feedbackService,
heartbeatService,
instanceSettingsService,
reconcilePersistedRuntimeServicesOnStartup,
routineService,
} from "./services/index.js";
@@ -621,20 +622,25 @@ export async function startServer(): Promise<StartedServer> {
if (config.databaseBackupEnabled) {
const backupIntervalMs = config.databaseBackupIntervalMinutes * 60 * 1000;
const settingsSvc = instanceSettingsService(db);
let backupInFlight = false;
const runScheduledBackup = async () => {
if (backupInFlight) {
logger.warn("Skipping scheduled database backup because a previous backup is still running");
return;
}
backupInFlight = true;
try {
// Read retention from Instance Settings (DB) so changes take effect without restart
const generalSettings = await settingsSvc.getGeneral();
const retention = generalSettings.backupRetention;
const result = await runDatabaseBackup({
connectionString: activeDatabaseConnectionString,
backupDir: config.databaseBackupDir,
retentionDays: config.databaseBackupRetentionDays,
retention,
filenamePrefix: "paperclip",
});
logger.info(
@@ -643,7 +649,7 @@ export async function startServer(): Promise<StartedServer> {
sizeBytes: result.sizeBytes,
prunedCount: result.prunedCount,
backupDir: config.databaseBackupDir,
retentionDays: config.databaseBackupRetentionDays,
retention,
},
`Automatic database backup complete: ${formatDatabaseBackupResult(result)}`,
);
@@ -653,11 +659,11 @@ export async function startServer(): Promise<StartedServer> {
backupInFlight = false;
}
};
logger.info(
{
intervalMinutes: config.databaseBackupIntervalMinutes,
retentionDays: config.databaseBackupRetentionDays,
retentionSource: "instance-settings-db",
backupDir: config.databaseBackupDir,
},
"Automatic database backups enabled",

View File

@@ -2,6 +2,7 @@ import type { Db } from "@paperclipai/db";
import { companies, instanceSettings } from "@paperclipai/db";
import {
DEFAULT_FEEDBACK_DATA_SHARING_PREFERENCE,
DEFAULT_BACKUP_RETENTION,
instanceGeneralSettingsSchema,
type InstanceGeneralSettings,
instanceExperimentalSettingsSchema,
@@ -22,12 +23,14 @@ function normalizeGeneralSettings(raw: unknown): InstanceGeneralSettings {
keyboardShortcuts: parsed.data.keyboardShortcuts ?? false,
feedbackDataSharingPreference:
parsed.data.feedbackDataSharingPreference ?? DEFAULT_FEEDBACK_DATA_SHARING_PREFERENCE,
backupRetention: parsed.data.backupRetention ?? DEFAULT_BACKUP_RETENTION,
};
}
return {
censorUsernameInLogs: false,
keyboardShortcuts: false,
feedbackDataSharingPreference: DEFAULT_FEEDBACK_DATA_SHARING_PREFERENCE,
backupRetention: DEFAULT_BACKUP_RETENTION,
};
}

View File

@@ -1,6 +1,12 @@
import { useEffect, useState } from "react";
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
import type { PatchInstanceGeneralSettings } from "@paperclipai/shared";
import type { PatchInstanceGeneralSettings, BackupRetentionPolicy } from "@paperclipai/shared";
import {
DAILY_RETENTION_PRESETS,
WEEKLY_RETENTION_PRESETS,
MONTHLY_RETENTION_PRESETS,
DEFAULT_BACKUP_RETENTION,
} from "@paperclipai/shared";
import { LogOut, SlidersHorizontal } from "lucide-react";
import { authApi } from "@/api/auth";
import { instanceSettingsApi } from "@/api/instanceSettings";
@@ -67,6 +73,7 @@ export function InstanceGeneralSettings() {
const censorUsernameInLogs = generalQuery.data?.censorUsernameInLogs === true;
const keyboardShortcuts = generalQuery.data?.keyboardShortcuts === true;
const feedbackDataSharingPreference = generalQuery.data?.feedbackDataSharingPreference ?? "prompt";
const backupRetention: BackupRetentionPolicy = generalQuery.data?.backupRetention ?? DEFAULT_BACKUP_RETENTION;
return (
<div className="max-w-4xl space-y-6">
@@ -123,6 +130,108 @@ export function InstanceGeneralSettings() {
</div>
</section>
<section className="rounded-xl border border-border bg-card p-5">
<div className="space-y-5">
<div className="space-y-1.5">
<h2 className="text-sm font-semibold">Backup retention</h2>
<p className="max-w-2xl text-sm text-muted-foreground">
Configure how long to keep automatic database backups at each tier. Daily backups
are kept in full, then thinned to one per week and one per month. Backups are
compressed with gzip.
</p>
</div>
<div className="space-y-1.5">
<h3 className="text-xs font-medium text-muted-foreground uppercase tracking-wide">Daily</h3>
<div className="flex flex-wrap gap-2">
{DAILY_RETENTION_PRESETS.map((days) => {
const active = backupRetention.dailyDays === days;
return (
<button
key={days}
type="button"
disabled={updateGeneralMutation.isPending}
className={cn(
"rounded-lg border px-3 py-2 text-left transition-colors disabled:cursor-not-allowed disabled:opacity-60",
active
? "border-foreground bg-accent text-foreground"
: "border-border bg-background hover:bg-accent/50",
)}
onClick={() =>
updateGeneralMutation.mutate({
backupRetention: { ...backupRetention, dailyDays: days },
})
}
>
<div className="text-sm font-medium">{days} days</div>
</button>
);
})}
</div>
</div>
<div className="space-y-1.5">
<h3 className="text-xs font-medium text-muted-foreground uppercase tracking-wide">Weekly</h3>
<div className="flex flex-wrap gap-2">
{WEEKLY_RETENTION_PRESETS.map((weeks) => {
const active = backupRetention.weeklyWeeks === weeks;
const label = weeks === 1 ? "1 week" : `${weeks} weeks`;
return (
<button
key={weeks}
type="button"
disabled={updateGeneralMutation.isPending}
className={cn(
"rounded-lg border px-3 py-2 text-left transition-colors disabled:cursor-not-allowed disabled:opacity-60",
active
? "border-foreground bg-accent text-foreground"
: "border-border bg-background hover:bg-accent/50",
)}
onClick={() =>
updateGeneralMutation.mutate({
backupRetention: { ...backupRetention, weeklyWeeks: weeks },
})
}
>
<div className="text-sm font-medium">{label}</div>
</button>
);
})}
</div>
</div>
<div className="space-y-1.5">
<h3 className="text-xs font-medium text-muted-foreground uppercase tracking-wide">Monthly</h3>
<div className="flex flex-wrap gap-2">
{MONTHLY_RETENTION_PRESETS.map((months) => {
const active = backupRetention.monthlyMonths === months;
const label = months === 1 ? "1 month" : `${months} months`;
return (
<button
key={months}
type="button"
disabled={updateGeneralMutation.isPending}
className={cn(
"rounded-lg border px-3 py-2 text-left transition-colors disabled:cursor-not-allowed disabled:opacity-60",
active
? "border-foreground bg-accent text-foreground"
: "border-border bg-background hover:bg-accent/50",
)}
onClick={() =>
updateGeneralMutation.mutate({
backupRetention: { ...backupRetention, monthlyMonths: months },
})
}
>
<div className="text-sm font-medium">{label}</div>
</button>
);
})}
</div>
</div>
</div>
</section>
<section className="rounded-xl border border-border bg-card p-5">
<div className="space-y-4">
<div className="space-y-1.5">