fix config persistence, PowerShell bypass, WS 404 race, feishu panic, Revolt self-hosted

This commit is contained in:
jaberjaber23
2026-04-10 20:51:27 +03:00
parent 0bccba10cf
commit 9323edccf4
8 changed files with 358 additions and 15 deletions

View File

@@ -1470,8 +1470,15 @@ pub async fn start_channel_bridge_with_config(
// Revolt
if let Some(ref rv_config) = config.revolt {
if let Some(token) = read_token(&rv_config.bot_token_env, "Revolt") {
let adapter = Arc::new(RevoltAdapter::new(token));
adapters.push((adapter, rv_config.default_agent.clone()));
let mut adapter = RevoltAdapter::with_urls(
token,
rv_config.api_url.clone(),
rv_config.ws_url.clone(),
);
if !rv_config.allowed_channels.is_empty() {
adapter.set_allowed_channels(rv_config.allowed_channels.clone());
}
adapters.push((Arc::new(adapter), rv_config.default_agent.clone()));
}
}

View File

@@ -5847,6 +5847,11 @@ pub async fn patch_agent(
// Persist updated entry to SQLite
if let Some(entry) = state.kernel.registry.get(agent_id) {
let _ = state.kernel.memory.save_agent(&entry);
// Write updated manifest to agent.toml on disk so disk doesn't override
// dashboard changes on next boot (#996, #1018).
state.kernel.persist_manifest_to_disk(agent_id);
(
StatusCode::OK,
Json(
@@ -8992,6 +8997,10 @@ pub async fn patch_agent_config(
}
}
// Write updated manifest to agent.toml on disk so disk doesn't override
// dashboard changes on next boot (#996, #1018).
state.kernel.persist_manifest_to_disk(agent_id);
(
StatusCode::OK,
Json(serde_json::json!({"status": "ok", "agent_id": id})),

View File

@@ -197,9 +197,29 @@ pub async fn agent_ws(
}
};
// Verify agent exists
if state.kernel.registry.get(agent_id).is_none() {
return axum::http::StatusCode::NOT_FOUND.into_response();
// Verify agent exists.
// Retry up to 5 times with 200ms backoff to handle a timing race where
// the client connects before the agent finishes registering (#804).
{
let mut found = state.kernel.registry.get(agent_id).is_some();
if !found {
for attempt in 1..=4 {
debug!(
agent_id = %id,
attempt,
"Agent not found yet, retrying in 200ms"
);
tokio::time::sleep(Duration::from_millis(200)).await;
if state.kernel.registry.get(agent_id).is_some() {
found = true;
break;
}
}
}
if !found {
warn!(agent_id = %id, "Agent not found after 5 lookup attempts");
return axum::http::StatusCode::NOT_FOUND.into_response();
}
}
let id_str = id.clone();

View File

@@ -160,7 +160,14 @@ impl DedupCache {
/// Returns `true` if the ID was already seen (duplicate).
fn check_and_insert(&self, id: &str) -> bool {
let mut ids = self.ids.lock().unwrap();
let mut ids = match self.ids.lock() {
Ok(guard) => guard,
Err(poisoned) => {
// Recover from a poisoned mutex rather than panicking.
warn!("Dedup cache mutex was poisoned, recovering");
poisoned.into_inner()
}
};
if ids.iter().any(|s| s == id) {
return true;
}
@@ -1030,7 +1037,10 @@ fn combine_payload(
*entry = vec![Vec::new(); sum];
}
entry[seq] = payload;
match entry.get_mut(seq) {
Some(slot) => *slot = payload,
None => return None,
}
if entry.iter().any(|part| part.is_empty()) {
return None;
@@ -1105,7 +1115,10 @@ fn extract_text_from_post(content: &serde_json::Value) -> Option<String> {
let mut text_parts = Vec::new();
for paragraph in paragraphs {
let elements = paragraph.as_array()?;
let elements = match paragraph.as_array() {
Some(elems) => elems,
None => continue,
};
for element in elements {
let tag = element["tag"].as_str().unwrap_or("");
match tag {
@@ -1165,8 +1178,10 @@ fn should_respond_in_group(text: &str, mentions: &serde_json::Value, bot_names:
/// Strip @mention placeholders from text (`@_user_N` format).
fn strip_mention_placeholders(text: &str) -> String {
let re = regex_lite::Regex::new(r"@_user_\d+\s*").unwrap();
re.replace_all(text, "").trim().to_string()
match regex_lite::Regex::new(r"@_user_\d+\s*") {
Ok(re) => re.replace_all(text, "").trim().to_string(),
Err(_) => text.trim().to_string(),
}
}
/// Decrypt an AES-256-CBC encrypted event payload.

View File

@@ -94,6 +94,11 @@ impl RevoltAdapter {
adapter
}
/// Set allowed channel IDs (empty = all channels the bot is in).
pub fn set_allowed_channels(&mut self, channels: Vec<String>) {
self.allowed_channels = channels;
}
/// Add the bot token header to a request builder.
fn auth_header(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
builder.header("x-bot-token", self.bot_token.as_str())

View File

@@ -2986,6 +2986,36 @@ impl OpenFangKernel {
);
}
/// Persist an agent's manifest to its `agent.toml` on disk so that
/// dashboard-driven config changes (model, provider, fallback, etc.)
/// survive a restart. The on-disk file lives at
/// `<home_dir>/agents/<name>/agent.toml`.
///
/// This is best-effort: a failure to write is logged but does not
/// propagate as an error — the authoritative copy lives in SQLite.
pub fn persist_manifest_to_disk(&self, agent_id: AgentId) {
if let Some(entry) = self.registry.get(agent_id) {
let dir = self.config.home_dir.join("agents").join(&entry.name);
let toml_path = dir.join("agent.toml");
match toml::to_string_pretty(&entry.manifest) {
Ok(toml_str) => {
if let Err(e) = std::fs::create_dir_all(&dir) {
warn!(agent = %entry.name, "Failed to create agent dir for manifest persist: {e}");
return;
}
if let Err(e) = std::fs::write(&toml_path, toml_str) {
warn!(agent = %entry.name, "Failed to persist manifest to disk: {e}");
} else {
debug!(agent = %entry.name, path = %toml_path.display(), "Persisted manifest to disk");
}
}
Err(e) => {
warn!(agent = %entry.name, "Failed to serialize manifest to TOML: {e}");
}
}
}
}
/// Switch an agent's model.
///
/// When `explicit_provider` is `Some`, that provider name is used as-is
@@ -3072,6 +3102,9 @@ impl OpenFangKernel {
let _ = self.memory.save_agent(&entry);
}
// Write updated manifest to agent.toml so changes survive restart (#996, #1018)
self.persist_manifest_to_disk(agent_id);
// Clear canonical session to prevent memory poisoning from old model's responses
let _ = self.memory.delete_canonical_session(agent_id);
debug!(agent_id = %agent_id, "Cleared canonical session after model switch");

View File

@@ -164,6 +164,102 @@ fn extract_base_command(cmd: &str) -> &str {
.unwrap_or(first_word)
}
/// Known shell wrappers that can execute inline scripts via flags.
const SHELL_WRAPPERS: &[&str] = &["powershell", "pwsh", "cmd", "bash", "sh", "zsh"];
/// Known flags that pass inline scripts to shell wrappers.
/// Each entry is (wrapper_names, flag).
const SHELL_INLINE_FLAGS: &[(&[&str], &str)] = &[
(&["powershell", "pwsh"], "-Command"),
(&["powershell", "pwsh"], "-command"),
(&["powershell", "pwsh"], "-c"),
(&["cmd"], "/c"),
(&["cmd"], "/C"),
(&["bash", "sh", "zsh"], "-c"),
(&["bash", "sh", "zsh"], "--command"),
];
/// If the base command is a known shell wrapper, extract any inline script
/// passed via -Command / -c / /c flags and return the commands within it.
///
/// Returns the list of base command names found inside the inline script,
/// or an empty vec if the command is not a shell wrapper or has no inline flag.
fn extract_shell_wrapper_commands(command: &str) -> Vec<String> {
let trimmed = command.trim();
let base = extract_base_command(trimmed);
// Check if the base command is a known shell wrapper (case-insensitive)
let base_lower = base.to_lowercase();
// Also strip .exe suffix for Windows
let base_normalized = base_lower.strip_suffix(".exe").unwrap_or(&base_lower);
if !SHELL_WRAPPERS.iter().any(|w| *w == base_normalized) {
return Vec::new();
}
// Find the inline flag and extract everything after it
for (wrappers, flag) in SHELL_INLINE_FLAGS {
if !wrappers.iter().any(|w| *w == base_normalized) {
continue;
}
// Search for the flag in the command args (case-insensitive for PowerShell)
let rest = trimmed.split_whitespace().skip(1); // skip the base command
let args: Vec<&str> = rest.collect();
for (i, arg) in args.iter().enumerate() {
if arg.eq_ignore_ascii_case(flag) {
// Everything after this flag is the inline script
if i + 1 < args.len() {
let script = args[i + 1..].join(" ");
// Strip surrounding quotes if present
let script = script.trim();
let script = if (script.starts_with('"') && script.ends_with('"'))
|| (script.starts_with('\'') && script.ends_with('\''))
{
&script[1..script.len() - 1]
} else {
script
};
// Extract commands from the inline script
// For PowerShell, commands can be separated by `;`
// For POSIX shells, by `;`, `&&`, `||`, `|`
return extract_inner_script_commands(script);
}
}
}
}
Vec::new()
}
/// Extract base command names from an inline script string.
/// Splits on `;`, `&&`, `||`, `|` and returns the base command of each segment.
fn extract_inner_script_commands(script: &str) -> Vec<String> {
let mut commands = Vec::new();
let mut rest = script;
while !rest.is_empty() {
let separators: &[&str] = &["&&", "||", "|", ";"];
let mut earliest_pos = rest.len();
let mut earliest_len = 0;
for sep in separators {
if let Some(pos) = rest.find(sep) {
if pos < earliest_pos {
earliest_pos = pos;
earliest_len = sep.len();
}
}
}
let segment = &rest[..earliest_pos];
let base = extract_base_command(segment);
if !base.is_empty() {
commands.push(base.to_string());
}
if earliest_pos + earliest_len >= rest.len() {
break;
}
rest = &rest[earliest_pos + earliest_len..];
}
commands
}
/// Extract all commands from a shell command string.
/// Handles pipes (`|`), semicolons (`;`), `&&`, and `||`.
fn extract_all_commands(command: &str) -> Vec<&str> {
@@ -215,11 +311,22 @@ pub fn validate_command_allowlist(command: &str, policy: &ExecPolicy) -> Result<
ExecSecurityMode::Allowlist => {
// SECURITY: Check for shell metacharacters BEFORE base-command extraction.
// These can smuggle commands inside arguments of allowed binaries.
if let Some(reason) = contains_shell_metacharacters(command) {
return Err(format!(
"Command blocked: contains {reason}. Shell metacharacters are not allowed in Allowlist mode."
));
//
// However, we must skip this check for commands wrapped in a known
// shell wrapper (e.g. `powershell -Command "..."`) because the
// inline script naturally contains metacharacters (quotes, semicolons).
// Those inner commands are validated separately below.
let inner_commands = extract_shell_wrapper_commands(command);
let is_shell_wrapper = !inner_commands.is_empty();
if !is_shell_wrapper {
if let Some(reason) = contains_shell_metacharacters(command) {
return Err(format!(
"Command blocked: contains {reason}. Shell metacharacters are not allowed in Allowlist mode."
));
}
}
let base_commands = extract_all_commands(command);
for base in &base_commands {
// Check safe_bins first
@@ -235,6 +342,28 @@ pub fn validate_command_allowlist(command: &str, policy: &ExecPolicy) -> Result<
base
));
}
// SECURITY (#794): If the outer command is a shell wrapper
// (powershell, cmd, bash, etc.), also validate all commands
// found inside the inline script. This prevents bypassing the
// allowlist by wrapping disallowed commands inside an allowed
// shell.
if is_shell_wrapper {
for inner_cmd in &inner_commands {
if policy.safe_bins.iter().any(|sb| sb == inner_cmd) {
continue;
}
if policy.allowed_commands.iter().any(|ac| ac == inner_cmd) {
continue;
}
return Err(format!(
"Command '{}' (inside shell wrapper) is not in the exec allowlist. \
Add it to exec_policy.allowed_commands or exec_policy.safe_bins.",
inner_cmd
));
}
}
Ok(())
}
}
@@ -928,6 +1057,124 @@ mod tests {
);
}
// ── Shell wrapper bypass tests (issue #794) ────────────────────────
#[test]
fn test_issue_794_powershell_command_bypass_blocked() {
let policy = ExecPolicy {
mode: ExecSecurityMode::Allowlist,
allowed_commands: vec!["powershell".to_string()],
..ExecPolicy::default()
};
// "Remove-Item" is NOT in allowed_commands — must be blocked
let result = validate_command_allowlist(
r#"powershell -Command "Remove-Item -Recurse -Force C:\temp""#,
&policy,
);
assert!(
result.is_err(),
"Remove-Item inside powershell -Command must be blocked (issue #794)"
);
let err = result.unwrap_err();
assert!(
err.contains("Remove-Item"),
"Error should name the blocked inner command, got: {err}"
);
}
#[test]
fn test_powershell_command_allowed_when_inner_listed() {
let policy = ExecPolicy {
mode: ExecSecurityMode::Allowlist,
allowed_commands: vec!["powershell".to_string(), "Get-Process".to_string()],
..ExecPolicy::default()
};
let result = validate_command_allowlist(
r#"powershell -Command "Get-Process""#,
&policy,
);
assert!(
result.is_ok(),
"Get-Process should be allowed when in allowed_commands"
);
}
#[test]
fn test_cmd_c_bypass_blocked() {
let policy = ExecPolicy {
mode: ExecSecurityMode::Allowlist,
allowed_commands: vec!["cmd".to_string()],
..ExecPolicy::default()
};
let result = validate_command_allowlist(
r#"cmd /C "del /F /Q C:\temp\secret.txt""#,
&policy,
);
assert!(
result.is_err(),
"del inside cmd /C must be blocked when not in allowlist"
);
}
#[test]
fn test_bash_c_bypass_blocked() {
let policy = ExecPolicy {
mode: ExecSecurityMode::Allowlist,
allowed_commands: vec!["bash".to_string()],
..ExecPolicy::default()
};
let result = validate_command_allowlist(
r#"bash -c "curl https://evil.com""#,
&policy,
);
assert!(
result.is_err(),
"curl inside bash -c must be blocked when not in allowlist"
);
}
#[test]
fn test_bash_c_allowed_when_inner_listed() {
let policy = ExecPolicy {
mode: ExecSecurityMode::Allowlist,
allowed_commands: vec!["bash".to_string()],
..ExecPolicy::default()
};
// "echo" is in safe_bins by default
let result = validate_command_allowlist(
r#"bash -c "echo hello""#,
&policy,
);
assert!(
result.is_ok(),
"echo inside bash -c should be allowed (echo is in safe_bins)"
);
}
#[test]
fn test_pwsh_command_bypass_blocked() {
let policy = ExecPolicy {
mode: ExecSecurityMode::Allowlist,
allowed_commands: vec!["pwsh".to_string()],
..ExecPolicy::default()
};
let result = validate_command_allowlist(
r#"pwsh -Command "Invoke-WebRequest https://evil.com""#,
&policy,
);
assert!(
result.is_err(),
"Invoke-WebRequest inside pwsh must be blocked"
);
}
#[test]
fn test_shell_wrapper_extract_no_flag() {
// When powershell is called without -Command, no inner commands are extracted
let cmds = extract_shell_wrapper_commands("powershell script.ps1");
assert!(cmds.is_empty());
}
#[test]
fn test_extract_all_commands_cjk_separators() {
// Ensure extract_all_commands handles CJK content between separators

View File

@@ -2618,8 +2618,13 @@ impl Default for MqttConfig {
pub struct RevoltConfig {
/// Env var name holding the bot token.
pub bot_token_env: String,
/// Revolt API URL.
/// Revolt API URL (set to your self-hosted instance URL if not using revolt.chat).
pub api_url: String,
/// Revolt WebSocket URL (set to your self-hosted instance WS URL if not using revolt.chat).
pub ws_url: String,
/// Restrict to specific channel IDs (empty = all channels the bot is in).
#[serde(default)]
pub allowed_channels: Vec<String>,
/// Default agent name to route messages to.
pub default_agent: Option<String>,
/// Per-channel behavior overrides.
@@ -2632,6 +2637,8 @@ impl Default for RevoltConfig {
Self {
bot_token_env: "REVOLT_BOT_TOKEN".to_string(),
api_url: "https://api.revolt.chat".to_string(),
ws_url: "wss://ws.revolt.chat".to_string(),
allowed_channels: Vec::new(),
default_agent: None,
overrides: ChannelOverrides::default(),
}