diff --git a/crates/openfang-api/src/channel_bridge.rs b/crates/openfang-api/src/channel_bridge.rs index 3ad3a8d3..325c5e63 100644 --- a/crates/openfang-api/src/channel_bridge.rs +++ b/crates/openfang-api/src/channel_bridge.rs @@ -1470,8 +1470,15 @@ pub async fn start_channel_bridge_with_config( // Revolt if let Some(ref rv_config) = config.revolt { if let Some(token) = read_token(&rv_config.bot_token_env, "Revolt") { - let adapter = Arc::new(RevoltAdapter::new(token)); - adapters.push((adapter, rv_config.default_agent.clone())); + let mut adapter = RevoltAdapter::with_urls( + token, + rv_config.api_url.clone(), + rv_config.ws_url.clone(), + ); + if !rv_config.allowed_channels.is_empty() { + adapter.set_allowed_channels(rv_config.allowed_channels.clone()); + } + adapters.push((Arc::new(adapter), rv_config.default_agent.clone())); } } diff --git a/crates/openfang-api/src/routes.rs b/crates/openfang-api/src/routes.rs index 98d9f129..6876bbf2 100644 --- a/crates/openfang-api/src/routes.rs +++ b/crates/openfang-api/src/routes.rs @@ -5847,6 +5847,11 @@ pub async fn patch_agent( // Persist updated entry to SQLite if let Some(entry) = state.kernel.registry.get(agent_id) { let _ = state.kernel.memory.save_agent(&entry); + + // Write updated manifest to agent.toml on disk so disk doesn't override + // dashboard changes on next boot (#996, #1018). + state.kernel.persist_manifest_to_disk(agent_id); + ( StatusCode::OK, Json( @@ -8992,6 +8997,10 @@ pub async fn patch_agent_config( } } + // Write updated manifest to agent.toml on disk so disk doesn't override + // dashboard changes on next boot (#996, #1018). + state.kernel.persist_manifest_to_disk(agent_id); + ( StatusCode::OK, Json(serde_json::json!({"status": "ok", "agent_id": id})), diff --git a/crates/openfang-api/src/ws.rs b/crates/openfang-api/src/ws.rs index 095d2411..9f776ba8 100644 --- a/crates/openfang-api/src/ws.rs +++ b/crates/openfang-api/src/ws.rs @@ -197,9 +197,29 @@ pub async fn agent_ws( } }; - // Verify agent exists - if state.kernel.registry.get(agent_id).is_none() { - return axum::http::StatusCode::NOT_FOUND.into_response(); + // Verify agent exists. + // Retry up to 5 times with 200ms backoff to handle a timing race where + // the client connects before the agent finishes registering (#804). + { + let mut found = state.kernel.registry.get(agent_id).is_some(); + if !found { + for attempt in 1..=4 { + debug!( + agent_id = %id, + attempt, + "Agent not found yet, retrying in 200ms" + ); + tokio::time::sleep(Duration::from_millis(200)).await; + if state.kernel.registry.get(agent_id).is_some() { + found = true; + break; + } + } + } + if !found { + warn!(agent_id = %id, "Agent not found after 5 lookup attempts"); + return axum::http::StatusCode::NOT_FOUND.into_response(); + } } let id_str = id.clone(); diff --git a/crates/openfang-channels/src/feishu.rs b/crates/openfang-channels/src/feishu.rs index 4264da69..c8aaeec9 100644 --- a/crates/openfang-channels/src/feishu.rs +++ b/crates/openfang-channels/src/feishu.rs @@ -160,7 +160,14 @@ impl DedupCache { /// Returns `true` if the ID was already seen (duplicate). fn check_and_insert(&self, id: &str) -> bool { - let mut ids = self.ids.lock().unwrap(); + let mut ids = match self.ids.lock() { + Ok(guard) => guard, + Err(poisoned) => { + // Recover from a poisoned mutex rather than panicking. + warn!("Dedup cache mutex was poisoned, recovering"); + poisoned.into_inner() + } + }; if ids.iter().any(|s| s == id) { return true; } @@ -1030,7 +1037,10 @@ fn combine_payload( *entry = vec![Vec::new(); sum]; } - entry[seq] = payload; + match entry.get_mut(seq) { + Some(slot) => *slot = payload, + None => return None, + } if entry.iter().any(|part| part.is_empty()) { return None; @@ -1105,7 +1115,10 @@ fn extract_text_from_post(content: &serde_json::Value) -> Option { let mut text_parts = Vec::new(); for paragraph in paragraphs { - let elements = paragraph.as_array()?; + let elements = match paragraph.as_array() { + Some(elems) => elems, + None => continue, + }; for element in elements { let tag = element["tag"].as_str().unwrap_or(""); match tag { @@ -1165,8 +1178,10 @@ fn should_respond_in_group(text: &str, mentions: &serde_json::Value, bot_names: /// Strip @mention placeholders from text (`@_user_N` format). fn strip_mention_placeholders(text: &str) -> String { - let re = regex_lite::Regex::new(r"@_user_\d+\s*").unwrap(); - re.replace_all(text, "").trim().to_string() + match regex_lite::Regex::new(r"@_user_\d+\s*") { + Ok(re) => re.replace_all(text, "").trim().to_string(), + Err(_) => text.trim().to_string(), + } } /// Decrypt an AES-256-CBC encrypted event payload. diff --git a/crates/openfang-channels/src/revolt.rs b/crates/openfang-channels/src/revolt.rs index 59321db0..d2b7202f 100644 --- a/crates/openfang-channels/src/revolt.rs +++ b/crates/openfang-channels/src/revolt.rs @@ -94,6 +94,11 @@ impl RevoltAdapter { adapter } + /// Set allowed channel IDs (empty = all channels the bot is in). + pub fn set_allowed_channels(&mut self, channels: Vec) { + self.allowed_channels = channels; + } + /// Add the bot token header to a request builder. fn auth_header(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder { builder.header("x-bot-token", self.bot_token.as_str()) diff --git a/crates/openfang-kernel/src/kernel.rs b/crates/openfang-kernel/src/kernel.rs index 6cb84ea5..599541ee 100644 --- a/crates/openfang-kernel/src/kernel.rs +++ b/crates/openfang-kernel/src/kernel.rs @@ -2986,6 +2986,36 @@ impl OpenFangKernel { ); } + /// Persist an agent's manifest to its `agent.toml` on disk so that + /// dashboard-driven config changes (model, provider, fallback, etc.) + /// survive a restart. The on-disk file lives at + /// `/agents//agent.toml`. + /// + /// This is best-effort: a failure to write is logged but does not + /// propagate as an error — the authoritative copy lives in SQLite. + pub fn persist_manifest_to_disk(&self, agent_id: AgentId) { + if let Some(entry) = self.registry.get(agent_id) { + let dir = self.config.home_dir.join("agents").join(&entry.name); + let toml_path = dir.join("agent.toml"); + match toml::to_string_pretty(&entry.manifest) { + Ok(toml_str) => { + if let Err(e) = std::fs::create_dir_all(&dir) { + warn!(agent = %entry.name, "Failed to create agent dir for manifest persist: {e}"); + return; + } + if let Err(e) = std::fs::write(&toml_path, toml_str) { + warn!(agent = %entry.name, "Failed to persist manifest to disk: {e}"); + } else { + debug!(agent = %entry.name, path = %toml_path.display(), "Persisted manifest to disk"); + } + } + Err(e) => { + warn!(agent = %entry.name, "Failed to serialize manifest to TOML: {e}"); + } + } + } + } + /// Switch an agent's model. /// /// When `explicit_provider` is `Some`, that provider name is used as-is @@ -3072,6 +3102,9 @@ impl OpenFangKernel { let _ = self.memory.save_agent(&entry); } + // Write updated manifest to agent.toml so changes survive restart (#996, #1018) + self.persist_manifest_to_disk(agent_id); + // Clear canonical session to prevent memory poisoning from old model's responses let _ = self.memory.delete_canonical_session(agent_id); debug!(agent_id = %agent_id, "Cleared canonical session after model switch"); diff --git a/crates/openfang-runtime/src/subprocess_sandbox.rs b/crates/openfang-runtime/src/subprocess_sandbox.rs index 8546dd4b..54b75430 100644 --- a/crates/openfang-runtime/src/subprocess_sandbox.rs +++ b/crates/openfang-runtime/src/subprocess_sandbox.rs @@ -164,6 +164,102 @@ fn extract_base_command(cmd: &str) -> &str { .unwrap_or(first_word) } +/// Known shell wrappers that can execute inline scripts via flags. +const SHELL_WRAPPERS: &[&str] = &["powershell", "pwsh", "cmd", "bash", "sh", "zsh"]; + +/// Known flags that pass inline scripts to shell wrappers. +/// Each entry is (wrapper_names, flag). +const SHELL_INLINE_FLAGS: &[(&[&str], &str)] = &[ + (&["powershell", "pwsh"], "-Command"), + (&["powershell", "pwsh"], "-command"), + (&["powershell", "pwsh"], "-c"), + (&["cmd"], "/c"), + (&["cmd"], "/C"), + (&["bash", "sh", "zsh"], "-c"), + (&["bash", "sh", "zsh"], "--command"), +]; + +/// If the base command is a known shell wrapper, extract any inline script +/// passed via -Command / -c / /c flags and return the commands within it. +/// +/// Returns the list of base command names found inside the inline script, +/// or an empty vec if the command is not a shell wrapper or has no inline flag. +fn extract_shell_wrapper_commands(command: &str) -> Vec { + let trimmed = command.trim(); + let base = extract_base_command(trimmed); + + // Check if the base command is a known shell wrapper (case-insensitive) + let base_lower = base.to_lowercase(); + // Also strip .exe suffix for Windows + let base_normalized = base_lower.strip_suffix(".exe").unwrap_or(&base_lower); + if !SHELL_WRAPPERS.iter().any(|w| *w == base_normalized) { + return Vec::new(); + } + + // Find the inline flag and extract everything after it + for (wrappers, flag) in SHELL_INLINE_FLAGS { + if !wrappers.iter().any(|w| *w == base_normalized) { + continue; + } + // Search for the flag in the command args (case-insensitive for PowerShell) + let rest = trimmed.split_whitespace().skip(1); // skip the base command + let args: Vec<&str> = rest.collect(); + for (i, arg) in args.iter().enumerate() { + if arg.eq_ignore_ascii_case(flag) { + // Everything after this flag is the inline script + if i + 1 < args.len() { + let script = args[i + 1..].join(" "); + // Strip surrounding quotes if present + let script = script.trim(); + let script = if (script.starts_with('"') && script.ends_with('"')) + || (script.starts_with('\'') && script.ends_with('\'')) + { + &script[1..script.len() - 1] + } else { + script + }; + // Extract commands from the inline script + // For PowerShell, commands can be separated by `;` + // For POSIX shells, by `;`, `&&`, `||`, `|` + return extract_inner_script_commands(script); + } + } + } + } + + Vec::new() +} + +/// Extract base command names from an inline script string. +/// Splits on `;`, `&&`, `||`, `|` and returns the base command of each segment. +fn extract_inner_script_commands(script: &str) -> Vec { + let mut commands = Vec::new(); + let mut rest = script; + while !rest.is_empty() { + let separators: &[&str] = &["&&", "||", "|", ";"]; + let mut earliest_pos = rest.len(); + let mut earliest_len = 0; + for sep in separators { + if let Some(pos) = rest.find(sep) { + if pos < earliest_pos { + earliest_pos = pos; + earliest_len = sep.len(); + } + } + } + let segment = &rest[..earliest_pos]; + let base = extract_base_command(segment); + if !base.is_empty() { + commands.push(base.to_string()); + } + if earliest_pos + earliest_len >= rest.len() { + break; + } + rest = &rest[earliest_pos + earliest_len..]; + } + commands +} + /// Extract all commands from a shell command string. /// Handles pipes (`|`), semicolons (`;`), `&&`, and `||`. fn extract_all_commands(command: &str) -> Vec<&str> { @@ -215,11 +311,22 @@ pub fn validate_command_allowlist(command: &str, policy: &ExecPolicy) -> Result< ExecSecurityMode::Allowlist => { // SECURITY: Check for shell metacharacters BEFORE base-command extraction. // These can smuggle commands inside arguments of allowed binaries. - if let Some(reason) = contains_shell_metacharacters(command) { - return Err(format!( - "Command blocked: contains {reason}. Shell metacharacters are not allowed in Allowlist mode." - )); + // + // However, we must skip this check for commands wrapped in a known + // shell wrapper (e.g. `powershell -Command "..."`) because the + // inline script naturally contains metacharacters (quotes, semicolons). + // Those inner commands are validated separately below. + let inner_commands = extract_shell_wrapper_commands(command); + let is_shell_wrapper = !inner_commands.is_empty(); + + if !is_shell_wrapper { + if let Some(reason) = contains_shell_metacharacters(command) { + return Err(format!( + "Command blocked: contains {reason}. Shell metacharacters are not allowed in Allowlist mode." + )); + } } + let base_commands = extract_all_commands(command); for base in &base_commands { // Check safe_bins first @@ -235,6 +342,28 @@ pub fn validate_command_allowlist(command: &str, policy: &ExecPolicy) -> Result< base )); } + + // SECURITY (#794): If the outer command is a shell wrapper + // (powershell, cmd, bash, etc.), also validate all commands + // found inside the inline script. This prevents bypassing the + // allowlist by wrapping disallowed commands inside an allowed + // shell. + if is_shell_wrapper { + for inner_cmd in &inner_commands { + if policy.safe_bins.iter().any(|sb| sb == inner_cmd) { + continue; + } + if policy.allowed_commands.iter().any(|ac| ac == inner_cmd) { + continue; + } + return Err(format!( + "Command '{}' (inside shell wrapper) is not in the exec allowlist. \ + Add it to exec_policy.allowed_commands or exec_policy.safe_bins.", + inner_cmd + )); + } + } + Ok(()) } } @@ -928,6 +1057,124 @@ mod tests { ); } + // ── Shell wrapper bypass tests (issue #794) ──────────────────────── + + #[test] + fn test_issue_794_powershell_command_bypass_blocked() { + let policy = ExecPolicy { + mode: ExecSecurityMode::Allowlist, + allowed_commands: vec!["powershell".to_string()], + ..ExecPolicy::default() + }; + // "Remove-Item" is NOT in allowed_commands — must be blocked + let result = validate_command_allowlist( + r#"powershell -Command "Remove-Item -Recurse -Force C:\temp""#, + &policy, + ); + assert!( + result.is_err(), + "Remove-Item inside powershell -Command must be blocked (issue #794)" + ); + let err = result.unwrap_err(); + assert!( + err.contains("Remove-Item"), + "Error should name the blocked inner command, got: {err}" + ); + } + + #[test] + fn test_powershell_command_allowed_when_inner_listed() { + let policy = ExecPolicy { + mode: ExecSecurityMode::Allowlist, + allowed_commands: vec!["powershell".to_string(), "Get-Process".to_string()], + ..ExecPolicy::default() + }; + let result = validate_command_allowlist( + r#"powershell -Command "Get-Process""#, + &policy, + ); + assert!( + result.is_ok(), + "Get-Process should be allowed when in allowed_commands" + ); + } + + #[test] + fn test_cmd_c_bypass_blocked() { + let policy = ExecPolicy { + mode: ExecSecurityMode::Allowlist, + allowed_commands: vec!["cmd".to_string()], + ..ExecPolicy::default() + }; + let result = validate_command_allowlist( + r#"cmd /C "del /F /Q C:\temp\secret.txt""#, + &policy, + ); + assert!( + result.is_err(), + "del inside cmd /C must be blocked when not in allowlist" + ); + } + + #[test] + fn test_bash_c_bypass_blocked() { + let policy = ExecPolicy { + mode: ExecSecurityMode::Allowlist, + allowed_commands: vec!["bash".to_string()], + ..ExecPolicy::default() + }; + let result = validate_command_allowlist( + r#"bash -c "curl https://evil.com""#, + &policy, + ); + assert!( + result.is_err(), + "curl inside bash -c must be blocked when not in allowlist" + ); + } + + #[test] + fn test_bash_c_allowed_when_inner_listed() { + let policy = ExecPolicy { + mode: ExecSecurityMode::Allowlist, + allowed_commands: vec!["bash".to_string()], + ..ExecPolicy::default() + }; + // "echo" is in safe_bins by default + let result = validate_command_allowlist( + r#"bash -c "echo hello""#, + &policy, + ); + assert!( + result.is_ok(), + "echo inside bash -c should be allowed (echo is in safe_bins)" + ); + } + + #[test] + fn test_pwsh_command_bypass_blocked() { + let policy = ExecPolicy { + mode: ExecSecurityMode::Allowlist, + allowed_commands: vec!["pwsh".to_string()], + ..ExecPolicy::default() + }; + let result = validate_command_allowlist( + r#"pwsh -Command "Invoke-WebRequest https://evil.com""#, + &policy, + ); + assert!( + result.is_err(), + "Invoke-WebRequest inside pwsh must be blocked" + ); + } + + #[test] + fn test_shell_wrapper_extract_no_flag() { + // When powershell is called without -Command, no inner commands are extracted + let cmds = extract_shell_wrapper_commands("powershell script.ps1"); + assert!(cmds.is_empty()); + } + #[test] fn test_extract_all_commands_cjk_separators() { // Ensure extract_all_commands handles CJK content between separators diff --git a/crates/openfang-types/src/config.rs b/crates/openfang-types/src/config.rs index 7b80e325..96424ff0 100644 --- a/crates/openfang-types/src/config.rs +++ b/crates/openfang-types/src/config.rs @@ -2618,8 +2618,13 @@ impl Default for MqttConfig { pub struct RevoltConfig { /// Env var name holding the bot token. pub bot_token_env: String, - /// Revolt API URL. + /// Revolt API URL (set to your self-hosted instance URL if not using revolt.chat). pub api_url: String, + /// Revolt WebSocket URL (set to your self-hosted instance WS URL if not using revolt.chat). + pub ws_url: String, + /// Restrict to specific channel IDs (empty = all channels the bot is in). + #[serde(default)] + pub allowed_channels: Vec, /// Default agent name to route messages to. pub default_agent: Option, /// Per-channel behavior overrides. @@ -2632,6 +2637,8 @@ impl Default for RevoltConfig { Self { bot_token_env: "REVOLT_BOT_TOKEN".to_string(), api_url: "https://api.revolt.chat".to_string(), + ws_url: "wss://ws.revolt.chat".to_string(), + allowed_channels: Vec::new(), default_agent: None, overrides: ChannelOverrides::default(), }