feat: add Snapchat extractor, improve browser auth and XenForo support
- Add new Snapchat story extractor with spotlight and user story support - Expand browser cookie extraction to support Zen Browser and multi-platform profiles - Significantly enhance XenForo extractor with gallery, media, and attachment support - Add APPDATA-based profile discovery for Windows browsers - Update main.rs with new extractor wiring and improved CLI handling Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
|||||||
archive/
|
archive/
|
||||||
|
.claude/
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
@@ -1,8 +1,15 @@
|
|||||||
//! Browser cookie extraction for Firefox and Chrome
|
//! Browser cookie extraction for Firefox-based browsers and Chrome
|
||||||
//!
|
//!
|
||||||
//! This module provides functionality to extract cookies directly from
|
//! This module provides functionality to extract cookies directly from
|
||||||
//! browser SQLite cookie databases, enabling seamless authentication
|
//! browser SQLite cookie databases, enabling seamless authentication
|
||||||
//! without manual cookie file exports.
|
//! without manual cookie file exports.
|
||||||
|
//!
|
||||||
|
//! Supported browsers:
|
||||||
|
//! - Firefox (all platforms)
|
||||||
|
//! - Zen Browser (Firefox-based)
|
||||||
|
//! - LibreWolf (Firefox-based)
|
||||||
|
//! - Waterfox (Firefox-based)
|
||||||
|
//! - Chrome / Chromium (all platforms, plaintext cookies only)
|
||||||
|
|
||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
@@ -60,6 +67,17 @@ fn get_home_dir() -> Result<PathBuf, BrowserError> {
|
|||||||
.ok_or_else(|| BrowserError::Other("Could not determine home directory".to_string()))
|
.ok_or_else(|| BrowserError::Other("Could not determine home directory".to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the APPDATA directory (Windows only, falls back to home)
|
||||||
|
fn get_appdata_dir() -> Result<PathBuf, BrowserError> {
|
||||||
|
// Try APPDATA env var first (Windows)
|
||||||
|
if let Ok(appdata) = std::env::var("APPDATA") {
|
||||||
|
return Ok(PathBuf::from(appdata));
|
||||||
|
}
|
||||||
|
// Fallback: use dirs crate
|
||||||
|
dirs::config_dir()
|
||||||
|
.ok_or_else(|| BrowserError::Other("Could not determine config directory".to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
/// Copy a file to a temporary location to avoid locking issues
|
/// Copy a file to a temporary location to avoid locking issues
|
||||||
fn copy_to_temp<P: AsRef<std::path::Path>>(path: P) -> Result<tempfile::TempPath, BrowserError> {
|
fn copy_to_temp<P: AsRef<std::path::Path>>(path: P) -> Result<tempfile::TempPath, BrowserError> {
|
||||||
let temp_file = tempfile::NamedTempFile::new()?;
|
let temp_file = tempfile::NamedTempFile::new()?;
|
||||||
@@ -67,32 +85,26 @@ fn copy_to_temp<P: AsRef<std::path::Path>>(path: P) -> Result<tempfile::TempPath
|
|||||||
Ok(temp_file.into_temp_path())
|
Ok(temp_file.into_temp_path())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Find the Firefox profile directory
|
/// Find a profile directory from a list of candidate parent directories.
|
||||||
///
|
///
|
||||||
/// Searches in ~/.mozilla/firefox/ for profiles
|
/// Searches each candidate for subdirectories containing `cookies.sqlite`.
|
||||||
pub fn find_firefox_profile() -> Result<PathBuf, BrowserError> {
|
/// Prefers `default-release` profiles, then `default` profiles.
|
||||||
let home = get_home_dir()?;
|
fn find_profile_in_dirs(candidate_dirs: &[PathBuf]) -> Result<PathBuf, BrowserError> {
|
||||||
let firefox_dir = home.join(".mozilla").join("firefox");
|
for dir in candidate_dirs {
|
||||||
|
if !dir.exists() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if !firefox_dir.exists() {
|
let entries = match fs::read_dir(dir) {
|
||||||
return Err(BrowserError::ProfileNotFound(format!(
|
Ok(e) => e,
|
||||||
"Firefox directory not found: {:?}",
|
Err(_) => continue,
|
||||||
firefox_dir
|
};
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read directory entries
|
let mut profile_dirs: Vec<(String, PathBuf)> = Vec::new();
|
||||||
let entries = fs::read_dir(&firefox_dir).map_err(|e| BrowserError::Io(e))?;
|
|
||||||
|
|
||||||
let mut profile_dirs: Vec<(String, PathBuf)> = Vec::new();
|
for entry in entries.flatten() {
|
||||||
|
let path = entry.path();
|
||||||
for entry in entries.flatten() {
|
if path.is_dir() && path.join("cookies.sqlite").exists() {
|
||||||
let path = entry.path();
|
|
||||||
if path.is_dir() {
|
|
||||||
// Check if this is a profile directory (contains cookies.sqlite)
|
|
||||||
let cookies_path = path.join("cookies.sqlite");
|
|
||||||
if cookies_path.exists() {
|
|
||||||
// Get the profile name from the directory name
|
|
||||||
let name = path
|
let name = path
|
||||||
.file_name()
|
.file_name()
|
||||||
.and_then(|n| n.to_str())
|
.and_then(|n| n.to_str())
|
||||||
@@ -101,46 +113,112 @@ pub fn find_firefox_profile() -> Result<PathBuf, BrowserError> {
|
|||||||
profile_dirs.push((name, path));
|
profile_dirs.push((name, path));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if profile_dirs.is_empty() {
|
if profile_dirs.is_empty() {
|
||||||
return Err(BrowserError::ProfileNotFound(
|
continue;
|
||||||
"No Firefox profiles with cookies found".to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prefer default-release profile, otherwise use first available
|
|
||||||
profile_dirs.sort_by(|a, b| {
|
|
||||||
let a_default = a.0.contains("default-release");
|
|
||||||
let b_default = b.0.contains("default-release");
|
|
||||||
match (a_default, b_default) {
|
|
||||||
(true, false) => std::cmp::Ordering::Less,
|
|
||||||
(false, true) => std::cmp::Ordering::Greater,
|
|
||||||
_ => std::cmp::Ordering::Equal,
|
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
let selected = &profile_dirs[0].1;
|
// Sort: prefer default-release > default > anything else
|
||||||
log::info!("Found Firefox profile: {:?}", selected);
|
profile_dirs.sort_by(|a, b| {
|
||||||
Ok(selected.clone())
|
fn rank(name: &str) -> u8 {
|
||||||
|
if name.contains("default-release") { 0 }
|
||||||
|
else if name.contains("default") { 1 }
|
||||||
|
else { 2 }
|
||||||
|
}
|
||||||
|
rank(&a.0).cmp(&rank(&b.0))
|
||||||
|
});
|
||||||
|
|
||||||
|
let selected = &profile_dirs[0].1;
|
||||||
|
log::info!("Found browser profile: {:?}", selected);
|
||||||
|
return Ok(selected.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(BrowserError::ProfileNotFound(format!(
|
||||||
|
"No profiles with cookies found. Searched: {:?}",
|
||||||
|
candidate_dirs
|
||||||
|
)))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract cookies from Firefox profile
|
/// Get candidate profile directories for Firefox
|
||||||
///
|
fn firefox_profile_dirs() -> Vec<PathBuf> {
|
||||||
/// # Arguments
|
let mut dirs = Vec::new();
|
||||||
/// * `domain` - Optional domain to filter cookies (e.g., ".twitter.com")
|
if let Ok(home) = get_home_dir() {
|
||||||
///
|
// Windows
|
||||||
/// Returns a HashMap of cookie name -> value
|
if let Ok(appdata) = get_appdata_dir() {
|
||||||
pub fn extract_firefox_cookies(
|
dirs.push(appdata.join("Mozilla").join("Firefox").join("Profiles"));
|
||||||
|
}
|
||||||
|
// Linux
|
||||||
|
dirs.push(home.join(".mozilla").join("firefox"));
|
||||||
|
// macOS
|
||||||
|
dirs.push(home.join("Library").join("Application Support").join("Firefox").join("Profiles"));
|
||||||
|
}
|
||||||
|
dirs
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get candidate profile directories for Zen Browser
|
||||||
|
fn zen_profile_dirs() -> Vec<PathBuf> {
|
||||||
|
let mut dirs = Vec::new();
|
||||||
|
if let Ok(home) = get_home_dir() {
|
||||||
|
// Windows
|
||||||
|
if let Ok(appdata) = get_appdata_dir() {
|
||||||
|
dirs.push(appdata.join("zen").join("Profiles"));
|
||||||
|
}
|
||||||
|
// Linux
|
||||||
|
dirs.push(home.join(".zen"));
|
||||||
|
// macOS
|
||||||
|
dirs.push(home.join("Library").join("Application Support").join("zen").join("Profiles"));
|
||||||
|
}
|
||||||
|
dirs
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get candidate profile directories for LibreWolf
|
||||||
|
fn librewolf_profile_dirs() -> Vec<PathBuf> {
|
||||||
|
let mut dirs = Vec::new();
|
||||||
|
if let Ok(home) = get_home_dir() {
|
||||||
|
if let Ok(appdata) = get_appdata_dir() {
|
||||||
|
dirs.push(appdata.join("librewolf").join("Profiles"));
|
||||||
|
}
|
||||||
|
dirs.push(home.join(".librewolf"));
|
||||||
|
dirs.push(home.join("Library").join("Application Support").join("librewolf").join("Profiles"));
|
||||||
|
}
|
||||||
|
dirs
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get candidate profile directories for Waterfox
|
||||||
|
fn waterfox_profile_dirs() -> Vec<PathBuf> {
|
||||||
|
let mut dirs = Vec::new();
|
||||||
|
if let Ok(home) = get_home_dir() {
|
||||||
|
if let Ok(appdata) = get_appdata_dir() {
|
||||||
|
dirs.push(appdata.join("Waterfox").join("Profiles"));
|
||||||
|
}
|
||||||
|
dirs.push(home.join(".waterfox"));
|
||||||
|
dirs.push(home.join("Library").join("Application Support").join("Waterfox").join("Profiles"));
|
||||||
|
}
|
||||||
|
dirs
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find a Firefox profile directory (searches standard Firefox locations)
|
||||||
|
pub fn find_firefox_profile() -> Result<PathBuf, BrowserError> {
|
||||||
|
find_profile_in_dirs(&firefox_profile_dirs())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find a Zen Browser profile directory
|
||||||
|
pub fn find_zen_profile() -> Result<PathBuf, BrowserError> {
|
||||||
|
find_profile_in_dirs(&zen_profile_dirs())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract cookies from a Firefox-compatible SQLite database (moz_cookies table)
|
||||||
|
fn extract_moz_cookies(
|
||||||
|
profile_dir: &PathBuf,
|
||||||
domain: Option<&str>,
|
domain: Option<&str>,
|
||||||
|
browser_name: &str,
|
||||||
) -> Result<HashMap<String, String>, BrowserError> {
|
) -> Result<HashMap<String, String>, BrowserError> {
|
||||||
let profile_dir = find_firefox_profile()?;
|
|
||||||
let cookies_path = profile_dir.join("cookies.sqlite");
|
let cookies_path = profile_dir.join("cookies.sqlite");
|
||||||
|
|
||||||
if !cookies_path.exists() {
|
if !cookies_path.exists() {
|
||||||
return Err(BrowserError::DatabaseNotFound(format!(
|
return Err(BrowserError::DatabaseNotFound(format!(
|
||||||
"Firefox cookies database not found: {:?}",
|
"{} cookies database not found: {:?}",
|
||||||
cookies_path
|
browser_name, cookies_path
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -150,7 +228,6 @@ pub fn extract_firefox_cookies(
|
|||||||
|
|
||||||
let cookies: HashMap<String, String> = match domain {
|
let cookies: HashMap<String, String> = match domain {
|
||||||
Some(d) => {
|
Some(d) => {
|
||||||
// Query with domain filter
|
|
||||||
let pattern = format!("%{}", d);
|
let pattern = format!("%{}", d);
|
||||||
let mut stmt = conn.prepare("SELECT name, value FROM moz_cookies WHERE host LIKE ?")?;
|
let mut stmt = conn.prepare("SELECT name, value FROM moz_cookies WHERE host LIKE ?")?;
|
||||||
let mut cookies = HashMap::new();
|
let mut cookies = HashMap::new();
|
||||||
@@ -163,7 +240,6 @@ pub fn extract_firefox_cookies(
|
|||||||
cookies
|
cookies
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
// Get all cookies
|
|
||||||
let mut stmt = conn.prepare("SELECT name, value FROM moz_cookies")?;
|
let mut stmt = conn.prepare("SELECT name, value FROM moz_cookies")?;
|
||||||
let mut cookies = HashMap::new();
|
let mut cookies = HashMap::new();
|
||||||
let rows = stmt.query_map([], |row| {
|
let rows = stmt.query_map([], |row| {
|
||||||
@@ -176,24 +252,61 @@ pub fn extract_firefox_cookies(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
log::info!("Extracted {} cookies from Firefox", cookies.len());
|
log::info!("Extracted {} cookies from {}", cookies.len(), browser_name);
|
||||||
Ok(cookies)
|
Ok(cookies)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Extract cookies from Firefox
|
||||||
|
pub fn extract_firefox_cookies(
|
||||||
|
domain: Option<&str>,
|
||||||
|
) -> Result<HashMap<String, String>, BrowserError> {
|
||||||
|
let profile_dir = find_profile_in_dirs(&firefox_profile_dirs())?;
|
||||||
|
extract_moz_cookies(&profile_dir, domain, "Firefox")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract cookies from Zen Browser
|
||||||
|
pub fn extract_zen_cookies(
|
||||||
|
domain: Option<&str>,
|
||||||
|
) -> Result<HashMap<String, String>, BrowserError> {
|
||||||
|
let profile_dir = find_profile_in_dirs(&zen_profile_dirs())?;
|
||||||
|
extract_moz_cookies(&profile_dir, domain, "Zen Browser")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract cookies from LibreWolf
|
||||||
|
pub fn extract_librewolf_cookies(
|
||||||
|
domain: Option<&str>,
|
||||||
|
) -> Result<HashMap<String, String>, BrowserError> {
|
||||||
|
let profile_dir = find_profile_in_dirs(&librewolf_profile_dirs())?;
|
||||||
|
extract_moz_cookies(&profile_dir, domain, "LibreWolf")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract cookies from Waterfox
|
||||||
|
pub fn extract_waterfox_cookies(
|
||||||
|
domain: Option<&str>,
|
||||||
|
) -> Result<HashMap<String, String>, BrowserError> {
|
||||||
|
let profile_dir = find_profile_in_dirs(&waterfox_profile_dirs())?;
|
||||||
|
extract_moz_cookies(&profile_dir, domain, "Waterfox")
|
||||||
|
}
|
||||||
|
|
||||||
/// Find the Chrome profile directory
|
/// Find the Chrome profile directory
|
||||||
///
|
|
||||||
/// Searches in ~/.config/google-chrome/ for Default profile
|
|
||||||
pub fn find_chrome_profile() -> Result<PathBuf, BrowserError> {
|
pub fn find_chrome_profile() -> Result<PathBuf, BrowserError> {
|
||||||
let home = get_home_dir()?;
|
let home = get_home_dir()?;
|
||||||
|
|
||||||
// Try different possible Chrome config locations
|
let mut possible_paths = Vec::new();
|
||||||
let possible_paths = vec![
|
|
||||||
home.join(".config").join("google-chrome"),
|
// Windows
|
||||||
home.join(".config").join("chromium"),
|
if let Ok(local_appdata) = std::env::var("LOCALAPPDATA") {
|
||||||
home.join("Library")
|
let local = PathBuf::from(local_appdata);
|
||||||
.join("Application Support")
|
possible_paths.push(local.join("Google").join("Chrome").join("User Data"));
|
||||||
.join("Google Chrome"),
|
possible_paths.push(local.join("Chromium").join("User Data"));
|
||||||
];
|
}
|
||||||
|
|
||||||
|
// Linux
|
||||||
|
possible_paths.push(home.join(".config").join("google-chrome"));
|
||||||
|
possible_paths.push(home.join(".config").join("chromium"));
|
||||||
|
|
||||||
|
// macOS
|
||||||
|
possible_paths.push(home.join("Library").join("Application Support").join("Google Chrome"));
|
||||||
|
|
||||||
for chrome_dir in possible_paths {
|
for chrome_dir in possible_paths {
|
||||||
if chrome_dir.exists() {
|
if chrome_dir.exists() {
|
||||||
@@ -215,13 +328,8 @@ pub fn find_chrome_profile() -> Result<PathBuf, BrowserError> {
|
|||||||
|
|
||||||
/// Extract cookies from Chrome profile
|
/// Extract cookies from Chrome profile
|
||||||
///
|
///
|
||||||
/// Note: Chrome stores some cookies with encrypted values using the OS keyring.
|
/// Note: Chrome encrypts most cookies using the OS keyring.
|
||||||
/// This function extracts plaintext cookies and logs a warning for encrypted ones.
|
/// This function extracts plaintext cookies and skips encrypted ones.
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
/// * `domain` - Optional domain to filter cookies (e.g., ".twitter.com")
|
|
||||||
///
|
|
||||||
/// Returns a HashMap of cookie name -> value
|
|
||||||
pub fn extract_chrome_cookies(
|
pub fn extract_chrome_cookies(
|
||||||
domain: Option<&str>,
|
domain: Option<&str>,
|
||||||
) -> Result<HashMap<String, String>, BrowserError> {
|
) -> Result<HashMap<String, String>, BrowserError> {
|
||||||
@@ -235,14 +343,12 @@ pub fn extract_chrome_cookies(
|
|||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy to temp to avoid locking
|
|
||||||
let temp_path = copy_to_temp(&cookies_path)?;
|
let temp_path = copy_to_temp(&cookies_path)?;
|
||||||
let conn = Connection::open(&temp_path)?;
|
let conn = Connection::open(&temp_path)?;
|
||||||
|
|
||||||
let mut cookies = HashMap::new();
|
let mut cookies = HashMap::new();
|
||||||
let mut encrypted_count = 0;
|
let mut encrypted_count = 0;
|
||||||
|
|
||||||
// Chrome uses different table schema - check for encrypted_value column
|
|
||||||
let has_encrypted = conn
|
let has_encrypted = conn
|
||||||
.query_row(
|
.query_row(
|
||||||
"SELECT COUNT(*) FROM pragma_table_info('cookies') WHERE name='encrypted_value'",
|
"SELECT COUNT(*) FROM pragma_table_info('cookies') WHERE name='encrypted_value'",
|
||||||
@@ -252,14 +358,13 @@ pub fn extract_chrome_cookies(
|
|||||||
.unwrap_or(0)
|
.unwrap_or(0)
|
||||||
> 0;
|
> 0;
|
||||||
|
|
||||||
// Always select with domain filter (use wildcard for all)
|
|
||||||
let domain_pattern = match domain {
|
let domain_pattern = match domain {
|
||||||
Some(d) => format!("%{}%", d),
|
Some(d) => format!("%{}%", d),
|
||||||
None => "%".to_string(),
|
None => "%".to_string(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut stmt =
|
let mut stmt =
|
||||||
conn.prepare("SELECT name, value, encrypted_value FROM cookies WHERE host LIKE ?")?;
|
conn.prepare("SELECT name, value, encrypted_value FROM cookies WHERE host_key LIKE ?")?;
|
||||||
|
|
||||||
let rows = stmt.query_map([domain_pattern], |row| {
|
let rows = stmt.query_map([domain_pattern], |row| {
|
||||||
let name: String = row.get(0)?;
|
let name: String = row.get(0)?;
|
||||||
@@ -271,12 +376,11 @@ pub fn extract_chrome_cookies(
|
|||||||
for row_result in rows {
|
for row_result in rows {
|
||||||
let (name, value, encrypted) = row_result?;
|
let (name, value, encrypted) = row_result?;
|
||||||
|
|
||||||
// Check if cookie has encrypted value
|
|
||||||
if has_encrypted {
|
if has_encrypted {
|
||||||
if let Some(enc) = encrypted {
|
if let Some(enc) = encrypted {
|
||||||
if !enc.is_empty() {
|
if !enc.is_empty() {
|
||||||
encrypted_count += 1;
|
encrypted_count += 1;
|
||||||
continue; // Skip encrypted cookies
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -287,7 +391,7 @@ pub fn extract_chrome_cookies(
|
|||||||
if encrypted_count > 0 {
|
if encrypted_count > 0 {
|
||||||
log::warn!(
|
log::warn!(
|
||||||
"Skipped {} encrypted Chrome cookies (OS keyring required). \
|
"Skipped {} encrypted Chrome cookies (OS keyring required). \
|
||||||
Run with --cookies-file for encrypted cookies.",
|
Use --cookies with a cookies.txt file instead.",
|
||||||
encrypted_count
|
encrypted_count
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -300,32 +404,22 @@ pub fn extract_chrome_cookies(
|
|||||||
Ok(cookies)
|
Ok(cookies)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract cookies from a browser
|
/// Extract cookies from a browser by name
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// Supported browsers: firefox, zen, librewolf, waterfox, chrome, chromium
|
||||||
/// * `browser` - Browser name: "firefox", "chrome", or "chromium"
|
|
||||||
/// * `domain` - Optional domain to filter cookies
|
|
||||||
///
|
|
||||||
/// # Example
|
|
||||||
/// ```no_run
|
|
||||||
/// use gallery_dl::auth::extract_browser_cookies;
|
|
||||||
///
|
|
||||||
/// // Get all cookies from Firefox
|
|
||||||
/// let cookies = extract_browser_cookies("firefox", None).unwrap();
|
|
||||||
///
|
|
||||||
/// // Get Twitter cookies from Chrome
|
|
||||||
/// let twitter_cookies = extract_browser_cookies("chrome", Some("twitter.com")).unwrap();
|
|
||||||
/// ```
|
|
||||||
pub fn extract_browser_cookies(
|
pub fn extract_browser_cookies(
|
||||||
browser: &str,
|
browser: &str,
|
||||||
domain: Option<&str>,
|
domain: Option<&str>,
|
||||||
) -> Result<HashMap<String, String>, BrowserError> {
|
) -> Result<HashMap<String, String>, BrowserError> {
|
||||||
match browser.to_lowercase().as_str() {
|
match browser.to_lowercase().as_str() {
|
||||||
"firefox" | "ff" => extract_firefox_cookies(domain),
|
"firefox" | "ff" => extract_firefox_cookies(domain),
|
||||||
|
"zen" | "zen-browser" => extract_zen_cookies(domain),
|
||||||
|
"librewolf" => extract_librewolf_cookies(domain),
|
||||||
|
"waterfox" => extract_waterfox_cookies(domain),
|
||||||
"chrome" | "google-chrome" => extract_chrome_cookies(domain),
|
"chrome" | "google-chrome" => extract_chrome_cookies(domain),
|
||||||
"chromium" => extract_chrome_cookies(domain),
|
"chromium" => extract_chrome_cookies(domain),
|
||||||
_ => Err(BrowserError::Other(format!(
|
_ => Err(BrowserError::Other(format!(
|
||||||
"Unsupported browser: {}. Supported: firefox, chrome, chromium",
|
"Unsupported browser: '{}'. Supported: firefox, zen, librewolf, waterfox, chrome, chromium",
|
||||||
browser
|
browser
|
||||||
))),
|
))),
|
||||||
}
|
}
|
||||||
@@ -334,7 +428,6 @@ pub fn extract_browser_cookies(
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use std::env;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_get_home_dir() {
|
fn test_get_home_dir() {
|
||||||
@@ -350,25 +443,26 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_extract_browser_cookies_case_insensitive() {
|
fn test_extract_browser_cookies_case_insensitive() {
|
||||||
// Should not error, just return empty or ProfileNotFound
|
|
||||||
let result = extract_browser_cookies("FIREFOX", None);
|
let result = extract_browser_cookies("FIREFOX", None);
|
||||||
// Either works or profile not found (acceptable in test env)
|
assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_zen_browser_recognized() {
|
||||||
|
let result = extract_browser_cookies("zen", None);
|
||||||
|
// Should be ProfileNotFound (not unsupported browser error)
|
||||||
assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
|
assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_firefox_cookies_with_domain() {
|
fn test_firefox_cookies_with_domain() {
|
||||||
// Should not error even if profile not found in test env
|
|
||||||
let result = extract_firefox_cookies(Some("twitter.com"));
|
let result = extract_firefox_cookies(Some("twitter.com"));
|
||||||
// Either works or profile not found (acceptable in test env)
|
|
||||||
assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
|
assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_chrome_cookies_with_domain() {
|
fn test_chrome_cookies_with_domain() {
|
||||||
// Should not error even if profile not found in test env
|
|
||||||
let result = extract_chrome_cookies(Some("twitter.com"));
|
let result = extract_chrome_cookies(Some("twitter.com"));
|
||||||
// Either works or profile not found (acceptable in test env)
|
|
||||||
assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
|
assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -246,6 +246,7 @@ mod rawkuma;
|
|||||||
mod readcomiconline;
|
mod readcomiconline;
|
||||||
mod schalenetwork;
|
mod schalenetwork;
|
||||||
mod shimmie2;
|
mod shimmie2;
|
||||||
|
mod snapchat;
|
||||||
mod tungsten;
|
mod tungsten;
|
||||||
mod weebdex;
|
mod weebdex;
|
||||||
mod xenforo;
|
mod xenforo;
|
||||||
@@ -861,6 +862,10 @@ pub fn register_all() {
|
|||||||
// Register SimplyHentai extractors (simplyhentai.com)
|
// Register SimplyHentai extractors (simplyhentai.com)
|
||||||
register(simplyhentai::SimplyhentaiExtractor::new().expect("Failed to create SimplyHentai extractor"));
|
register(simplyhentai::SimplyhentaiExtractor::new().expect("Failed to create SimplyHentai extractor"));
|
||||||
|
|
||||||
|
// Register Snapchat extractors (snapchat.com)
|
||||||
|
register(snapchat::SnapchatSpotlightExtractor::new());
|
||||||
|
register(snapchat::SnapchatProfileExtractor::new());
|
||||||
|
|
||||||
// Register Skeb extractors (skeb.jp)
|
// Register Skeb extractors (skeb.jp)
|
||||||
register(skeb::SkebExtractor::new());
|
register(skeb::SkebExtractor::new());
|
||||||
|
|
||||||
|
|||||||
523
src/extractor/extractors/snapchat.rs
Normal file
523
src/extractor/extractors/snapchat.rs
Normal file
@@ -0,0 +1,523 @@
|
|||||||
|
//! Snapchat extractor implementation
|
||||||
|
//!
|
||||||
|
//! Supports public Snapchat content:
|
||||||
|
//! - Spotlight videos: `snapchat.com/spotlight/{id}`
|
||||||
|
//! - Public profiles/stories: `snapchat.com/add/{username}`
|
||||||
|
//!
|
||||||
|
//! Data is extracted from the `__NEXT_DATA__` JSON embedded in the page HTML
|
||||||
|
//! (Next.js server-side rendering). No authentication required for public content.
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use regex::Regex;
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::extractor::{Extractor, ExtractorError, ExtractorMatch, Message};
|
||||||
|
|
||||||
|
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
|
||||||
|
|
||||||
|
/// Extract the `__NEXT_DATA__` JSON blob from a Snapchat page
|
||||||
|
fn extract_next_data(html: &str) -> Option<Value> {
|
||||||
|
let re = Regex::new(r#"<script\s+id="__NEXT_DATA__"\s+type="application/json"[^>]*>(.*?)</script>"#).ok()?;
|
||||||
|
let caps = re.captures(html)?;
|
||||||
|
let json_str = caps.get(1)?.as_str();
|
||||||
|
serde_json::from_str(json_str).ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recursively search a JSON value for all occurrences of a key
|
||||||
|
fn find_all_values<'a>(json: &'a Value, key: &str) -> Vec<&'a Value> {
|
||||||
|
let mut results = Vec::new();
|
||||||
|
match json {
|
||||||
|
Value::Object(map) => {
|
||||||
|
for (k, v) in map {
|
||||||
|
if k == key {
|
||||||
|
results.push(v);
|
||||||
|
}
|
||||||
|
results.extend(find_all_values(v, key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Array(arr) => {
|
||||||
|
for v in arr {
|
||||||
|
results.extend(find_all_values(v, key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
results
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract a filename from a CDN URL
|
||||||
|
/// e.g. `https://cf-st.sc-cdn.net/d/ABCDEF.27.IRZXSOY?mo=...` -> `ABCDEF.mp4`
|
||||||
|
fn cdn_filename(url: &str) -> Option<String> {
|
||||||
|
let parsed = url::Url::parse(url).ok()?;
|
||||||
|
let path = parsed.path();
|
||||||
|
// Path is like /d/HASH.27.IRZXSOY or /TYPE/HASH.27.IRZXSOY
|
||||||
|
let segment = path.rsplit('/').next()?;
|
||||||
|
// Take everything before the first dot as the hash ID
|
||||||
|
let hash = segment.split('.').next()?;
|
||||||
|
if hash.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(format!("{}.mp4", hash))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// SnapchatSpotlightExtractor — single spotlight video
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct SnapchatSpotlightExtractor {
|
||||||
|
pattern: Regex,
|
||||||
|
spotlight_id: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SnapchatSpotlightExtractor {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
pattern: Regex::new(
|
||||||
|
r"(?:https?://)?(?:www\.)?snapchat\.com/spotlight/([A-Za-z0-9_-]+)"
|
||||||
|
).expect("Failed to compile Snapchat spotlight pattern"),
|
||||||
|
spotlight_id: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_client(&self) -> Result<reqwest::Client, ExtractorError> {
|
||||||
|
reqwest::Client::builder()
|
||||||
|
.user_agent(USER_AGENT)
|
||||||
|
.timeout(std::time::Duration::from_secs(30))
|
||||||
|
.redirect(reqwest::redirect::Policy::limited(10))
|
||||||
|
.build()
|
||||||
|
.map_err(|e| ExtractorError::ConfigError(e.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_page(&self, url: &str) -> Result<String, ExtractorError> {
|
||||||
|
let client = self.create_client()?;
|
||||||
|
let response = client.get(url).send().await
|
||||||
|
.map_err(ExtractorError::RequestFailed)?;
|
||||||
|
|
||||||
|
let status = response.status();
|
||||||
|
if status.as_u16() == 404 {
|
||||||
|
return Err(ExtractorError::NotFound(format!("Spotlight not found: {}", url)));
|
||||||
|
}
|
||||||
|
if !status.is_success() {
|
||||||
|
return Err(ExtractorError::HttpError(format!("HTTP {}", status.as_u16())));
|
||||||
|
}
|
||||||
|
|
||||||
|
response.text().await
|
||||||
|
.map_err(|e| ExtractorError::ParseError(e.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_videos_from_next_data(&self, data: &Value) -> Vec<(String, HashMap<String, Value>)> {
|
||||||
|
let mut videos = Vec::new();
|
||||||
|
|
||||||
|
// Look for contentUrl fields (direct video URLs)
|
||||||
|
let content_urls = find_all_values(data, "contentUrl");
|
||||||
|
for url_val in &content_urls {
|
||||||
|
if let Some(url) = url_val.as_str() {
|
||||||
|
if url.contains("sc-cdn.net") || url.contains(".mp4") {
|
||||||
|
let mut meta = HashMap::new();
|
||||||
|
videos.push((url.to_string(), meta));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also check mediaUrl as a fallback
|
||||||
|
if videos.is_empty() {
|
||||||
|
let media_urls = find_all_values(data, "mediaUrl");
|
||||||
|
for url_val in &media_urls {
|
||||||
|
if let Some(url) = url_val.as_str() {
|
||||||
|
if url.contains("sc-cdn.net") || url.contains(".mp4") {
|
||||||
|
let meta = HashMap::new();
|
||||||
|
videos.push((url.to_string(), meta));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enrich with metadata from the same JSON tree
|
||||||
|
// Try to find upload date, view count, creator info
|
||||||
|
let upload_dates = find_all_values(data, "uploadDateMs");
|
||||||
|
let view_counts = find_all_values(data, "viewCount");
|
||||||
|
let usernames = find_all_values(data, "username");
|
||||||
|
let display_names = find_all_values(data, "displayName");
|
||||||
|
|
||||||
|
for (i, (_url, meta)) in videos.iter_mut().enumerate() {
|
||||||
|
if let Some(date_val) = upload_dates.get(i) {
|
||||||
|
meta.insert("upload_date".to_string(), (*date_val).clone());
|
||||||
|
} else if let Some(date_val) = upload_dates.first() {
|
||||||
|
meta.insert("upload_date".to_string(), (*date_val).clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(count_val) = view_counts.get(i) {
|
||||||
|
meta.insert("view_count".to_string(), (*count_val).clone());
|
||||||
|
} else if let Some(count_val) = view_counts.first() {
|
||||||
|
meta.insert("view_count".to_string(), (*count_val).clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(user_val) = usernames.get(i).or(usernames.first()) {
|
||||||
|
meta.insert("username".to_string(), (*user_val).clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(name_val) = display_names.get(i).or(display_names.first()) {
|
||||||
|
meta.insert("display_name".to_string(), (*name_val).clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
videos
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Extractor for SnapchatSpotlightExtractor {
|
||||||
|
fn category(&self) -> &str { "snapchat" }
|
||||||
|
fn subcategory(&self) -> &str { "spotlight" }
|
||||||
|
fn root(&self) -> &str { "https://www.snapchat.com" }
|
||||||
|
fn pattern(&self) -> &Regex { &self.pattern }
|
||||||
|
|
||||||
|
fn clone_extractor(&self) -> Box<dyn Extractor> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
||||||
|
if let Some(caps) = self.pattern.captures(&m.url) {
|
||||||
|
self.spotlight_id = caps.get(1).map(|m| m.as_str().to_string());
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
|
||||||
|
let spotlight_id = self.spotlight_id.as_ref()
|
||||||
|
.ok_or_else(|| ExtractorError::NotInitialized("spotlight_id not set".to_string()))?;
|
||||||
|
|
||||||
|
let url = format!("https://www.snapchat.com/spotlight/{}", spotlight_id);
|
||||||
|
log::info!("Fetching Snapchat spotlight: {}", url);
|
||||||
|
|
||||||
|
let html = self.fetch_page(&url).await?;
|
||||||
|
|
||||||
|
let next_data = extract_next_data(&html)
|
||||||
|
.ok_or_else(|| ExtractorError::ParseError(
|
||||||
|
"Could not find __NEXT_DATA__ in page HTML".to_string()
|
||||||
|
))?;
|
||||||
|
|
||||||
|
let videos = self.extract_videos_from_next_data(&next_data);
|
||||||
|
|
||||||
|
if videos.is_empty() {
|
||||||
|
return Err(ExtractorError::ParseError(
|
||||||
|
"No video URLs found in spotlight data".to_string()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut messages = Vec::new();
|
||||||
|
|
||||||
|
// Directory message
|
||||||
|
let creator = videos.first()
|
||||||
|
.and_then(|(_, meta)| meta.get("username"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("unknown");
|
||||||
|
|
||||||
|
let dir_msg = Message::directory("")
|
||||||
|
.with_metadata("category", serde_json::json!("snapchat"))
|
||||||
|
.with_metadata("subcategory", serde_json::json!("spotlight"))
|
||||||
|
.with_metadata("title", serde_json::json!(format!("spotlight_{}", spotlight_id)))
|
||||||
|
.with_metadata("creator", serde_json::json!(creator));
|
||||||
|
messages.push(dir_msg);
|
||||||
|
|
||||||
|
for (video_url, meta) in &videos {
|
||||||
|
let filename = cdn_filename(video_url)
|
||||||
|
.unwrap_or_else(|| format!("{}.mp4", spotlight_id));
|
||||||
|
|
||||||
|
let mut msg = Message::url(video_url)
|
||||||
|
.with_filename(&filename);
|
||||||
|
|
||||||
|
for (key, val) in meta {
|
||||||
|
msg = msg.with_metadata(key, val.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
messages.push(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
log::info!("Found {} video(s) in spotlight {}", videos.len(), spotlight_id);
|
||||||
|
Ok(messages)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// SnapchatProfileExtractor — public profile stories
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct SnapchatProfileExtractor {
|
||||||
|
pattern: Regex,
|
||||||
|
username: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SnapchatProfileExtractor {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
pattern: Regex::new(
|
||||||
|
r"(?:https?://)?(?:www\.)?snapchat\.com/add/([A-Za-z0-9._-]+)"
|
||||||
|
).expect("Failed to compile Snapchat profile pattern"),
|
||||||
|
username: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_client(&self) -> Result<reqwest::Client, ExtractorError> {
|
||||||
|
reqwest::Client::builder()
|
||||||
|
.user_agent(USER_AGENT)
|
||||||
|
.timeout(std::time::Duration::from_secs(30))
|
||||||
|
.redirect(reqwest::redirect::Policy::limited(10))
|
||||||
|
.build()
|
||||||
|
.map_err(|e| ExtractorError::ConfigError(e.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_page(&self, url: &str) -> Result<String, ExtractorError> {
|
||||||
|
let client = self.create_client()?;
|
||||||
|
let response = client.get(url).send().await
|
||||||
|
.map_err(ExtractorError::RequestFailed)?;
|
||||||
|
|
||||||
|
let status = response.status();
|
||||||
|
if status.as_u16() == 404 {
|
||||||
|
return Err(ExtractorError::NotFound(format!("Profile not found: {}", url)));
|
||||||
|
}
|
||||||
|
if !status.is_success() {
|
||||||
|
return Err(ExtractorError::HttpError(format!("HTTP {}", status.as_u16())));
|
||||||
|
}
|
||||||
|
|
||||||
|
response.text().await
|
||||||
|
.map_err(|e| ExtractorError::ParseError(e.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Extractor for SnapchatProfileExtractor {
|
||||||
|
fn category(&self) -> &str { "snapchat" }
|
||||||
|
fn subcategory(&self) -> &str { "profile" }
|
||||||
|
fn root(&self) -> &str { "https://www.snapchat.com" }
|
||||||
|
fn pattern(&self) -> &Regex { &self.pattern }
|
||||||
|
|
||||||
|
fn clone_extractor(&self) -> Box<dyn Extractor> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
||||||
|
if let Some(caps) = self.pattern.captures(&m.url) {
|
||||||
|
self.username = caps.get(1).map(|m| m.as_str().to_string());
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
|
||||||
|
let username = self.username.as_ref()
|
||||||
|
.ok_or_else(|| ExtractorError::NotInitialized("username not set".to_string()))?;
|
||||||
|
|
||||||
|
let url = format!("https://www.snapchat.com/add/{}", username);
|
||||||
|
log::info!("Fetching Snapchat profile: {}", url);
|
||||||
|
|
||||||
|
let html = self.fetch_page(&url).await?;
|
||||||
|
|
||||||
|
let next_data = extract_next_data(&html)
|
||||||
|
.ok_or_else(|| ExtractorError::ParseError(
|
||||||
|
"Could not find __NEXT_DATA__ in page HTML. Profile may be private or empty.".to_string()
|
||||||
|
))?;
|
||||||
|
|
||||||
|
// Extract all media URLs from the profile data
|
||||||
|
let mut media_urls: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
// Look for contentUrl (videos)
|
||||||
|
for val in find_all_values(&next_data, "contentUrl") {
|
||||||
|
if let Some(url) = val.as_str() {
|
||||||
|
if url.contains("sc-cdn.net") || url.contains(".mp4") {
|
||||||
|
media_urls.push(url.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for mediaUrl (alternate/additional media)
|
||||||
|
for val in find_all_values(&next_data, "mediaUrl") {
|
||||||
|
if let Some(url) = val.as_str() {
|
||||||
|
if (url.contains("sc-cdn.net") || url.contains(".mp4") || url.contains(".jpg") || url.contains(".png"))
|
||||||
|
&& !media_urls.contains(&url.to_string())
|
||||||
|
{
|
||||||
|
media_urls.push(url.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for snapMediaUrl (story media)
|
||||||
|
for val in find_all_values(&next_data, "snapMediaUrl") {
|
||||||
|
if let Some(url) = val.as_str() {
|
||||||
|
if !media_urls.contains(&url.to_string()) {
|
||||||
|
media_urls.push(url.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for thumbnailUrl (image previews)
|
||||||
|
for val in find_all_values(&next_data, "thumbnailUrl") {
|
||||||
|
if let Some(url) = val.as_str() {
|
||||||
|
if !media_urls.contains(&url.to_string()) {
|
||||||
|
media_urls.push(url.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut messages = Vec::new();
|
||||||
|
|
||||||
|
// Directory message
|
||||||
|
let display_name = find_all_values(&next_data, "displayName")
|
||||||
|
.first()
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or(username.as_str())
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
let dir_msg = Message::directory("")
|
||||||
|
.with_metadata("category", serde_json::json!("snapchat"))
|
||||||
|
.with_metadata("subcategory", serde_json::json!("profile"))
|
||||||
|
.with_metadata("title", serde_json::json!(&display_name))
|
||||||
|
.with_metadata("username", serde_json::json!(username));
|
||||||
|
messages.push(dir_msg);
|
||||||
|
|
||||||
|
if media_urls.is_empty() {
|
||||||
|
log::warn!("No media found on profile {}. It may be private or have no public stories.", username);
|
||||||
|
return Ok(messages);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i, media_url) in media_urls.iter().enumerate() {
|
||||||
|
let filename = cdn_filename(media_url)
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
let ext = if media_url.contains(".mp4") { "mp4" }
|
||||||
|
else if media_url.contains(".jpg") || media_url.contains(".jpeg") { "jpg" }
|
||||||
|
else if media_url.contains(".png") { "png" }
|
||||||
|
else { "mp4" };
|
||||||
|
format!("{}_{:03}.{}", username, i + 1, ext)
|
||||||
|
});
|
||||||
|
|
||||||
|
let msg = Message::url(media_url)
|
||||||
|
.with_filename(&filename)
|
||||||
|
.with_metadata("username", serde_json::json!(username))
|
||||||
|
.with_metadata("num", serde_json::json!(i + 1));
|
||||||
|
|
||||||
|
messages.push(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
log::info!("Found {} media item(s) on profile {}", media_urls.len(), username);
|
||||||
|
Ok(messages)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_spotlight_pattern() {
        let ext = SnapchatSpotlightExtractor::new();
        // Accepted: spotlight URLs with/without scheme and www, ids with -_.
        assert!(ext.pattern.is_match("https://www.snapchat.com/spotlight/ABC123_def"));
        assert!(ext.pattern.is_match("https://snapchat.com/spotlight/ABC123"));
        assert!(ext.pattern.is_match("http://www.snapchat.com/spotlight/test-id_123"));
        // Rejected: profile URLs and the bare domain.
        assert!(!ext.pattern.is_match("https://snapchat.com/add/username"));
        assert!(!ext.pattern.is_match("https://snapchat.com/"));
    }

    #[test]
    fn test_profile_pattern() {
        let ext = SnapchatProfileExtractor::new();
        // Accepted: usernames may contain dots, underscores and dashes.
        assert!(ext.pattern.is_match("https://www.snapchat.com/add/john_doe"));
        assert!(ext.pattern.is_match("https://snapchat.com/add/user.name"));
        assert!(ext.pattern.is_match("http://www.snapchat.com/add/test-user"));
        // Rejected: spotlight URLs and the bare domain.
        assert!(!ext.pattern.is_match("https://snapchat.com/spotlight/ABC123"));
        assert!(!ext.pattern.is_match("https://snapchat.com/"));
    }

    #[test]
    fn test_extract_next_data() {
        let html = r#"<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"story":{"contentUrl":"https://cf-st.sc-cdn.net/d/HASH123.27.IRZXSOY?mo=test"}}}}</script></head></html>"#;
        let data = extract_next_data(html).expect("__NEXT_DATA__ should be found");
        let urls = find_all_values(&data, "contentUrl");
        assert_eq!(urls.len(), 1);
        assert_eq!(
            urls[0].as_str().unwrap(),
            "https://cf-st.sc-cdn.net/d/HASH123.27.IRZXSOY?mo=test"
        );
    }

    #[test]
    fn test_extract_next_data_missing() {
        let html = r#"<html><head></head><body>No next data here</body></html>"#;
        assert!(extract_next_data(html).is_none());
    }

    #[test]
    fn test_find_all_values() {
        // Three occurrences of "contentUrl" at different nesting depths,
        // including one inside an array element.
        let json: Value = serde_json::json!({
            "a": {
                "contentUrl": "url1",
                "nested": { "contentUrl": "url2" }
            },
            "b": [
                { "contentUrl": "url3" },
                { "other": "ignored" }
            ]
        });
        assert_eq!(find_all_values(&json, "contentUrl").len(), 3);
    }

    #[test]
    fn test_cdn_filename() {
        assert_eq!(
            cdn_filename("https://cf-st.sc-cdn.net/d/ABCDEF.27.IRZXSOY?mo=test&uc=46"),
            Some("ABCDEF.mp4".to_string())
        );
        assert_eq!(
            cdn_filename("https://bolt-gcdn.sc-cdn.net/video/HASH123.27.IRZXSOY?mo=test"),
            Some("HASH123.mp4".to_string())
        );
    }

    #[test]
    fn test_cdn_filename_no_hash() {
        // Ordinary URLs should still produce some filename.
        assert!(cdn_filename("https://example.com/some/path/file.mp4").is_some());
    }

    #[test]
    fn test_spotlight_extract_videos() {
        let ext = SnapchatSpotlightExtractor::new();
        let data: Value = serde_json::json!({
            "props": { "pageProps": { "story": {
                "contentUrl": "https://cf-st.sc-cdn.net/d/ABC.27.IRZXSOY?mo=test",
                "uploadDateMs": 1700000000000_u64,
                "viewCount": 50000,
                "username": "testuser",
                "displayName": "Test User"
            } } }
        });
        let videos = ext.extract_videos_from_next_data(&data);
        assert_eq!(videos.len(), 1);
        assert!(videos[0].0.contains("sc-cdn.net"));
        assert!(videos[0].1.contains_key("username"));
    }

    #[test]
    fn test_spotlight_mediaurl_fallback() {
        // When no contentUrl exists, mediaUrl entries are used instead.
        let ext = SnapchatSpotlightExtractor::new();
        let data: Value = serde_json::json!({
            "props": { "pageProps": { "media": {
                "mediaUrl": "https://cf-st.sc-cdn.net/d/FALLBACK.27.IRZXSOY?mo=x"
            } } }
        });
        let videos = ext.extract_videos_from_next_data(&data);
        assert_eq!(videos.len(), 1);
        assert!(videos[0].0.contains("FALLBACK"));
    }
}
|
||||||
@@ -1,40 +1,462 @@
|
|||||||
//! XenForo extractor implementation
|
//! XenForo extractor implementation
|
||||||
//!
|
//!
|
||||||
//! Supports XenForo forums (simpcity.cr, nudostar.com/forum, etc.)
|
//! Supports XenForo forums (simpcity.cr, nudostar.com/forum, etc.)
|
||||||
|
//! Extracts images and videos from thread posts with pagination support.
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
use crate::extractor::{
|
use crate::extractor::{
|
||||||
Extractor, ExtractorError, ExtractorMatch, HttpClient, Message,
|
Extractor, ExtractorError, ExtractorMatch, Message, MessageKind,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct XenforoPostExtractor {
|
/// HTTP User-Agent presented to XenForo forums (mimics desktop Chrome).
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

/// Root URL for a matched forum domain.
///
/// All currently known hosts (simpcity.cr / simpcity.su, nudostar.com/forum,
/// allthefallen.moe/forum, celebforum.to, forums.socialmediagirls.com, ...)
/// follow the same `https://<domain>` scheme — the `domain` capture already
/// includes any forum subdirectory (e.g. "nudostar.com/forum"), so the old
/// per-host match arms all produced exactly this string and were dead code.
/// Kept as a function so a future host with a divergent root can be
/// special-cased here without touching callers.
fn root_for_domain(domain: &str) -> String {
    format!("https://{}", domain)
}
|
||||||
|
|
||||||
|
/// Build a `Cookie:` header value ("name=value; name=value") from a map.
///
/// Pairs are sorted by cookie name so the header is deterministic —
/// HashMap iteration order is randomized per process, which previously
/// made the emitted header vary between runs. Servers do not depend on
/// cookie ordering, so sorting is safe.
fn cookie_header(cookies: &HashMap<String, String>) -> String {
    let mut pairs: Vec<(&String, &String)> = cookies.iter().collect();
    pairs.sort_by(|a, b| a.0.cmp(b.0));
    pairs.into_iter()
        .map(|(name, value)| format!("{}={}", name, value))
        .collect::<Vec<_>>()
        .join("; ")
}
|
||||||
|
|
||||||
|
/// Extract media URLs from HTML content.
|
||||||
|
///
|
||||||
|
/// Finds all media by matching multiple patterns:
|
||||||
|
/// - `<img class="bbImage" src="...">` — inline images
|
||||||
|
/// - `<video src="...">` — inline videos
|
||||||
|
/// - `<a href=".../attachments/...">` — file attachments
|
||||||
|
/// - `<iframe src="...">` — embedded media
|
||||||
|
/// - `loadMedia(this, '...')` — lazy-loaded embeds
|
||||||
|
fn extract_media_from_html(html: &str, root_url: &str) -> Vec<String> {
|
||||||
|
let mut urls = Vec::new();
|
||||||
|
|
||||||
|
// 1. bbImage: <img ... class="bbImage" ... src="URL"> or data-url="URL"
|
||||||
|
let img_re = Regex::new(r#"<img[^>]+class="bbImage[^"]*"[^>]*(?:data-url|src)="([^"]+)"|<img[^>]*(?:data-url|src)="([^"]+)"[^>]*class="bbImage[^"]*""#).unwrap();
|
||||||
|
for caps in img_re.captures_iter(html) {
|
||||||
|
if let Some(m) = caps.get(1).or(caps.get(2)) {
|
||||||
|
urls.push(m.as_str().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Video src
|
||||||
|
let video_re = Regex::new(r#"<video[^>]+src="([^"]+)"#).unwrap();
|
||||||
|
for caps in video_re.captures_iter(html) {
|
||||||
|
if let Some(m) = caps.get(1) {
|
||||||
|
urls.push(m.as_str().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Attachments
|
||||||
|
let attach_re = Regex::new(r#"<a[^>]+href="([^"]+/attachments/[^"]+)"#).unwrap();
|
||||||
|
for caps in attach_re.captures_iter(html) {
|
||||||
|
if let Some(m) = caps.get(1) {
|
||||||
|
urls.push(m.as_str().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Iframes
|
||||||
|
let iframe_re = Regex::new(r#"<iframe[^>]+src="([^"]+)"#).unwrap();
|
||||||
|
for caps in iframe_re.captures_iter(html) {
|
||||||
|
if let Some(m) = caps.get(1) {
|
||||||
|
urls.push(m.as_str().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Lazy-loaded media
|
||||||
|
let lazy_re = Regex::new(r#"loadMedia\(this,\s*'([^']+)'"#).unwrap();
|
||||||
|
for caps in lazy_re.captures_iter(html) {
|
||||||
|
if let Some(m) = caps.get(1) {
|
||||||
|
urls.push(m.as_str().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize and filter
|
||||||
|
urls.into_iter()
|
||||||
|
.filter_map(|u| normalize_url(&u, root_url))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Normalize a URL: resolve relative paths, upgrade protocol, skip junk
|
||||||
|
fn normalize_url(url: &str, root_url: &str) -> Option<String> {
|
||||||
|
// Skip smilies, avatars, style assets, base64 data URIs
|
||||||
|
if url.contains("/styles/") || url.contains("/smilies/")
|
||||||
|
|| url.contains("data/avatars/") || url.contains("data:image")
|
||||||
|
|| url.contains("/icons/") || url.contains("reaction-sprite")
|
||||||
|
{
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut u = url.to_string();
|
||||||
|
|
||||||
|
if u.starts_with("//") {
|
||||||
|
u = format!("https:{}", u);
|
||||||
|
} else if u.starts_with('/') {
|
||||||
|
u = format!("{}{}", root_url, u);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !u.starts_with("http://") && !u.starts_with("https://") {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upgrade .md.jpg thumbnails to full size (simpcity CDN pattern)
|
||||||
|
u = upgrade_thumbnail(&u);
|
||||||
|
|
||||||
|
Some(u)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Upgrade simpcity CDN thumbnail URLs to full-size
|
||||||
|
/// e.g. image.md.jpg -> image.jpg
|
||||||
|
fn upgrade_thumbnail(url: &str) -> String {
|
||||||
|
let re = Regex::new(r"\.md\.(jpg|jpeg|png|gif|webp)(\?|$)").unwrap();
|
||||||
|
re.replace(url, ".$1$2").into_owned()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode the handful of HTML entities XenForo commonly emits in titles.
///
/// `&amp;` is decoded LAST so that double-escaped sequences such as
/// `&amp;lt;` decode to the literal text `&lt;` instead of being
/// decoded twice into `<`. Both the numeric (`&#39;`) and named
/// (`&apos;`) apostrophe forms are handled.
fn decode_html_entities(s: &str) -> String {
    s.replace("&nbsp;", " ")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
|
||||||
|
|
||||||
|
/// Extract the thread title from the page HTML
|
||||||
|
fn extract_thread_title(html: &str) -> Option<String> {
|
||||||
|
let re = Regex::new(r#"<h1[^>]*class="[^"]*p-title-value[^"]*"[^>]*>(.*?)</h1>"#).ok()?;
|
||||||
|
re.captures(html)
|
||||||
|
.and_then(|c| c.get(1))
|
||||||
|
.map(|m| {
|
||||||
|
// Strip inner tags like <span>
|
||||||
|
let tag_re = Regex::new(r"<[^>]+>").unwrap();
|
||||||
|
let title = tag_re.replace_all(m.as_str().trim(), "").trim().to_string();
|
||||||
|
// Decode HTML entities
|
||||||
|
decode_html_entities(&title)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find the next page URL from pagination
|
||||||
|
fn find_next_page(html: &str) -> Option<String> {
|
||||||
|
// Handle both attribute orderings: class before href, or href before class
|
||||||
|
let re = Regex::new(
|
||||||
|
r#"<a[^>]*href="([^"]+)"[^>]*class="[^"]*pageNav-jump--next[^"]*"|<a[^>]*class="[^"]*pageNav-jump--next[^"]*"[^>]*href="([^"]+)""#
|
||||||
|
).ok()?;
|
||||||
|
re.captures(html).and_then(|c| {
|
||||||
|
c.get(1).or(c.get(2))
|
||||||
|
}).map(|m| {
|
||||||
|
m.as_str().replace("&", "&")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract individual post blocks from the page HTML.
|
||||||
|
///
|
||||||
|
/// XenForo posts are `<article>` elements with `data-content="post-NNNNN"`.
|
||||||
|
/// We split the HTML at each post boundary and extract the content between them.
|
||||||
|
fn extract_posts(html: &str) -> Vec<(String, String)> {
|
||||||
|
let boundary_re = Regex::new(r#"data-content="post-(\d+)""#)
|
||||||
|
.expect("Failed to compile post boundary regex");
|
||||||
|
|
||||||
|
let matches: Vec<_> = boundary_re.captures_iter(html)
|
||||||
|
.filter_map(|c| {
|
||||||
|
let full = c.get(0)?;
|
||||||
|
let id = c.get(1)?.as_str().to_string();
|
||||||
|
Some((id, full.start()))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if matches.is_empty() {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut posts = Vec::new();
|
||||||
|
for i in 0..matches.len() {
|
||||||
|
let (ref id, start) = matches[i];
|
||||||
|
let end = if i + 1 < matches.len() {
|
||||||
|
matches[i + 1].1
|
||||||
|
} else {
|
||||||
|
html.len()
|
||||||
|
};
|
||||||
|
let post_html = &html[start..end];
|
||||||
|
posts.push((id.clone(), post_html.to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
posts
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// XenforoThreadExtractor
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
pub struct XenforoThreadExtractor {
|
pub struct XenforoThreadExtractor {
|
||||||
pattern: Regex,
|
pattern: Regex,
|
||||||
category: String,
|
category: String,
|
||||||
subcategory: String,
|
subcategory: String,
|
||||||
root_url: String,
|
root_url: String,
|
||||||
|
domain: Option<String>,
|
||||||
|
thread_path: Option<String>,
|
||||||
thread_id: Option<String>,
|
thread_id: Option<String>,
|
||||||
page: Option<i64>,
|
page: Option<i64>,
|
||||||
client: HttpClient,
|
cookies: HashMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct XenforoForumExtractor {
|
impl XenforoThreadExtractor {
|
||||||
|
pub fn new() -> Result<Self, ExtractorError> {
|
||||||
|
let pattern = Regex::new(
|
||||||
|
r"(?:https?://)?(?:www\.)?(simpcity\.cr|simpcity\.su|nudostar\.com/forum|allthefallen\.moe/forum|celebforum\.to|titsintops\.com/phpBB2|forums\.socialmediagirls\.com)(/(?:index\.php\?)?threads/(?:[^/?#]+\.)?(\d+))(?:/page-(\d+))?"
|
||||||
|
).map_err(|e| ExtractorError::ConfigError(e.to_string()))?;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
pattern,
|
||||||
|
category: "xenforo".to_string(),
|
||||||
|
subcategory: "thread".to_string(),
|
||||||
|
root_url: "https://simpcity.cr".to_string(),
|
||||||
|
domain: None,
|
||||||
|
thread_path: None,
|
||||||
|
thread_id: None,
|
||||||
|
page: None,
|
||||||
|
cookies: HashMap::new(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_client(&self) -> Result<reqwest::Client, ExtractorError> {
|
||||||
|
reqwest::Client::builder()
|
||||||
|
.user_agent(USER_AGENT)
|
||||||
|
.timeout(std::time::Duration::from_secs(30))
|
||||||
|
.redirect(reqwest::redirect::Policy::limited(10))
|
||||||
|
.build()
|
||||||
|
.map_err(|e| ExtractorError::ConfigError(e.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_page(&self, url: &str) -> Result<String, ExtractorError> {
|
||||||
|
let client = self.create_client()?;
|
||||||
|
let mut request = client.get(url);
|
||||||
|
|
||||||
|
if !self.cookies.is_empty() {
|
||||||
|
request = request.header("Cookie", cookie_header(&self.cookies));
|
||||||
|
}
|
||||||
|
|
||||||
|
let response = request.send().await
|
||||||
|
.map_err(ExtractorError::RequestFailed)?;
|
||||||
|
|
||||||
|
let status = response.status();
|
||||||
|
if status.as_u16() == 403 || status.as_u16() == 401 {
|
||||||
|
return Err(ExtractorError::ConfigError(format!(
|
||||||
|
"Authentication required (HTTP {}). Set cookies in config: \
|
||||||
|
extractor.xenforo.cookies.xf_user = \"your_cookie_value\"",
|
||||||
|
status.as_u16()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
if !status.is_success() {
|
||||||
|
return Err(ExtractorError::HttpError(format!("HTTP {}", status.as_u16())));
|
||||||
|
}
|
||||||
|
|
||||||
|
response.text().await
|
||||||
|
.map_err(|e| ExtractorError::ParseError(e.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn extract_thread(&self) -> Result<Vec<Message>, ExtractorError> {
|
||||||
|
let thread_path = self.thread_path.as_ref()
|
||||||
|
.ok_or_else(|| ExtractorError::NotInitialized("thread_path not set".to_string()))?;
|
||||||
|
|
||||||
|
let mut messages = Vec::new();
|
||||||
|
let mut seen_urls: HashSet<String> = HashSet::new();
|
||||||
|
|
||||||
|
// Build the starting URL
|
||||||
|
let start_url = if let Some(page) = self.page {
|
||||||
|
format!("{}{}/page-{}", self.root_url, thread_path, page)
|
||||||
|
} else {
|
||||||
|
format!("{}{}/", self.root_url, thread_path)
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut current_url = Some(start_url);
|
||||||
|
let mut page_num = self.page.unwrap_or(1);
|
||||||
|
let mut total_media = 0;
|
||||||
|
|
||||||
|
while let Some(url) = current_url.take() {
|
||||||
|
log::info!("Fetching page {} of thread: {}", page_num, url);
|
||||||
|
|
||||||
|
let html = self.fetch_page(&url).await?;
|
||||||
|
|
||||||
|
// Extract thread title on first page for the directory message
|
||||||
|
if page_num <= 1 || (self.page.is_some() && page_num == self.page.unwrap()) {
|
||||||
|
let title = extract_thread_title(&html)
|
||||||
|
.unwrap_or_else(|| "unknown".to_string());
|
||||||
|
log::info!("Thread title: {}", title);
|
||||||
|
|
||||||
|
let mut dir_msg = Message::directory("");
|
||||||
|
dir_msg.metadata.insert("thread_id".to_string(),
|
||||||
|
serde_json::json!(self.thread_id.as_deref().unwrap_or("0")));
|
||||||
|
dir_msg.metadata.insert("title".to_string(), serde_json::json!(title));
|
||||||
|
dir_msg.metadata.insert("category".to_string(), serde_json::json!("xenforo"));
|
||||||
|
messages.push(dir_msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract posts and their media
|
||||||
|
let posts = extract_posts(&html);
|
||||||
|
log::info!("Found {} posts on page {}", posts.len(), page_num);
|
||||||
|
|
||||||
|
for (post_id, post_html) in &posts {
|
||||||
|
let media_urls = extract_media_from_html(post_html, &self.root_url);
|
||||||
|
|
||||||
|
for media_url in media_urls {
|
||||||
|
if seen_urls.contains(&media_url) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
seen_urls.insert(media_url.clone());
|
||||||
|
|
||||||
|
let msg = Message::url(&media_url)
|
||||||
|
.with_metadata("post_id", serde_json::json!(post_id))
|
||||||
|
.with_metadata("thread_id",
|
||||||
|
serde_json::json!(self.thread_id.as_deref().unwrap_or("0")));
|
||||||
|
|
||||||
|
// Try to extract a filename from the URL
|
||||||
|
if let Some(filename) = url_filename(&media_url) {
|
||||||
|
messages.push(msg.with_filename(filename));
|
||||||
|
} else {
|
||||||
|
messages.push(msg);
|
||||||
|
}
|
||||||
|
total_media += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no posts found at all, try a simpler fallback: just extract all media from the page
|
||||||
|
if posts.is_empty() {
|
||||||
|
log::warn!("No post blocks found on page {} — trying full-page scan", page_num);
|
||||||
|
let media_urls = extract_media_from_html(&html, &self.root_url);
|
||||||
|
for media_url in media_urls {
|
||||||
|
if seen_urls.contains(&media_url) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
seen_urls.insert(media_url.clone());
|
||||||
|
|
||||||
|
let msg = Message::url(&media_url);
|
||||||
|
if let Some(filename) = url_filename(&media_url) {
|
||||||
|
messages.push(msg.with_filename(filename));
|
||||||
|
} else {
|
||||||
|
messages.push(msg);
|
||||||
|
}
|
||||||
|
total_media += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for next page
|
||||||
|
if let Some(next_href) = find_next_page(&html) {
|
||||||
|
let next_url = if next_href.starts_with("http") {
|
||||||
|
next_href
|
||||||
|
} else {
|
||||||
|
format!("{}{}", self.root_url, next_href)
|
||||||
|
};
|
||||||
|
current_url = Some(next_url);
|
||||||
|
page_num += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log::info!("Extracted {} media URLs across {} pages", total_media, page_num);
|
||||||
|
Ok(messages)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Try to extract a usable filename from a URL
|
||||||
|
fn url_filename(url: &str) -> Option<String> {
|
||||||
|
let path = url::Url::parse(url).ok()?.path().to_string();
|
||||||
|
let segment = path.rsplit('/').next()?;
|
||||||
|
if segment.is_empty() || !segment.contains('.') {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// URL-decode the filename
|
||||||
|
let decoded = urlencoding::decode(segment).ok()?;
|
||||||
|
Some(decoded.into_owned())
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for XenforoThreadExtractor {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new().expect("Failed to create XenforoThreadExtractor")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for XenforoThreadExtractor {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
pattern: self.pattern.clone(),
|
||||||
|
category: self.category.clone(),
|
||||||
|
subcategory: self.subcategory.clone(),
|
||||||
|
root_url: self.root_url.clone(),
|
||||||
|
domain: self.domain.clone(),
|
||||||
|
thread_path: self.thread_path.clone(),
|
||||||
|
thread_id: self.thread_id.clone(),
|
||||||
|
page: self.page,
|
||||||
|
cookies: self.cookies.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Extractor for XenforoThreadExtractor {
|
||||||
|
fn category(&self) -> &str { &self.category }
|
||||||
|
fn subcategory(&self) -> &str { &self.subcategory }
|
||||||
|
fn root(&self) -> &str { &self.root_url }
|
||||||
|
fn pattern(&self) -> &Regex { &self.pattern }
|
||||||
|
fn clone_extractor(&self) -> Box<dyn Extractor> { Box::new(self.clone()) }
|
||||||
|
|
||||||
|
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
||||||
|
if let Some(captures) = self.pattern.captures(&m.url) {
|
||||||
|
if let Some(domain) = captures.get(1) {
|
||||||
|
self.domain = Some(domain.as_str().to_string());
|
||||||
|
self.root_url = root_for_domain(domain.as_str());
|
||||||
|
}
|
||||||
|
if let Some(path) = captures.get(2) {
|
||||||
|
self.thread_path = Some(path.as_str().to_string());
|
||||||
|
}
|
||||||
|
if let Some(id) = captures.get(3) {
|
||||||
|
self.thread_id = Some(id.as_str().to_string());
|
||||||
|
}
|
||||||
|
if let Some(page) = captures.get(4) {
|
||||||
|
self.page = Some(page.as_str().parse::<i64>().unwrap_or(1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log::info!("Initialized XenForo thread extractor: path={:?} id={:?} page={:?}",
|
||||||
|
self.thread_path, self.thread_id, self.page);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
|
||||||
|
self.extract_thread().await
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_cookies(&mut self, cookies: HashMap<String, String>) {
|
||||||
|
self.cookies = cookies;
|
||||||
|
log::debug!("XenForo cookies set: {} entries", self.cookies.len());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// XenforoPostExtractor
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
pub struct XenforoPostExtractor {
|
||||||
pattern: Regex,
|
pattern: Regex,
|
||||||
category: String,
|
category: String,
|
||||||
subcategory: String,
|
subcategory: String,
|
||||||
root_url: String,
|
root_url: String,
|
||||||
forum_id: Option<String>,
|
domain: Option<String>,
|
||||||
client: HttpClient,
|
post_id: Option<String>,
|
||||||
|
post_url_prefix: Option<String>,
|
||||||
|
cookies: HashMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl XenforoPostExtractor {
|
impl XenforoPostExtractor {
|
||||||
@@ -48,27 +470,79 @@ impl XenforoPostExtractor {
|
|||||||
category: "xenforo".to_string(),
|
category: "xenforo".to_string(),
|
||||||
subcategory: "post".to_string(),
|
subcategory: "post".to_string(),
|
||||||
root_url: "https://simpcity.cr".to_string(),
|
root_url: "https://simpcity.cr".to_string(),
|
||||||
|
domain: None,
|
||||||
post_id: None,
|
post_id: None,
|
||||||
client: HttpClient::builder()
|
post_url_prefix: None,
|
||||||
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
cookies: HashMap::new(),
|
||||||
.build()
|
|
||||||
.map_err(|e| ExtractorError::ConfigError(e.to_string()))?,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn create_client(&self) -> Result<reqwest::Client, ExtractorError> {
|
||||||
|
reqwest::Client::builder()
|
||||||
|
.user_agent(USER_AGENT)
|
||||||
|
.timeout(std::time::Duration::from_secs(30))
|
||||||
|
.redirect(reqwest::redirect::Policy::limited(10))
|
||||||
|
.build()
|
||||||
|
.map_err(|e| ExtractorError::ConfigError(e.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
async fn extract_post(&self) -> Result<Vec<Message>, ExtractorError> {
|
async fn extract_post(&self) -> Result<Vec<Message>, ExtractorError> {
|
||||||
let post_id = self.post_id.as_ref()
|
let post_id = self.post_id.as_ref()
|
||||||
.ok_or_else(|| ExtractorError::NotInitialized("post_id not set".to_string()))?;
|
.ok_or_else(|| ExtractorError::NotInitialized("post_id not set".to_string()))?;
|
||||||
|
|
||||||
log::info!("Extracting XenForo post: {}", post_id);
|
log::info!("Extracting XenForo post: {}", post_id);
|
||||||
|
|
||||||
let mut messages = Vec::new();
|
// Fetch the post page
|
||||||
|
let url = format!("{}/posts/{}/", self.root_url, post_id);
|
||||||
|
let client = self.create_client()?;
|
||||||
|
let mut request = client.get(&url);
|
||||||
|
if !self.cookies.is_empty() {
|
||||||
|
request = request.header("Cookie", cookie_header(&self.cookies));
|
||||||
|
}
|
||||||
|
|
||||||
|
let response = request.send().await
|
||||||
|
.map_err(ExtractorError::RequestFailed)?;
|
||||||
|
|
||||||
|
let status = response.status();
|
||||||
|
if !status.is_success() {
|
||||||
|
return Err(ExtractorError::HttpError(format!("HTTP {}", status.as_u16())));
|
||||||
|
}
|
||||||
|
|
||||||
|
let html = response.text().await
|
||||||
|
.map_err(|e| ExtractorError::ParseError(e.to_string()))?;
|
||||||
|
|
||||||
|
let mut messages = Vec::new();
|
||||||
|
let mut seen_urls: HashSet<String> = HashSet::new();
|
||||||
|
|
||||||
|
// Directory message
|
||||||
let mut dir_msg = Message::directory("");
|
let mut dir_msg = Message::directory("");
|
||||||
dir_msg.metadata.insert("post_id".to_string(), serde_json::json!(post_id.parse::<i64>().unwrap_or(0)));
|
dir_msg.metadata.insert("post_id".to_string(), serde_json::json!(post_id));
|
||||||
messages.push(dir_msg);
|
messages.push(dir_msg);
|
||||||
|
|
||||||
log::info!("Found XenForo post {}", post_id);
|
// Try to find just the target post
|
||||||
|
let posts = extract_posts(&html);
|
||||||
|
let target_html = posts.iter()
|
||||||
|
.find(|(id, _)| id == post_id)
|
||||||
|
.map(|(_, content)| content.as_str())
|
||||||
|
.unwrap_or(&html);
|
||||||
|
|
||||||
|
let media_urls = extract_media_from_html(target_html, &self.root_url);
|
||||||
|
for media_url in media_urls {
|
||||||
|
if seen_urls.contains(&media_url) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
seen_urls.insert(media_url.clone());
|
||||||
|
|
||||||
|
let msg = Message::url(&media_url)
|
||||||
|
.with_metadata("post_id", serde_json::json!(post_id));
|
||||||
|
if let Some(filename) = url_filename(&media_url) {
|
||||||
|
messages.push(msg.with_filename(filename));
|
||||||
|
} else {
|
||||||
|
messages.push(msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log::info!("Extracted {} media URLs from post {}", messages.len() - 1, post_id);
|
||||||
Ok(messages)
|
Ok(messages)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -86,11 +560,10 @@ impl Clone for XenforoPostExtractor {
|
|||||||
category: self.category.clone(),
|
category: self.category.clone(),
|
||||||
subcategory: self.subcategory.clone(),
|
subcategory: self.subcategory.clone(),
|
||||||
root_url: self.root_url.clone(),
|
root_url: self.root_url.clone(),
|
||||||
|
domain: self.domain.clone(),
|
||||||
post_id: self.post_id.clone(),
|
post_id: self.post_id.clone(),
|
||||||
client: HttpClient::builder()
|
post_url_prefix: self.post_url_prefix.clone(),
|
||||||
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
cookies: self.cookies.clone(),
|
||||||
.build()
|
|
||||||
.expect("Failed to create HTTP client"),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -105,7 +578,14 @@ impl Extractor for XenforoPostExtractor {
|
|||||||
|
|
||||||
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
||||||
if let Some(captures) = self.pattern.captures(&m.url) {
|
if let Some(captures) = self.pattern.captures(&m.url) {
|
||||||
if let Some(id) = captures.get(2) {
|
if let Some(domain) = captures.get(1) {
|
||||||
|
self.domain = Some(domain.as_str().to_string());
|
||||||
|
self.root_url = root_for_domain(domain.as_str());
|
||||||
|
}
|
||||||
|
if let Some(prefix) = captures.get(2) {
|
||||||
|
self.post_url_prefix = Some(prefix.as_str().to_string());
|
||||||
|
}
|
||||||
|
if let Some(id) = captures.get(3) {
|
||||||
self.post_id = Some(id.as_str().to_string());
|
self.post_id = Some(id.as_str().to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -115,91 +595,24 @@ impl Extractor for XenforoPostExtractor {
|
|||||||
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
|
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
|
||||||
self.extract_post().await
|
self.extract_post().await
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl XenforoThreadExtractor {
|
fn set_cookies(&mut self, cookies: HashMap<String, String>) {
|
||||||
pub fn new() -> Result<Self, ExtractorError> {
|
self.cookies = cookies;
|
||||||
let pattern = Regex::new(
|
|
||||||
r"(?:https?://)?(?:www\.)?(simpcity\.cr|simpcity\.su|nudostar\.com/forum|allthefallen\.moe/forum|celebforum\.to|titsintops\.com/phpBB2|forums\.socialmediagirls\.com)(/(?:index\.php\?)?threads/(?:[^/?#]+\.)?(\d+))(?:/page-(\d+))?"
|
|
||||||
).map_err(|e| ExtractorError::ConfigError(e.to_string()))?;
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
pattern,
|
|
||||||
category: "xenforo".to_string(),
|
|
||||||
subcategory: "thread".to_string(),
|
|
||||||
root_url: "https://simpcity.cr".to_string(),
|
|
||||||
thread_id: None,
|
|
||||||
page: None,
|
|
||||||
client: HttpClient::builder()
|
|
||||||
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
|
||||||
.build()
|
|
||||||
.map_err(|e| ExtractorError::ConfigError(e.to_string()))?,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn extract_thread(&self) -> Result<Vec<Message>, ExtractorError> {
|
|
||||||
let thread_id = self.thread_id.as_ref()
|
|
||||||
.ok_or_else(|| ExtractorError::NotInitialized("thread_id not set".to_string()))?;
|
|
||||||
|
|
||||||
log::info!("Extracting XenForo thread: {}", thread_id);
|
|
||||||
|
|
||||||
let mut messages = Vec::new();
|
|
||||||
|
|
||||||
let mut dir_msg = Message::directory("");
|
|
||||||
dir_msg.metadata.insert("thread_id".to_string(), serde_json::json!(thread_id.parse::<i64>().unwrap_or(0)));
|
|
||||||
messages.push(dir_msg);
|
|
||||||
|
|
||||||
log::info!("Found XenForo thread {}", thread_id);
|
|
||||||
Ok(messages)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for XenforoThreadExtractor {
|
// ============================================================================
|
||||||
fn default() -> Self {
|
// XenforoForumExtractor
|
||||||
Self::new().expect("Failed to create XenforoThreadExtractor")
|
// ============================================================================
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Clone for XenforoThreadExtractor {
|
pub struct XenforoForumExtractor {
|
||||||
fn clone(&self) -> Self {
|
pattern: Regex,
|
||||||
Self {
|
category: String,
|
||||||
pattern: self.pattern.clone(),
|
subcategory: String,
|
||||||
category: self.category.clone(),
|
root_url: String,
|
||||||
subcategory: self.subcategory.clone(),
|
domain: Option<String>,
|
||||||
root_url: self.root_url.clone(),
|
forum_path: Option<String>,
|
||||||
thread_id: self.thread_id.clone(),
|
cookies: HashMap<String, String>,
|
||||||
page: self.page.clone(),
|
|
||||||
client: HttpClient::builder()
|
|
||||||
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
|
||||||
.build()
|
|
||||||
.expect("Failed to create HTTP client"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait]
|
|
||||||
impl Extractor for XenforoThreadExtractor {
|
|
||||||
fn category(&self) -> &str { &self.category }
|
|
||||||
fn subcategory(&self) -> &str { &self.subcategory }
|
|
||||||
fn root(&self) -> &str { &self.root_url }
|
|
||||||
fn pattern(&self) -> &Regex { &self.pattern }
|
|
||||||
fn clone_extractor(&self) -> Box<dyn Extractor> { Box::new(self.clone()) }
|
|
||||||
|
|
||||||
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
|
||||||
if let Some(captures) = self.pattern.captures(&m.url) {
|
|
||||||
if let Some(id) = captures.get(2) {
|
|
||||||
self.thread_id = Some(id.as_str().to_string());
|
|
||||||
}
|
|
||||||
if let Some(page) = captures.get(3) {
|
|
||||||
self.page = Some(page.as_str().parse::<i64>().unwrap_or(1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
|
|
||||||
self.extract_thread().await
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl XenforoForumExtractor {
|
impl XenforoForumExtractor {
|
||||||
@@ -213,11 +626,9 @@ impl XenforoForumExtractor {
|
|||||||
category: "xenforo".to_string(),
|
category: "xenforo".to_string(),
|
||||||
subcategory: "forum".to_string(),
|
subcategory: "forum".to_string(),
|
||||||
root_url: "https://simpcity.cr".to_string(),
|
root_url: "https://simpcity.cr".to_string(),
|
||||||
forum_id: None,
|
domain: None,
|
||||||
client: HttpClient::builder()
|
forum_path: None,
|
||||||
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
cookies: HashMap::new(),
|
||||||
.build()
|
|
||||||
.map_err(|e| ExtractorError::ConfigError(e.to_string()))?,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -235,11 +646,9 @@ impl Clone for XenforoForumExtractor {
|
|||||||
category: self.category.clone(),
|
category: self.category.clone(),
|
||||||
subcategory: self.subcategory.clone(),
|
subcategory: self.subcategory.clone(),
|
||||||
root_url: self.root_url.clone(),
|
root_url: self.root_url.clone(),
|
||||||
forum_id: self.forum_id.clone(),
|
domain: self.domain.clone(),
|
||||||
client: HttpClient::builder()
|
forum_path: self.forum_path.clone(),
|
||||||
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
cookies: self.cookies.clone(),
|
||||||
.build()
|
|
||||||
.expect("Failed to create HTTP client"),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -254,17 +663,25 @@ impl Extractor for XenforoForumExtractor {
|
|||||||
|
|
||||||
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
||||||
if let Some(captures) = self.pattern.captures(&m.url) {
|
if let Some(captures) = self.pattern.captures(&m.url) {
|
||||||
if let Some(id) = captures.get(1) {
|
if let Some(domain) = captures.get(1) {
|
||||||
self.forum_id = Some(id.as_str().to_string());
|
self.domain = Some(domain.as_str().to_string());
|
||||||
|
self.root_url = root_for_domain(domain.as_str());
|
||||||
|
}
|
||||||
|
if let Some(path) = captures.get(2) {
|
||||||
|
self.forum_path = Some(path.as_str().to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
|
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
|
||||||
log::info!("Extracting XenForo forum");
|
log::info!("XenForo forum extractor not yet implemented");
|
||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn set_cookies(&mut self, cookies: HashMap<String, String>) {
|
||||||
|
self.cookies = cookies;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -282,4 +699,116 @@ mod tests {
|
|||||||
let extractor = XenforoThreadExtractor::new().unwrap();
|
let extractor = XenforoThreadExtractor::new().unwrap();
|
||||||
assert!(extractor.pattern.is_match("https://simpcity.cr/threads/TITLE.12345/"));
|
assert!(extractor.pattern.is_match("https://simpcity.cr/threads/TITLE.12345/"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_thread_pattern_with_page() {
|
||||||
|
let extractor = XenforoThreadExtractor::new().unwrap();
|
||||||
|
let url = "https://simpcity.cr/threads/dimeestevez.39618/page-2";
|
||||||
|
assert!(extractor.pattern.is_match(url));
|
||||||
|
|
||||||
|
let caps = extractor.pattern.captures(url).unwrap();
|
||||||
|
assert_eq!(caps.get(1).unwrap().as_str(), "simpcity.cr");
|
||||||
|
assert_eq!(caps.get(2).unwrap().as_str(), "/threads/dimeestevez.39618");
|
||||||
|
assert_eq!(caps.get(3).unwrap().as_str(), "39618");
|
||||||
|
assert_eq!(caps.get(4).unwrap().as_str(), "2");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_media_from_html() {
|
||||||
|
let html = r#"
|
||||||
|
<img src="https://example.com/image1.jpg" class="bbImage " loading="lazy" />
|
||||||
|
<video src="https://example.com/video.mp4"></video>
|
||||||
|
<a href="https://example.com/attachments/file.zip">Download</a>
|
||||||
|
"#;
|
||||||
|
let urls = extract_media_from_html(html, "https://simpcity.cr");
|
||||||
|
assert_eq!(urls.len(), 3);
|
||||||
|
assert!(urls.contains(&"https://example.com/image1.jpg".to_string()));
|
||||||
|
assert!(urls.contains(&"https://example.com/video.mp4".to_string()));
|
||||||
|
assert!(urls.contains(&"https://example.com/attachments/file.zip".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_media_skips_smilies() {
|
||||||
|
let html = r#"
|
||||||
|
<img src="https://simpcity.cr/styles/emoji.png" class="bbImage" />
|
||||||
|
<img src="https://example.com/real-image.jpg" class="bbImage " loading="lazy" />
|
||||||
|
"#;
|
||||||
|
let urls = extract_media_from_html(html, "https://simpcity.cr");
|
||||||
|
assert_eq!(urls.len(), 1);
|
||||||
|
assert_eq!(urls[0], "https://example.com/real-image.jpg");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_upgrade_thumbnail() {
|
||||||
|
assert_eq!(
|
||||||
|
upgrade_thumbnail("https://simp1.selti-delivery.ru/images/test.md.jpg"),
|
||||||
|
"https://simp1.selti-delivery.ru/images/test.jpg"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
upgrade_thumbnail("https://example.com/image.jpg"),
|
||||||
|
"https://example.com/image.jpg"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_posts_from_real_html() {
|
||||||
|
let html = r#"
|
||||||
|
<article class="message" data-content="post-111" id="js-post-111">
|
||||||
|
<article class="message-body js-selectToQuote">
|
||||||
|
<img src="https://cdn.example.com/img1.jpg" class="bbImage " />
|
||||||
|
</article>
|
||||||
|
</article>
|
||||||
|
<article class="message" data-content="post-222" id="js-post-222">
|
||||||
|
<article class="message-body js-selectToQuote">
|
||||||
|
<img src="https://cdn.example.com/img2.jpg" class="bbImage " />
|
||||||
|
</article>
|
||||||
|
</article>
|
||||||
|
"#;
|
||||||
|
let posts = extract_posts(html);
|
||||||
|
assert_eq!(posts.len(), 2);
|
||||||
|
assert_eq!(posts[0].0, "111");
|
||||||
|
assert_eq!(posts[1].0, "222");
|
||||||
|
|
||||||
|
// Each post should yield its own image
|
||||||
|
let urls1 = extract_media_from_html(&posts[0].1, "https://simpcity.cr");
|
||||||
|
assert_eq!(urls1.len(), 1);
|
||||||
|
assert!(urls1[0].contains("img1.jpg"));
|
||||||
|
|
||||||
|
let urls2 = extract_media_from_html(&posts[1].1, "https://simpcity.cr");
|
||||||
|
assert_eq!(urls2.len(), 1);
|
||||||
|
assert!(urls2[0].contains("img2.jpg"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_find_next_page() {
|
||||||
|
let html = r#"<a href="/threads/test.123/page-2" class="pageNav-jump pageNav-jump--next">Next</a>"#;
|
||||||
|
assert_eq!(find_next_page(html), Some("/threads/test.123/page-2".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_find_next_page_none() {
|
||||||
|
let html = r#"<div>no pagination here</div>"#;
|
||||||
|
assert_eq!(find_next_page(html), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_thread_title() {
|
||||||
|
let html = r#"<h1 class="p-title-value">Thread Title Here</h1>"#;
|
||||||
|
assert_eq!(extract_thread_title(html), Some("Thread Title Here".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_url_filename() {
|
||||||
|
assert_eq!(
|
||||||
|
url_filename("https://example.com/path/to/image.jpg"),
|
||||||
|
Some("image.jpg".to_string())
|
||||||
|
);
|
||||||
|
assert_eq!(url_filename("https://example.com/"), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_root_for_domain() {
|
||||||
|
assert_eq!(root_for_domain("simpcity.cr"), "https://simpcity.cr");
|
||||||
|
assert_eq!(root_for_domain("nudostar.com/forum"), "https://nudostar.com/forum");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
94
src/main.rs
94
src/main.rs
@@ -86,6 +86,17 @@ fn write_page_dump(url: &str, items: &[Message]) {
|
|||||||
let _ = std::fs::write(path, out);
|
let _ = std::fs::write(path, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Extract a usable filename from a URL path
|
||||||
|
fn url_to_filename(url: &str) -> Option<String> {
|
||||||
|
let parsed = url::Url::parse(url).ok()?;
|
||||||
|
let path = parsed.path();
|
||||||
|
let segment = path.rsplit('/').next()?;
|
||||||
|
if segment.is_empty() || !segment.contains('.') {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
urlencoding::decode(segment).ok().map(|s| s.into_owned())
|
||||||
|
}
|
||||||
|
|
||||||
fn render_filename(pattern: Option<&str>, index: usize, item: &Message) -> String {
|
fn render_filename(pattern: Option<&str>, index: usize, item: &Message) -> String {
|
||||||
if let Some(template) = pattern {
|
if let Some(template) = pattern {
|
||||||
let ext = item.extension().unwrap_or_else(|| "bin".to_string());
|
let ext = item.extension().unwrap_or_else(|| "bin".to_string());
|
||||||
@@ -1092,9 +1103,14 @@ fn main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if let Some(ref browser) = args.cookies_from_browser {
|
} else if let Some(ref browser) = args.cookies_from_browser {
|
||||||
match gallery_dl::extract_browser_cookies(browser, None) {
|
// Extract the domain from input URLs to filter browser cookies
|
||||||
|
let domain_filter: Option<String> = args.urls.first()
|
||||||
|
.and_then(|u| url::Url::parse(u).ok())
|
||||||
|
.and_then(|u| u.host_str().map(|h| h.to_string()));
|
||||||
|
|
||||||
|
match gallery_dl::extract_browser_cookies(browser, domain_filter.as_deref()) {
|
||||||
Ok(c) => {
|
Ok(c) => {
|
||||||
log::info!("Extracted {} cookies from browser '{}'", c.len(), browser);
|
log::info!("Extracted {} cookies from browser '{}' (domain filter: {:?})", c.len(), browser, domain_filter);
|
||||||
Some(c)
|
Some(c)
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
@@ -1644,36 +1660,66 @@ fn main() {
|
|||||||
let mut metadata_by_url: HashMap<String, HashMap<String, serde_json::Value>> =
|
let mut metadata_by_url: HashMap<String, HashMap<String, serde_json::Value>> =
|
||||||
HashMap::new();
|
HashMap::new();
|
||||||
|
|
||||||
// Determine download directory: CLI arg > config > default
|
// Determine base download directory: CLI arg > config > default (Pictures/gallery-dl)
|
||||||
let download_dir = args.directory.clone()
|
let base_dir = args.directory.clone()
|
||||||
.or_else(|| args.destination.clone())
|
.or_else(|| args.destination.clone())
|
||||||
.or_else(|| config.downloader.directory.clone())
|
.or_else(|| config.downloader.directory.clone())
|
||||||
.unwrap_or_else(|| PathBuf::from("."));
|
.unwrap_or_else(|| {
|
||||||
|
dirs::picture_dir()
|
||||||
|
.unwrap_or_else(|| PathBuf::from("."))
|
||||||
|
.join("gallery-dl")
|
||||||
|
});
|
||||||
|
|
||||||
|
// Extract directory metadata from the first Directory message
|
||||||
|
// to build subdirectory path: {category}/{title}/
|
||||||
|
let mut dir_category = String::new();
|
||||||
|
let mut dir_title = String::new();
|
||||||
|
for item in items.iter() {
|
||||||
|
if matches!(item.kind, MessageKind::Directory) {
|
||||||
|
if let Some(cat) = item.metadata.get("category") {
|
||||||
|
dir_category = cat.as_str().unwrap_or("").to_string();
|
||||||
|
}
|
||||||
|
if let Some(title) = item.metadata.get("title") {
|
||||||
|
dir_title = title.as_str().unwrap_or("").to_string();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the download directory with subdirectories
|
||||||
|
let download_dir = if !dir_category.is_empty() || !dir_title.is_empty() {
|
||||||
|
let cat = if dir_category.is_empty() { "other".to_string() } else {
|
||||||
|
sanitize_filename(&dir_category, args.restrict_filenames, true)
|
||||||
|
};
|
||||||
|
let title = if dir_title.is_empty() { "untitled".to_string() } else {
|
||||||
|
sanitize_filename(&dir_title, args.restrict_filenames, true)
|
||||||
|
};
|
||||||
|
base_dir.join(cat).join(title)
|
||||||
|
} else {
|
||||||
|
base_dir.clone()
|
||||||
|
};
|
||||||
|
|
||||||
for (j, item) in items.iter().enumerate() {
|
for (j, item) in items.iter().enumerate() {
|
||||||
if !matches!(item.kind, MessageKind::Url | MessageKind::Queue) {
|
if !matches!(item.kind, MessageKind::Url | MessageKind::Queue) {
|
||||||
println!(" [{}] Skipping non-download message ({:?})", j + 1, item.kind);
|
println!(" [{}] Skipping non-download message ({:?})", j + 1, item.kind);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut template_pattern = args
|
// Use the extractor-provided filename, or derive from URL, or fall back to template
|
||||||
.rename_to
|
let filename = if let Some(ref f) = item.filename {
|
||||||
.as_deref()
|
f.clone()
|
||||||
.or(args.rename.as_deref())
|
} else if let Some(f) = url_to_filename(&item.url) {
|
||||||
.or(args.filename.as_deref())
|
f
|
||||||
.or(config.downloader.filename.as_deref());
|
} else {
|
||||||
|
let template_pattern = args
|
||||||
if template_pattern.is_none() {
|
.rename_to
|
||||||
template_pattern = Some("{num}.{ext}");
|
.as_deref()
|
||||||
}
|
.or(args.rename.as_deref())
|
||||||
|
.or(args.filename.as_deref())
|
||||||
// Create a simple destination path based on the URL
|
.or(config.downloader.filename.as_deref())
|
||||||
// In a full implementation, this would use path templates
|
.unwrap_or("{num}.{ext}");
|
||||||
let filename = render_filename(
|
render_filename(Some(template_pattern), j, item)
|
||||||
template_pattern,
|
};
|
||||||
j,
|
|
||||||
item,
|
|
||||||
);
|
|
||||||
let filename = sanitize_filename(
|
let filename = sanitize_filename(
|
||||||
&filename,
|
&filename,
|
||||||
args.restrict_filenames,
|
args.restrict_filenames,
|
||||||
|
|||||||
Reference in New Issue
Block a user