feat: add Snapchat extractor, improve browser auth and XenForo support

- Add new Snapchat story extractor with spotlight and user story support
- Expand browser cookie extraction to support Zen Browser and multi-platform profiles
- Significantly enhance XenForo extractor with gallery, media, and attachment support
- Add APPDATA-based profile discovery for Windows browsers
- Update main.rs with new extractor wiring and improved CLI handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-25 16:29:16 +01:00
parent e4dae6de12
commit ca342ee3a3
6 changed files with 1441 additions and 243 deletions

1
.gitignore vendored
View File

@@ -1,4 +1,5 @@
archive/
.claude/
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@@ -1,8 +1,15 @@
//! Browser cookie extraction for Firefox and Chrome
//! Browser cookie extraction for Firefox-based browsers and Chrome
//!
//! This module provides functionality to extract cookies directly from
//! browser SQLite cookie databases, enabling seamless authentication
//! without manual cookie file exports.
//!
//! Supported browsers:
//! - Firefox (all platforms)
//! - Zen Browser (Firefox-based)
//! - LibreWolf (Firefox-based)
//! - Waterfox (Firefox-based)
//! - Chrome / Chromium (all platforms, plaintext cookies only)
use rusqlite::Connection;
use std::collections::HashMap;
@@ -60,6 +67,17 @@ fn get_home_dir() -> Result<PathBuf, BrowserError> {
.ok_or_else(|| BrowserError::Other("Could not determine home directory".to_string()))
}
/// Get the roaming APPDATA directory.
///
/// On Windows this reads the `APPDATA` environment variable; on other
/// platforms (or when the variable is unset) it falls back to the
/// platform config directory reported by the `dirs` crate — note this is
/// the config dir, *not* the home dir.
fn get_appdata_dir() -> Result<PathBuf, BrowserError> {
    // Try APPDATA env var first (Windows)
    if let Ok(appdata) = std::env::var("APPDATA") {
        return Ok(PathBuf::from(appdata));
    }
    // Fallback: use dirs crate
    dirs::config_dir()
        .ok_or_else(|| BrowserError::Other("Could not determine config directory".to_string()))
}
/// Copy a file to a temporary location to avoid locking issues
fn copy_to_temp<P: AsRef<std::path::Path>>(path: P) -> Result<tempfile::TempPath, BrowserError> {
let temp_file = tempfile::NamedTempFile::new()?;
@@ -67,32 +85,26 @@ fn copy_to_temp<P: AsRef<std::path::Path>>(path: P) -> Result<tempfile::TempPath
Ok(temp_file.into_temp_path())
}
/// Find the Firefox profile directory
/// Find a profile directory from a list of candidate parent directories.
///
/// Searches in ~/.mozilla/firefox/ for profiles
pub fn find_firefox_profile() -> Result<PathBuf, BrowserError> {
let home = get_home_dir()?;
let firefox_dir = home.join(".mozilla").join("firefox");
/// Searches each candidate for subdirectories containing `cookies.sqlite`.
/// Prefers `default-release` profiles, then `default` profiles.
fn find_profile_in_dirs(candidate_dirs: &[PathBuf]) -> Result<PathBuf, BrowserError> {
for dir in candidate_dirs {
if !dir.exists() {
continue;
}
if !firefox_dir.exists() {
return Err(BrowserError::ProfileNotFound(format!(
"Firefox directory not found: {:?}",
firefox_dir
)));
}
let entries = match fs::read_dir(dir) {
Ok(e) => e,
Err(_) => continue,
};
// Read directory entries
let entries = fs::read_dir(&firefox_dir).map_err(|e| BrowserError::Io(e))?;
let mut profile_dirs: Vec<(String, PathBuf)> = Vec::new();
let mut profile_dirs: Vec<(String, PathBuf)> = Vec::new();
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
// Check if this is a profile directory (contains cookies.sqlite)
let cookies_path = path.join("cookies.sqlite");
if cookies_path.exists() {
// Get the profile name from the directory name
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() && path.join("cookies.sqlite").exists() {
let name = path
.file_name()
.and_then(|n| n.to_str())
@@ -101,46 +113,112 @@ pub fn find_firefox_profile() -> Result<PathBuf, BrowserError> {
profile_dirs.push((name, path));
}
}
}
if profile_dirs.is_empty() {
return Err(BrowserError::ProfileNotFound(
"No Firefox profiles with cookies found".to_string(),
));
}
// Prefer default-release profile, otherwise use first available
profile_dirs.sort_by(|a, b| {
let a_default = a.0.contains("default-release");
let b_default = b.0.contains("default-release");
match (a_default, b_default) {
(true, false) => std::cmp::Ordering::Less,
(false, true) => std::cmp::Ordering::Greater,
_ => std::cmp::Ordering::Equal,
if profile_dirs.is_empty() {
continue;
}
});
let selected = &profile_dirs[0].1;
log::info!("Found Firefox profile: {:?}", selected);
Ok(selected.clone())
// Sort: prefer default-release > default > anything else
profile_dirs.sort_by(|a, b| {
fn rank(name: &str) -> u8 {
if name.contains("default-release") { 0 }
else if name.contains("default") { 1 }
else { 2 }
}
rank(&a.0).cmp(&rank(&b.0))
});
let selected = &profile_dirs[0].1;
log::info!("Found browser profile: {:?}", selected);
return Ok(selected.clone());
}
Err(BrowserError::ProfileNotFound(format!(
"No profiles with cookies found. Searched: {:?}",
candidate_dirs
)))
}
/// Extract cookies from Firefox profile
///
/// # Arguments
/// * `domain` - Optional domain to filter cookies (e.g., ".twitter.com")
///
/// Returns a HashMap of cookie name -> value
pub fn extract_firefox_cookies(
/// Candidate parent directories that may contain Firefox profiles,
/// covering the standard Windows, Linux, and macOS locations.
///
/// Non-existent directories are fine: the caller skips them.
fn firefox_profile_dirs() -> Vec<PathBuf> {
    let home = match get_home_dir() {
        Ok(h) => h,
        Err(_) => return Vec::new(),
    };
    let mut candidates = Vec::new();
    // Windows (roaming AppData)
    if let Ok(appdata) = get_appdata_dir() {
        candidates.push(appdata.join("Mozilla").join("Firefox").join("Profiles"));
    }
    // Linux
    candidates.push(home.join(".mozilla").join("firefox"));
    // macOS
    candidates.push(
        home.join("Library")
            .join("Application Support")
            .join("Firefox")
            .join("Profiles"),
    );
    candidates
}
/// Candidate parent directories that may contain Zen Browser profiles,
/// covering the standard Windows, Linux, and macOS locations.
///
/// Non-existent directories are fine: the caller skips them.
fn zen_profile_dirs() -> Vec<PathBuf> {
    let home = match get_home_dir() {
        Ok(h) => h,
        Err(_) => return Vec::new(),
    };
    let mut candidates = Vec::new();
    // Windows (roaming AppData)
    if let Ok(appdata) = get_appdata_dir() {
        candidates.push(appdata.join("zen").join("Profiles"));
    }
    // Linux
    candidates.push(home.join(".zen"));
    // macOS
    candidates.push(
        home.join("Library")
            .join("Application Support")
            .join("zen")
            .join("Profiles"),
    );
    candidates
}
/// Get candidate profile directories for LibreWolf
/// (Windows AppData, Linux dotfolder, macOS Application Support).
fn librewolf_profile_dirs() -> Vec<PathBuf> {
    let mut dirs = Vec::new();
    if let Ok(home) = get_home_dir() {
        // Windows (roaming AppData; on other platforms this resolves to the
        // config dir and simply won't exist — harmless)
        if let Ok(appdata) = get_appdata_dir() {
            dirs.push(appdata.join("librewolf").join("Profiles"));
        }
        // Linux
        dirs.push(home.join(".librewolf"));
        // macOS
        dirs.push(home.join("Library").join("Application Support").join("librewolf").join("Profiles"));
    }
    dirs
}
/// Get candidate profile directories for Waterfox
/// (Windows AppData, Linux dotfolder, macOS Application Support).
fn waterfox_profile_dirs() -> Vec<PathBuf> {
    let mut dirs = Vec::new();
    if let Ok(home) = get_home_dir() {
        // Windows (roaming AppData; on other platforms this resolves to the
        // config dir and simply won't exist — harmless)
        if let Ok(appdata) = get_appdata_dir() {
            dirs.push(appdata.join("Waterfox").join("Profiles"));
        }
        // Linux
        dirs.push(home.join(".waterfox"));
        // macOS
        dirs.push(home.join("Library").join("Application Support").join("Waterfox").join("Profiles"));
    }
    dirs
}
/// Find a Firefox profile directory (searches standard Firefox locations).
///
/// Returns the best-ranked profile containing a `cookies.sqlite` database
/// (ranking prefers `default-release`, then `default`).
pub fn find_firefox_profile() -> Result<PathBuf, BrowserError> {
    find_profile_in_dirs(&firefox_profile_dirs())
}
/// Find a Zen Browser profile directory (searches standard Zen locations).
///
/// Returns the best-ranked profile containing a `cookies.sqlite` database
/// (ranking prefers `default-release`, then `default`).
pub fn find_zen_profile() -> Result<PathBuf, BrowserError> {
    find_profile_in_dirs(&zen_profile_dirs())
}
/// Extract cookies from a Firefox-compatible SQLite database (moz_cookies table)
fn extract_moz_cookies(
profile_dir: &PathBuf,
domain: Option<&str>,
browser_name: &str,
) -> Result<HashMap<String, String>, BrowserError> {
let profile_dir = find_firefox_profile()?;
let cookies_path = profile_dir.join("cookies.sqlite");
if !cookies_path.exists() {
return Err(BrowserError::DatabaseNotFound(format!(
"Firefox cookies database not found: {:?}",
cookies_path
"{} cookies database not found: {:?}",
browser_name, cookies_path
)));
}
@@ -150,7 +228,6 @@ pub fn extract_firefox_cookies(
let cookies: HashMap<String, String> = match domain {
Some(d) => {
// Query with domain filter
let pattern = format!("%{}", d);
let mut stmt = conn.prepare("SELECT name, value FROM moz_cookies WHERE host LIKE ?")?;
let mut cookies = HashMap::new();
@@ -163,7 +240,6 @@ pub fn extract_firefox_cookies(
cookies
}
None => {
// Get all cookies
let mut stmt = conn.prepare("SELECT name, value FROM moz_cookies")?;
let mut cookies = HashMap::new();
let rows = stmt.query_map([], |row| {
@@ -176,24 +252,61 @@ pub fn extract_firefox_cookies(
}
};
log::info!("Extracted {} cookies from Firefox", cookies.len());
log::info!("Extracted {} cookies from {}", cookies.len(), browser_name);
Ok(cookies)
}
/// Extract cookies from Firefox
pub fn extract_firefox_cookies(
domain: Option<&str>,
) -> Result<HashMap<String, String>, BrowserError> {
let profile_dir = find_profile_in_dirs(&firefox_profile_dirs())?;
extract_moz_cookies(&profile_dir, domain, "Firefox")
}
/// Extract cookies from Zen Browser
pub fn extract_zen_cookies(
domain: Option<&str>,
) -> Result<HashMap<String, String>, BrowserError> {
let profile_dir = find_profile_in_dirs(&zen_profile_dirs())?;
extract_moz_cookies(&profile_dir, domain, "Zen Browser")
}
/// Extract cookies from LibreWolf.
///
/// # Arguments
/// * `domain` - Optional domain filter (e.g. "twitter.com")
pub fn extract_librewolf_cookies(
    domain: Option<&str>,
) -> Result<HashMap<String, String>, BrowserError> {
    let profile_dir = find_profile_in_dirs(&librewolf_profile_dirs())?;
    extract_moz_cookies(&profile_dir, domain, "LibreWolf")
}
/// Extract cookies from Waterfox.
///
/// # Arguments
/// * `domain` - Optional domain filter (e.g. "twitter.com")
pub fn extract_waterfox_cookies(
    domain: Option<&str>,
) -> Result<HashMap<String, String>, BrowserError> {
    let profile_dir = find_profile_in_dirs(&waterfox_profile_dirs())?;
    extract_moz_cookies(&profile_dir, domain, "Waterfox")
}
/// Find the Chrome profile directory
///
/// Searches in ~/.config/google-chrome/ for Default profile
pub fn find_chrome_profile() -> Result<PathBuf, BrowserError> {
let home = get_home_dir()?;
// Try different possible Chrome config locations
let possible_paths = vec![
home.join(".config").join("google-chrome"),
home.join(".config").join("chromium"),
home.join("Library")
.join("Application Support")
.join("Google Chrome"),
];
let mut possible_paths = Vec::new();
// Windows
if let Ok(local_appdata) = std::env::var("LOCALAPPDATA") {
let local = PathBuf::from(local_appdata);
possible_paths.push(local.join("Google").join("Chrome").join("User Data"));
possible_paths.push(local.join("Chromium").join("User Data"));
}
// Linux
possible_paths.push(home.join(".config").join("google-chrome"));
possible_paths.push(home.join(".config").join("chromium"));
// macOS
possible_paths.push(home.join("Library").join("Application Support").join("Google Chrome"));
for chrome_dir in possible_paths {
if chrome_dir.exists() {
@@ -215,13 +328,8 @@ pub fn find_chrome_profile() -> Result<PathBuf, BrowserError> {
/// Extract cookies from Chrome profile
///
/// Note: Chrome stores some cookies with encrypted values using the OS keyring.
/// This function extracts plaintext cookies and logs a warning for encrypted ones.
///
/// # Arguments
/// * `domain` - Optional domain to filter cookies (e.g., ".twitter.com")
///
/// Returns a HashMap of cookie name -> value
/// Note: Chrome encrypts most cookies using the OS keyring.
/// This function extracts plaintext cookies and skips encrypted ones.
pub fn extract_chrome_cookies(
domain: Option<&str>,
) -> Result<HashMap<String, String>, BrowserError> {
@@ -235,14 +343,12 @@ pub fn extract_chrome_cookies(
)));
}
// Copy to temp to avoid locking
let temp_path = copy_to_temp(&cookies_path)?;
let conn = Connection::open(&temp_path)?;
let mut cookies = HashMap::new();
let mut encrypted_count = 0;
// Chrome uses different table schema - check for encrypted_value column
let has_encrypted = conn
.query_row(
"SELECT COUNT(*) FROM pragma_table_info('cookies') WHERE name='encrypted_value'",
@@ -252,14 +358,13 @@ pub fn extract_chrome_cookies(
.unwrap_or(0)
> 0;
// Always select with domain filter (use wildcard for all)
let domain_pattern = match domain {
Some(d) => format!("%{}%", d),
None => "%".to_string(),
};
let mut stmt =
conn.prepare("SELECT name, value, encrypted_value FROM cookies WHERE host LIKE ?")?;
conn.prepare("SELECT name, value, encrypted_value FROM cookies WHERE host_key LIKE ?")?;
let rows = stmt.query_map([domain_pattern], |row| {
let name: String = row.get(0)?;
@@ -271,12 +376,11 @@ pub fn extract_chrome_cookies(
for row_result in rows {
let (name, value, encrypted) = row_result?;
// Check if cookie has encrypted value
if has_encrypted {
if let Some(enc) = encrypted {
if !enc.is_empty() {
encrypted_count += 1;
continue; // Skip encrypted cookies
continue;
}
}
}
@@ -287,7 +391,7 @@ pub fn extract_chrome_cookies(
if encrypted_count > 0 {
log::warn!(
"Skipped {} encrypted Chrome cookies (OS keyring required). \
Run with --cookies-file for encrypted cookies.",
Use --cookies with a cookies.txt file instead.",
encrypted_count
);
}
@@ -300,32 +404,22 @@ pub fn extract_chrome_cookies(
Ok(cookies)
}
/// Extract cookies from a browser
/// Extract cookies from a browser by name
///
/// # Arguments
/// * `browser` - Browser name: "firefox", "chrome", or "chromium"
/// * `domain` - Optional domain to filter cookies
///
/// # Example
/// ```no_run
/// use gallery_dl::auth::extract_browser_cookies;
///
/// // Get all cookies from Firefox
/// let cookies = extract_browser_cookies("firefox", None).unwrap();
///
/// // Get Twitter cookies from Chrome
/// let twitter_cookies = extract_browser_cookies("chrome", Some("twitter.com")).unwrap();
/// ```
/// Supported browsers: firefox, zen, librewolf, waterfox, chrome, chromium
pub fn extract_browser_cookies(
browser: &str,
domain: Option<&str>,
) -> Result<HashMap<String, String>, BrowserError> {
match browser.to_lowercase().as_str() {
"firefox" | "ff" => extract_firefox_cookies(domain),
"zen" | "zen-browser" => extract_zen_cookies(domain),
"librewolf" => extract_librewolf_cookies(domain),
"waterfox" => extract_waterfox_cookies(domain),
"chrome" | "google-chrome" => extract_chrome_cookies(domain),
"chromium" => extract_chrome_cookies(domain),
_ => Err(BrowserError::Other(format!(
"Unsupported browser: {}. Supported: firefox, chrome, chromium",
"Unsupported browser: '{}'. Supported: firefox, zen, librewolf, waterfox, chrome, chromium",
browser
))),
}
@@ -334,7 +428,6 @@ pub fn extract_browser_cookies(
#[cfg(test)]
mod tests {
use super::*;
use std::env;
#[test]
fn test_get_home_dir() {
@@ -350,25 +443,26 @@ mod tests {
#[test]
fn test_extract_browser_cookies_case_insensitive() {
    // An uppercase name must still resolve to the Firefox extractor
    // (i.e. NOT produce the "Unsupported browser" error).
    let result = extract_browser_cookies("FIREFOX", None);
    // Either works or profile not found (acceptable in test env)
    assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
}
#[test]
fn test_zen_browser_recognized() {
    // "zen" must be a recognized browser name.
    let result = extract_browser_cookies("zen", None);
    // Should be ProfileNotFound (not unsupported browser error)
    assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
}
#[test]
fn test_firefox_cookies_with_domain() {
    // Passing a domain filter must not panic even with no profile present.
    let result = extract_firefox_cookies(Some("twitter.com"));
    // Either works or profile not found (acceptable in test env)
    assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
}
#[test]
fn test_chrome_cookies_with_domain() {
    // Passing a domain filter must not panic even with no profile present.
    let result = extract_chrome_cookies(Some("twitter.com"));
    // Either works or profile not found (acceptable in test env)
    assert!(result.is_ok() || matches!(result, Err(BrowserError::ProfileNotFound(_))));
}
}

View File

@@ -246,6 +246,7 @@ mod rawkuma;
mod readcomiconline;
mod schalenetwork;
mod shimmie2;
mod snapchat;
mod tungsten;
mod weebdex;
mod xenforo;
@@ -861,6 +862,10 @@ pub fn register_all() {
// Register SimplyHentai extractors (simplyhentai.com)
register(simplyhentai::SimplyhentaiExtractor::new().expect("Failed to create SimplyHentai extractor"));
// Register Snapchat extractors (snapchat.com)
register(snapchat::SnapchatSpotlightExtractor::new());
register(snapchat::SnapchatProfileExtractor::new());
// Register Skeb extractors (skeb.jp)
register(skeb::SkebExtractor::new());

View File

@@ -0,0 +1,523 @@
//! Snapchat extractor implementation
//!
//! Supports public Snapchat content:
//! - Spotlight videos: `snapchat.com/spotlight/{id}`
//! - Public profiles/stories: `snapchat.com/add/{username}`
//!
//! Data is extracted from the `__NEXT_DATA__` JSON embedded in the page HTML
//! (Next.js server-side rendering). No authentication required for public content.
use async_trait::async_trait;
use regex::Regex;
use serde_json::Value;
use std::collections::HashMap;
use crate::extractor::{Extractor, ExtractorError, ExtractorMatch, Message};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
/// Extract the `__NEXT_DATA__` JSON blob from a Snapchat page.
///
/// Returns `None` when the script tag is absent, the regex fails to
/// compile, or the captured text is not valid JSON.
fn extract_next_data(html: &str) -> Option<Value> {
    // (?s) makes `.` match newlines: the embedded JSON is not guaranteed to
    // sit on a single line, and without the flag a multi-line payload would
    // never match. Non-greedy `.*?` stops at the first closing </script>.
    let re = Regex::new(r#"(?s)<script\s+id="__NEXT_DATA__"\s+type="application/json"[^>]*>(.*?)</script>"#).ok()?;
    let caps = re.captures(html)?;
    let json_str = caps.get(1)?.as_str();
    serde_json::from_str(json_str).ok()
}
/// Recursively collect references to every value stored under `key`
/// anywhere in the JSON tree. Objects and arrays are both descended;
/// results come back in depth-first document order.
fn find_all_values<'a>(json: &'a Value, key: &str) -> Vec<&'a Value> {
    // Inner worker appends into a shared accumulator to avoid building and
    // merging many intermediate vectors.
    fn walk<'a>(node: &'a Value, key: &str, out: &mut Vec<&'a Value>) {
        match node {
            Value::Object(map) => {
                for (k, v) in map {
                    if k == key {
                        out.push(v);
                    }
                    walk(v, key, out);
                }
            }
            Value::Array(items) => {
                for item in items {
                    walk(item, key, out);
                }
            }
            _ => {}
        }
    }

    let mut found = Vec::new();
    walk(json, key, &mut found);
    found
}
/// Extract a filename from a CDN URL.
/// e.g. `https://cf-st.sc-cdn.net/d/ABCDEF.27.IRZXSOY?mo=...` -> `ABCDEF.mp4`
///
/// The hash before the first `.` of the final path segment becomes the base
/// name. The extension is inferred from the URL when recognizable
/// (`.jpg`/`.jpeg`/`.png`); otherwise `.mp4` is assumed (Snapchat's CDN is
/// video-first). Previously `.mp4` was hard-coded, which mislabeled the
/// image URLs fed in by the profile extractor.
fn cdn_filename(url: &str) -> Option<String> {
    let parsed = url::Url::parse(url).ok()?;
    let path = parsed.path();
    // Path is like /d/HASH.27.IRZXSOY or /TYPE/HASH.27.IRZXSOY
    let segment = path.rsplit('/').next()?;
    // Take everything before the first dot as the hash ID
    let hash = segment.split('.').next()?;
    if hash.is_empty() {
        return None;
    }
    let lower = url.to_ascii_lowercase();
    let ext = if lower.contains(".jpg") || lower.contains(".jpeg") {
        "jpg"
    } else if lower.contains(".png") {
        "png"
    } else {
        "mp4"
    };
    Some(format!("{}.{}", hash, ext))
}
// ============================================================================
// SnapchatSpotlightExtractor — single spotlight video
// ============================================================================
/// Extractor for a single Snapchat Spotlight video page
/// (`snapchat.com/spotlight/{id}`).
#[derive(Clone)]
pub struct SnapchatSpotlightExtractor {
    // Compiled URL pattern used both for matching and ID capture.
    pattern: Regex,
    // Spotlight ID captured during `initialize`; None until then.
    spotlight_id: Option<String>,
}
impl SnapchatSpotlightExtractor {
    /// Create a new spotlight extractor with its URL pattern precompiled.
    pub fn new() -> Self {
        Self {
            pattern: Regex::new(
                r"(?:https?://)?(?:www\.)?snapchat\.com/spotlight/([A-Za-z0-9_-]+)"
            ).expect("Failed to compile Snapchat spotlight pattern"),
            spotlight_id: None,
        }
    }

    /// Build an HTTP client with a browser-like user agent, a 30s timeout,
    /// and a bounded redirect policy.
    fn create_client(&self) -> Result<reqwest::Client, ExtractorError> {
        reqwest::Client::builder()
            .user_agent(USER_AGENT)
            .timeout(std::time::Duration::from_secs(30))
            .redirect(reqwest::redirect::Policy::limited(10))
            .build()
            .map_err(|e| ExtractorError::ConfigError(e.to_string()))
    }

    /// Fetch a page and return its HTML body.
    ///
    /// # Errors
    /// * `NotFound` for HTTP 404
    /// * `HttpError` for any other non-success status
    /// * `ParseError` if the body cannot be read as text
    async fn fetch_page(&self, url: &str) -> Result<String, ExtractorError> {
        let client = self.create_client()?;
        let response = client.get(url).send().await
            .map_err(ExtractorError::RequestFailed)?;
        let status = response.status();
        if status.as_u16() == 404 {
            return Err(ExtractorError::NotFound(format!("Spotlight not found: {}", url)));
        }
        if !status.is_success() {
            return Err(ExtractorError::HttpError(format!("HTTP {}", status.as_u16())));
        }
        response.text().await
            .map_err(|e| ExtractorError::ParseError(e.to_string()))
    }

    /// Collect `(video URL, metadata)` pairs from the `__NEXT_DATA__` tree.
    ///
    /// `contentUrl` fields are preferred; `mediaUrl` is only consulted when
    /// no `contentUrl` matched. Metadata (upload date, view count, creator)
    /// is paired with videos by index, falling back to the first occurrence
    /// when the counts differ.
    fn extract_videos_from_next_data(&self, data: &Value) -> Vec<(String, HashMap<String, Value>)> {
        let mut videos = Vec::new();

        // Primary source: contentUrl fields (direct video URLs).
        // (The map binding needs no `mut` — it is filled in the enrichment
        // pass below via iter_mut; the old `let mut meta` tripped unused_mut.)
        for url_val in find_all_values(data, "contentUrl") {
            if let Some(url) = url_val.as_str() {
                if url.contains("sc-cdn.net") || url.contains(".mp4") {
                    videos.push((url.to_string(), HashMap::new()));
                }
            }
        }

        // Fallback source: mediaUrl fields.
        if videos.is_empty() {
            for url_val in find_all_values(data, "mediaUrl") {
                if let Some(url) = url_val.as_str() {
                    if url.contains("sc-cdn.net") || url.contains(".mp4") {
                        videos.push((url.to_string(), HashMap::new()));
                    }
                }
            }
        }

        // Enrich with metadata discovered elsewhere in the same JSON tree.
        let upload_dates = find_all_values(data, "uploadDateMs");
        let view_counts = find_all_values(data, "viewCount");
        let usernames = find_all_values(data, "username");
        let display_names = find_all_values(data, "displayName");

        for (i, (_url, meta)) in videos.iter_mut().enumerate() {
            // `.get(i).or(first())` == "use the i-th occurrence if present,
            // else the first" — same behavior as the previous if/else chains.
            if let Some(date_val) = upload_dates.get(i).or(upload_dates.first()) {
                meta.insert("upload_date".to_string(), (*date_val).clone());
            }
            if let Some(count_val) = view_counts.get(i).or(view_counts.first()) {
                meta.insert("view_count".to_string(), (*count_val).clone());
            }
            if let Some(user_val) = usernames.get(i).or(usernames.first()) {
                meta.insert("username".to_string(), (*user_val).clone());
            }
            if let Some(name_val) = display_names.get(i).or(display_names.first()) {
                meta.insert("display_name".to_string(), (*name_val).clone());
            }
        }

        videos
    }
}
#[async_trait]
impl Extractor for SnapchatSpotlightExtractor {
    fn category(&self) -> &str { "snapchat" }
    fn subcategory(&self) -> &str { "spotlight" }
    fn root(&self) -> &str { "https://www.snapchat.com" }
    fn pattern(&self) -> &Regex { &self.pattern }

    fn clone_extractor(&self) -> Box<dyn Extractor> {
        Box::new(self.clone())
    }

    /// Capture the spotlight ID from the matched URL.
    async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
        if let Some(caps) = self.pattern.captures(&m.url) {
            self.spotlight_id = caps.get(1).map(|m| m.as_str().to_string());
        }
        Ok(())
    }

    /// Fetch the spotlight page and emit one directory message followed by
    /// one URL message per discovered video.
    ///
    /// # Errors
    /// * `NotInitialized` if `initialize` was never called
    /// * `ParseError` if `__NEXT_DATA__` is missing or yields no video URLs
    async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
        let spotlight_id = self.spotlight_id.as_ref()
            .ok_or_else(|| ExtractorError::NotInitialized("spotlight_id not set".to_string()))?;
        let url = format!("https://www.snapchat.com/spotlight/{}", spotlight_id);
        log::info!("Fetching Snapchat spotlight: {}", url);

        let html = self.fetch_page(&url).await?;
        let next_data = extract_next_data(&html)
            .ok_or_else(|| ExtractorError::ParseError(
                "Could not find __NEXT_DATA__ in page HTML".to_string()
            ))?;

        let videos = self.extract_videos_from_next_data(&next_data);
        if videos.is_empty() {
            return Err(ExtractorError::ParseError(
                "No video URLs found in spotlight data".to_string()
            ));
        }

        let mut messages = Vec::new();

        // Directory message: creator taken from the first video's metadata
        // (falls back to "unknown" when no username was found).
        let creator = videos.first()
            .and_then(|(_, meta)| meta.get("username"))
            .and_then(|v| v.as_str())
            .unwrap_or("unknown");
        let dir_msg = Message::directory("")
            .with_metadata("category", serde_json::json!("snapchat"))
            .with_metadata("subcategory", serde_json::json!("spotlight"))
            .with_metadata("title", serde_json::json!(format!("spotlight_{}", spotlight_id)))
            .with_metadata("creator", serde_json::json!(creator));
        messages.push(dir_msg);

        // One URL message per video, carrying any metadata gathered earlier.
        for (video_url, meta) in &videos {
            let filename = cdn_filename(video_url)
                .unwrap_or_else(|| format!("{}.mp4", spotlight_id));
            let mut msg = Message::url(video_url)
                .with_filename(&filename);
            for (key, val) in meta {
                msg = msg.with_metadata(key, val.clone());
            }
            messages.push(msg);
        }

        log::info!("Found {} video(s) in spotlight {}", videos.len(), spotlight_id);
        Ok(messages)
    }
}
// ============================================================================
// SnapchatProfileExtractor — public profile stories
// ============================================================================
/// Extractor for public Snapchat profile stories
/// (`snapchat.com/add/{username}`).
#[derive(Clone)]
pub struct SnapchatProfileExtractor {
    // Compiled URL pattern used both for matching and username capture.
    pattern: Regex,
    // Username captured during `initialize`; None until then.
    username: Option<String>,
}
impl SnapchatProfileExtractor {
    /// Create a new profile extractor with its URL pattern precompiled.
    pub fn new() -> Self {
        Self {
            pattern: Regex::new(
                r"(?:https?://)?(?:www\.)?snapchat\.com/add/([A-Za-z0-9._-]+)"
            ).expect("Failed to compile Snapchat profile pattern"),
            username: None,
        }
    }

    // NOTE(review): create_client/fetch_page are duplicated from
    // SnapchatSpotlightExtractor — consider extracting a shared helper if a
    // third Snapchat extractor is ever added.

    /// Build an HTTP client with a browser-like user agent, a 30s timeout,
    /// and a bounded redirect policy.
    fn create_client(&self) -> Result<reqwest::Client, ExtractorError> {
        reqwest::Client::builder()
            .user_agent(USER_AGENT)
            .timeout(std::time::Duration::from_secs(30))
            .redirect(reqwest::redirect::Policy::limited(10))
            .build()
            .map_err(|e| ExtractorError::ConfigError(e.to_string()))
    }

    /// Fetch a page and return its HTML body.
    ///
    /// # Errors
    /// * `NotFound` for HTTP 404
    /// * `HttpError` for any other non-success status
    /// * `ParseError` if the body cannot be read as text
    async fn fetch_page(&self, url: &str) -> Result<String, ExtractorError> {
        let client = self.create_client()?;
        let response = client.get(url).send().await
            .map_err(ExtractorError::RequestFailed)?;
        let status = response.status();
        if status.as_u16() == 404 {
            return Err(ExtractorError::NotFound(format!("Profile not found: {}", url)));
        }
        if !status.is_success() {
            return Err(ExtractorError::HttpError(format!("HTTP {}", status.as_u16())));
        }
        response.text().await
            .map_err(|e| ExtractorError::ParseError(e.to_string()))
    }
}
#[async_trait]
impl Extractor for SnapchatProfileExtractor {
    fn category(&self) -> &str { "snapchat" }
    fn subcategory(&self) -> &str { "profile" }
    fn root(&self) -> &str { "https://www.snapchat.com" }
    fn pattern(&self) -> &Regex { &self.pattern }

    fn clone_extractor(&self) -> Box<dyn Extractor> {
        Box::new(self.clone())
    }

    /// Capture the username from the matched URL.
    async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
        if let Some(caps) = self.pattern.captures(&m.url) {
            self.username = caps.get(1).map(|m| m.as_str().to_string());
        }
        Ok(())
    }

    /// Fetch the profile page and emit one directory message followed by
    /// one URL message per discovered media item.
    ///
    /// An empty profile is not an error: the directory message alone is
    /// returned and a warning is logged.
    ///
    /// # Errors
    /// * `NotInitialized` if `initialize` was never called
    /// * `ParseError` if `__NEXT_DATA__` is missing from the page
    async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
        let username = self.username.as_ref()
            .ok_or_else(|| ExtractorError::NotInitialized("username not set".to_string()))?;
        let url = format!("https://www.snapchat.com/add/{}", username);
        log::info!("Fetching Snapchat profile: {}", url);

        let html = self.fetch_page(&url).await?;
        let next_data = extract_next_data(&html)
            .ok_or_else(|| ExtractorError::ParseError(
                "Could not find __NEXT_DATA__ in page HTML. Profile may be private or empty.".to_string()
            ))?;

        // Collect media URLs, deduplicated while preserving discovery order.
        // A HashSet gives O(1) membership checks instead of the previous
        // Vec::contains scan (O(n) plus a String allocation per candidate).
        let mut media_urls: Vec<String> = Vec::new();
        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();

        // contentUrl: videos (restricted to Snapchat CDN / mp4 links)
        for val in find_all_values(&next_data, "contentUrl") {
            if let Some(u) = val.as_str() {
                if (u.contains("sc-cdn.net") || u.contains(".mp4")) && seen.insert(u.to_string()) {
                    media_urls.push(u.to_string());
                }
            }
        }
        // mediaUrl: alternate/additional media (images allowed too)
        for val in find_all_values(&next_data, "mediaUrl") {
            if let Some(u) = val.as_str() {
                if (u.contains("sc-cdn.net") || u.contains(".mp4") || u.contains(".jpg") || u.contains(".png"))
                    && seen.insert(u.to_string())
                {
                    media_urls.push(u.to_string());
                }
            }
        }
        // snapMediaUrl: story media (no filtering)
        for val in find_all_values(&next_data, "snapMediaUrl") {
            if let Some(u) = val.as_str() {
                if seen.insert(u.to_string()) {
                    media_urls.push(u.to_string());
                }
            }
        }
        // thumbnailUrl: image previews (no filtering)
        for val in find_all_values(&next_data, "thumbnailUrl") {
            if let Some(u) = val.as_str() {
                if seen.insert(u.to_string()) {
                    media_urls.push(u.to_string());
                }
            }
        }

        let mut messages = Vec::new();

        // Directory message: prefer the profile's display name, falling back
        // to the URL username.
        let display_name = find_all_values(&next_data, "displayName")
            .first()
            .and_then(|v| v.as_str())
            .unwrap_or(username.as_str())
            .to_string();
        let dir_msg = Message::directory("")
            .with_metadata("category", serde_json::json!("snapchat"))
            .with_metadata("subcategory", serde_json::json!("profile"))
            .with_metadata("title", serde_json::json!(&display_name))
            .with_metadata("username", serde_json::json!(username));
        messages.push(dir_msg);

        if media_urls.is_empty() {
            log::warn!("No media found on profile {}. It may be private or have no public stories.", username);
            return Ok(messages);
        }

        for (i, media_url) in media_urls.iter().enumerate() {
            // Prefer a CDN-derived filename; otherwise build one from the
            // username, index, and a best-guess extension.
            let filename = cdn_filename(media_url)
                .unwrap_or_else(|| {
                    let ext = if media_url.contains(".mp4") { "mp4" }
                    else if media_url.contains(".jpg") || media_url.contains(".jpeg") { "jpg" }
                    else if media_url.contains(".png") { "png" }
                    else { "mp4" };
                    format!("{}_{:03}.{}", username, i + 1, ext)
                });
            let msg = Message::url(media_url)
                .with_filename(&filename)
                .with_metadata("username", serde_json::json!(username))
                .with_metadata("num", serde_json::json!(i + 1));
            messages.push(msg);
        }

        log::info!("Found {} media item(s) on profile {}", media_urls.len(), username);
        Ok(messages)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_spotlight_pattern() {
        // Scheme and "www." are optional; IDs may contain '-' and '_'.
        let ext = SnapchatSpotlightExtractor::new();
        assert!(ext.pattern.is_match("https://www.snapchat.com/spotlight/ABC123_def"));
        assert!(ext.pattern.is_match("https://snapchat.com/spotlight/ABC123"));
        assert!(ext.pattern.is_match("http://www.snapchat.com/spotlight/test-id_123"));
        // Profile URLs and the bare root must not match.
        assert!(!ext.pattern.is_match("https://snapchat.com/add/username"));
        assert!(!ext.pattern.is_match("https://snapchat.com/"));
    }

    #[test]
    fn test_profile_pattern() {
        // Usernames may contain '.', '_' and '-'.
        let ext = SnapchatProfileExtractor::new();
        assert!(ext.pattern.is_match("https://www.snapchat.com/add/john_doe"));
        assert!(ext.pattern.is_match("https://snapchat.com/add/user.name"));
        assert!(ext.pattern.is_match("http://www.snapchat.com/add/test-user"));
        // Spotlight URLs and the bare root must not match.
        assert!(!ext.pattern.is_match("https://snapchat.com/spotlight/ABC123"));
        assert!(!ext.pattern.is_match("https://snapchat.com/"));
    }

    #[test]
    fn test_extract_next_data() {
        // The JSON inside the __NEXT_DATA__ script tag should parse and be
        // searchable for contentUrl entries.
        let html = r#"<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"story":{"contentUrl":"https://cf-st.sc-cdn.net/d/HASH123.27.IRZXSOY?mo=test"}}}}</script></head></html>"#;
        let data = extract_next_data(html);
        assert!(data.is_some());
        let data = data.unwrap();
        let urls = find_all_values(&data, "contentUrl");
        assert_eq!(urls.len(), 1);
        assert_eq!(urls[0].as_str().unwrap(), "https://cf-st.sc-cdn.net/d/HASH123.27.IRZXSOY?mo=test");
    }

    #[test]
    fn test_extract_next_data_missing() {
        // A page without the script tag yields None, not a panic.
        let html = r#"<html><head></head><body>No next data here</body></html>"#;
        assert!(extract_next_data(html).is_none());
    }

    #[test]
    fn test_find_all_values() {
        // Matches must be found in nested objects AND inside arrays.
        let json: Value = serde_json::json!({
            "a": {
                "contentUrl": "url1",
                "nested": {
                    "contentUrl": "url2"
                }
            },
            "b": [
                {"contentUrl": "url3"},
                {"other": "ignored"}
            ]
        });
        let urls = find_all_values(&json, "contentUrl");
        assert_eq!(urls.len(), 3);
    }

    #[test]
    fn test_cdn_filename() {
        // The hash before the first dot of the last path segment becomes the
        // base name; query strings are ignored.
        assert_eq!(
            cdn_filename("https://cf-st.sc-cdn.net/d/ABCDEF.27.IRZXSOY?mo=test&uc=46"),
            Some("ABCDEF.mp4".to_string())
        );
        assert_eq!(
            cdn_filename("https://bolt-gcdn.sc-cdn.net/video/HASH123.27.IRZXSOY?mo=test"),
            Some("HASH123.mp4".to_string())
        );
    }

    #[test]
    fn test_cdn_filename_no_hash() {
        // Should still extract something from normal URLs
        assert!(cdn_filename("https://example.com/some/path/file.mp4").is_some());
    }

    #[test]
    fn test_spotlight_extract_videos() {
        // A contentUrl with sibling metadata should produce one enriched video.
        let ext = SnapchatSpotlightExtractor::new();
        let data: Value = serde_json::json!({
            "props": {
                "pageProps": {
                    "story": {
                        "contentUrl": "https://cf-st.sc-cdn.net/d/ABC.27.IRZXSOY?mo=test",
                        "uploadDateMs": 1700000000000_u64,
                        "viewCount": 50000,
                        "username": "testuser",
                        "displayName": "Test User"
                    }
                }
            }
        });
        let videos = ext.extract_videos_from_next_data(&data);
        assert_eq!(videos.len(), 1);
        assert!(videos[0].0.contains("sc-cdn.net"));
        assert!(videos[0].1.contains_key("username"));
    }

    #[test]
    fn test_spotlight_mediaurl_fallback() {
        // When no contentUrl is present, mediaUrl entries are used instead.
        let ext = SnapchatSpotlightExtractor::new();
        let data: Value = serde_json::json!({
            "props": {
                "pageProps": {
                    "media": {
                        "mediaUrl": "https://cf-st.sc-cdn.net/d/FALLBACK.27.IRZXSOY?mo=x"
                    }
                }
            }
        });
        let videos = ext.extract_videos_from_next_data(&data);
        assert_eq!(videos.len(), 1);
        assert!(videos[0].0.contains("FALLBACK"));
    }
}

View File

@@ -1,40 +1,462 @@
//! XenForo extractor implementation
//!
//! Supports XenForo forums (simpcity.cr, nudostar.com/forum, etc.)
//! Extracts images and videos from thread posts with pagination support.
use async_trait::async_trait;
use regex::Regex;
use std::collections::{HashMap, HashSet};
use crate::extractor::{
Extractor, ExtractorError, ExtractorMatch, HttpClient, Message,
Extractor, ExtractorError, ExtractorMatch, Message, MessageKind,
};
pub struct XenforoPostExtractor {
pattern: Regex,
category: String,
subcategory: String,
root_url: String,
post_id: Option<String>,
client: HttpClient,
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
/// Root URL for a XenForo domain captured from an input URL.
///
/// The captured "domain" may include a path prefix (e.g. "nudostar.com/forum"
/// or "allthefallen.moe/forum"), so the root is always just "https://" plus
/// the captured value. Known hosts matched by the extractors: simpcity.cr,
/// simpcity.su, nudostar.com/forum, allthefallen.moe/forum, celebforum.to,
/// titsintops.com/phpBB2, forums.socialmediagirls.com.
fn root_for_domain(domain: &str) -> String {
    // The previous per-domain match produced exactly this value in every arm,
    // so the special cases were redundant.
    format!("https://{}", domain)
}
/// Serialize a cookie map into a single `Cookie:` header value
/// ("name1=value1; name2=value2"). Values are not escaped.
fn cookie_header(cookies: &HashMap<String, String>) -> String {
    let mut parts: Vec<String> = Vec::with_capacity(cookies.len());
    for (name, value) in cookies {
        parts.push(format!("{}={}", name, value));
    }
    parts.join("; ")
}
/// Extract media URLs from HTML content.
///
/// Finds all media by matching multiple patterns:
/// - `<img class="bbImage" src="...">` — inline images
/// - `<video src="...">` — inline videos
/// - `<a href=".../attachments/...">` — file attachments
/// - `<iframe src="...">` — embedded media
/// - `loadMedia(this, '...')` — lazy-loaded embeds
///
/// Every hit is passed through `normalize_url` (relative paths resolved
/// against `root_url`, style/smiley/avatar junk dropped, thumbnails
/// upgraded). The returned list may contain duplicates — callers de-dupe.
// NOTE(review): the five regexes are recompiled on every call; if this
// function shows up in profiles, hoist them into LazyLock statics.
fn extract_media_from_html(html: &str, root_url: &str) -> Vec<String> {
    let mut urls = Vec::new();
    // 1. bbImage: <img ... class="bbImage" ... src="URL"> or data-url="URL"
    //    The alternation handles both attribute orders (class-then-src and
    //    src-then-class); exactly one of the two capture groups matches.
    let img_re = Regex::new(r#"<img[^>]+class="bbImage[^"]*"[^>]*(?:data-url|src)="([^"]+)"|<img[^>]*(?:data-url|src)="([^"]+)"[^>]*class="bbImage[^"]*""#).unwrap();
    for caps in img_re.captures_iter(html) {
        if let Some(m) = caps.get(1).or(caps.get(2)) {
            urls.push(m.as_str().to_string());
        }
    }
    // 2. Video src
    let video_re = Regex::new(r#"<video[^>]+src="([^"]+)"#).unwrap();
    for caps in video_re.captures_iter(html) {
        if let Some(m) = caps.get(1) {
            urls.push(m.as_str().to_string());
        }
    }
    // 3. Attachments (any anchor whose href contains "/attachments/")
    let attach_re = Regex::new(r#"<a[^>]+href="([^"]+/attachments/[^"]+)"#).unwrap();
    for caps in attach_re.captures_iter(html) {
        if let Some(m) = caps.get(1) {
            urls.push(m.as_str().to_string());
        }
    }
    // 4. Iframes (embedded players, external hosts)
    let iframe_re = Regex::new(r#"<iframe[^>]+src="([^"]+)"#).unwrap();
    for caps in iframe_re.captures_iter(html) {
        if let Some(m) = caps.get(1) {
            urls.push(m.as_str().to_string());
        }
    }
    // 5. Lazy-loaded media (XenForo's loadMedia(this, 'URL') onclick handlers)
    let lazy_re = Regex::new(r#"loadMedia\(this,\s*'([^']+)'"#).unwrap();
    for caps in lazy_re.captures_iter(html) {
        if let Some(m) = caps.get(1) {
            urls.push(m.as_str().to_string());
        }
    }
    // Normalize and filter; normalize_url returning None drops the URL.
    urls.into_iter()
        .filter_map(|u| normalize_url(&u, root_url))
        .collect()
}
/// Normalize a URL: resolve relative paths, upgrade protocol, skip junk.
///
/// Returns `None` for forum chrome (styles, smilies, avatars, icons,
/// reaction sprites, inline data URIs) and for anything that cannot be
/// turned into an http(s) URL. Surviving URLs get their thumbnail suffix
/// upgraded via `upgrade_thumbnail`.
fn normalize_url(url: &str, root_url: &str) -> Option<String> {
    // Fragments that identify non-content assets we never want to download.
    const JUNK_MARKERS: [&str; 6] = [
        "/styles/",
        "/smilies/",
        "data/avatars/",
        "data:image",
        "/icons/",
        "reaction-sprite",
    ];
    if JUNK_MARKERS.iter().any(|marker| url.contains(marker)) {
        return None;
    }
    // Make the URL absolute: protocol-relative gets https, root-relative is
    // joined onto the forum root, everything else passes through unchanged.
    let absolute = if let Some(rest) = url.strip_prefix("//") {
        format!("https://{}", rest)
    } else if url.starts_with('/') {
        format!("{}{}", root_url, url)
    } else {
        url.to_string()
    };
    if absolute.starts_with("http://") || absolute.starts_with("https://") {
        // Upgrade .md.jpg-style thumbnails to full size (simpcity CDN pattern).
        Some(upgrade_thumbnail(&absolute))
    } else {
        None
    }
}
/// Upgrade simpcity CDN thumbnail URLs to full-size,
/// e.g. `image.md.jpg` -> `image.jpg` (query string, if any, is preserved).
///
/// Implemented with plain string operations instead of the previous regex,
/// which was recompiled on every call (this runs once per extracted URL).
fn upgrade_thumbnail(url: &str) -> String {
    const EXTENSIONS: [&str; 5] = ["jpg", "jpeg", "png", "gif", "webp"];
    // Only inspect the path part; the query string is re-appended verbatim.
    let (path, query) = match url.find('?') {
        Some(i) => url.split_at(i),
        None => (url, ""),
    };
    for ext in EXTENSIONS {
        let marker = format!(".md.{}", ext);
        if let Some(stem) = path.strip_suffix(marker.as_str()) {
            return format!("{}.{}{}", stem, ext, query);
        }
    }
    url.to_string()
}
/// Decode common HTML entities.
///
/// `&amp;` must be decoded LAST: the previous ordering decoded it before
/// `&lt;`/`&gt;`/`&quot;`, so double-escaped input like `&amp;lt;` was
/// wrongly collapsed all the way to `<` instead of the correct `&lt;`.
fn decode_html_entities(s: &str) -> String {
    s.replace("&nbsp;", " ")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&#x27;", "'")
        // Keep this last so "&amp;lt;" decodes to "&lt;", not "<".
        .replace("&amp;", "&")
}
/// Extract the thread title from the page HTML.
///
/// The `(?s)` flag makes `.` match newlines, so titles whose `<h1>` body is
/// wrapped across several lines (common in pretty-printed XenForo markup)
/// are still found; the previous pattern missed them because `.` stops at
/// line breaks by default. Inner tags are stripped and HTML entities decoded.
fn extract_thread_title(html: &str) -> Option<String> {
    let re = Regex::new(r#"(?s)<h1[^>]*class="[^"]*p-title-value[^"]*"[^>]*>(.*?)</h1>"#).ok()?;
    re.captures(html)
        .and_then(|c| c.get(1))
        .map(|m| {
            // Strip inner tags like <span>
            let tag_re = Regex::new(r"<[^>]+>").unwrap();
            let title = tag_re.replace_all(m.as_str().trim(), "").trim().to_string();
            // Decode HTML entities
            decode_html_entities(&title)
        })
}
/// Find the next page URL from pagination.
///
/// Returns the (possibly root-relative) href of XenForo's
/// `pageNav-jump--next` link with `&amp;` decoded, or `None` on the last
/// page — which is what terminates the thread crawl loop.
fn find_next_page(html: &str) -> Option<String> {
    // Handle both attribute orderings: class before href, or href before class
    let re = Regex::new(
        r#"<a[^>]*href="([^"]+)"[^>]*class="[^"]*pageNav-jump--next[^"]*"|<a[^>]*class="[^"]*pageNav-jump--next[^"]*"[^>]*href="([^"]+)""#
    ).ok()?;
    re.captures(html).and_then(|c| {
        // Exactly one alternation branch matched; take whichever group is set.
        c.get(1).or(c.get(2))
    }).map(|m| {
        m.as_str().replace("&amp;", "&")
    })
}
/// Extract individual post blocks from the page HTML.
///
/// XenForo posts are `<article>` elements with `data-content="post-NNNNN"`.
/// We split the HTML at each post boundary and extract the content between them.
fn extract_posts(html: &str) -> Vec<(String, String)> {
let boundary_re = Regex::new(r#"data-content="post-(\d+)""#)
.expect("Failed to compile post boundary regex");
let matches: Vec<_> = boundary_re.captures_iter(html)
.filter_map(|c| {
let full = c.get(0)?;
let id = c.get(1)?.as_str().to_string();
Some((id, full.start()))
})
.collect();
if matches.is_empty() {
return Vec::new();
}
let mut posts = Vec::new();
for i in 0..matches.len() {
let (ref id, start) = matches[i];
let end = if i + 1 < matches.len() {
matches[i + 1].1
} else {
html.len()
};
let post_html = &html[start..end];
posts.push((id.clone(), post_html.to_string()));
}
posts
}
// ============================================================================
// XenforoThreadExtractor
// ============================================================================
/// Extracts every image/video/attachment from a XenForo thread, following
/// pagination until the last page.
pub struct XenforoThreadExtractor {
    pattern: Regex,                    // matches supported forum thread URLs
    category: String,                  // always "xenforo"
    subcategory: String,               // always "thread"
    root_url: String,                  // scheme+host(+path prefix) of the forum
    domain: Option<String>,            // domain captured from the input URL
    thread_path: Option<String>,       // e.g. "/threads/slug.12345"
    thread_id: Option<String>,         // numeric thread id, kept as a string
    page: Option<i64>,                 // explicit starting page, if the URL had one
    cookies: HashMap<String, String>,  // login cookies for authenticated fetches
}
pub struct XenforoForumExtractor {
impl XenforoThreadExtractor {
    /// Create a new thread extractor with the default root (simpcity.cr).
    ///
    /// The URL pattern captures four groups:
    /// 1. forum domain (may include a path prefix, e.g. "nudostar.com/forum")
    /// 2. thread path ("/threads/<slug>.<id>")
    /// 3. numeric thread id
    /// 4. optional starting page number ("/page-N")
    pub fn new() -> Result<Self, ExtractorError> {
        let pattern = Regex::new(
            r"(?:https?://)?(?:www\.)?(simpcity\.cr|simpcity\.su|nudostar\.com/forum|allthefallen\.moe/forum|celebforum\.to|titsintops\.com/phpBB2|forums\.socialmediagirls\.com)(/(?:index\.php\?)?threads/(?:[^/?#]+\.)?(\d+))(?:/page-(\d+))?"
        ).map_err(|e| ExtractorError::ConfigError(e.to_string()))?;
        Ok(Self {
            pattern,
            category: "xenforo".to_string(),
            subcategory: "thread".to_string(),
            // Default root; replaced in initialize() once the domain is known.
            root_url: "https://simpcity.cr".to_string(),
            domain: None,
            thread_path: None,
            thread_id: None,
            page: None,
            cookies: HashMap::new(),
        })
    }

    /// Build a fresh HTTP client with a browser-like User-Agent, a 30 s
    /// timeout and a bounded redirect policy.
    // NOTE(review): a new client is built per page fetch; reusing one client
    // would keep connection pooling — confirm and refactor if it matters.
    fn create_client(&self) -> Result<reqwest::Client, ExtractorError> {
        reqwest::Client::builder()
            .user_agent(USER_AGENT)
            .timeout(std::time::Duration::from_secs(30))
            .redirect(reqwest::redirect::Policy::limited(10))
            .build()
            .map_err(|e| ExtractorError::ConfigError(e.to_string()))
    }

    /// GET a page, attaching the cookie jar when non-empty.
    ///
    /// HTTP 401/403 are mapped to a ConfigError with a hint about setting
    /// XenForo login cookies; other non-2xx statuses become HttpError.
    async fn fetch_page(&self, url: &str) -> Result<String, ExtractorError> {
        let client = self.create_client()?;
        let mut request = client.get(url);
        if !self.cookies.is_empty() {
            request = request.header("Cookie", cookie_header(&self.cookies));
        }
        let response = request.send().await
            .map_err(ExtractorError::RequestFailed)?;
        let status = response.status();
        if status.as_u16() == 403 || status.as_u16() == 401 {
            return Err(ExtractorError::ConfigError(format!(
                "Authentication required (HTTP {}). Set cookies in config: \
                extractor.xenforo.cookies.xf_user = \"your_cookie_value\"",
                status.as_u16()
            )));
        }
        if !status.is_success() {
            return Err(ExtractorError::HttpError(format!("HTTP {}", status.as_u16())));
        }
        response.text().await
            .map_err(|e| ExtractorError::ParseError(e.to_string()))
    }

    /// Walk the thread page-by-page, emitting one Directory message (thread
    /// id / title / category) followed by one Url message per unique media
    /// URL. De-duplication spans posts and pages via `seen_urls`.
    async fn extract_thread(&self) -> Result<Vec<Message>, ExtractorError> {
        let thread_path = self.thread_path.as_ref()
            .ok_or_else(|| ExtractorError::NotInitialized("thread_path not set".to_string()))?;
        let mut messages = Vec::new();
        let mut seen_urls: HashSet<String> = HashSet::new();
        // Build the starting URL (honoring an explicit /page-N in the input).
        let start_url = if let Some(page) = self.page {
            format!("{}{}/page-{}", self.root_url, thread_path, page)
        } else {
            format!("{}{}/", self.root_url, thread_path)
        };
        let mut current_url = Some(start_url);
        let mut page_num = self.page.unwrap_or(1);
        let mut total_media = 0;
        while let Some(url) = current_url.take() {
            log::info!("Fetching page {} of thread: {}", page_num, url);
            let html = self.fetch_page(&url).await?;
            // Extract thread title on first fetched page for the directory
            // message (the second clause fires when crawling starts on an
            // explicit page > 1).
            if page_num <= 1 || (self.page.is_some() && page_num == self.page.unwrap()) {
                let title = extract_thread_title(&html)
                    .unwrap_or_else(|| "unknown".to_string());
                log::info!("Thread title: {}", title);
                let mut dir_msg = Message::directory("");
                dir_msg.metadata.insert("thread_id".to_string(),
                    serde_json::json!(self.thread_id.as_deref().unwrap_or("0")));
                dir_msg.metadata.insert("title".to_string(), serde_json::json!(title));
                dir_msg.metadata.insert("category".to_string(), serde_json::json!("xenforo"));
                messages.push(dir_msg);
            }
            // Extract posts and their media
            let posts = extract_posts(&html);
            log::info!("Found {} posts on page {}", posts.len(), page_num);
            for (post_id, post_html) in &posts {
                let media_urls = extract_media_from_html(post_html, &self.root_url);
                for media_url in media_urls {
                    if seen_urls.contains(&media_url) {
                        continue;
                    }
                    seen_urls.insert(media_url.clone());
                    let msg = Message::url(&media_url)
                        .with_metadata("post_id", serde_json::json!(post_id))
                        .with_metadata("thread_id",
                            serde_json::json!(self.thread_id.as_deref().unwrap_or("0")));
                    // Try to extract a filename from the URL
                    if let Some(filename) = url_filename(&media_url) {
                        messages.push(msg.with_filename(filename));
                    } else {
                        messages.push(msg);
                    }
                    total_media += 1;
                }
            }
            // If no posts found at all, try a simpler fallback: just extract
            // all media from the page (handles non-standard thread markup).
            if posts.is_empty() {
                log::warn!("No post blocks found on page {} — trying full-page scan", page_num);
                let media_urls = extract_media_from_html(&html, &self.root_url);
                for media_url in media_urls {
                    if seen_urls.contains(&media_url) {
                        continue;
                    }
                    seen_urls.insert(media_url.clone());
                    let msg = Message::url(&media_url);
                    if let Some(filename) = url_filename(&media_url) {
                        messages.push(msg.with_filename(filename));
                    } else {
                        messages.push(msg);
                    }
                    total_media += 1;
                }
            }
            // Check for next page; relative hrefs are resolved against root_url.
            // No next link means last page, so the loop terminates.
            if let Some(next_href) = find_next_page(&html) {
                let next_url = if next_href.starts_with("http") {
                    next_href
                } else {
                    format!("{}{}", self.root_url, next_href)
                };
                current_url = Some(next_url);
                page_num += 1;
            }
        }
        log::info!("Extracted {} media URLs across {} pages", total_media, page_num);
        Ok(messages)
    }
}
/// Try to extract a usable filename from a URL.
///
/// Returns `None` when the URL does not parse, the path ends in `/`, or the
/// last segment carries no extension dot.
fn url_filename(url: &str) -> Option<String> {
    let parsed = url::Url::parse(url).ok()?;
    let last_segment = parsed.path().rsplit('/').next()?.to_string();
    if last_segment.is_empty() || !last_segment.contains('.') {
        return None;
    }
    // Percent-decode so e.g. "my%20file.jpg" becomes "my file.jpg".
    urlencoding::decode(&last_segment).ok().map(|cow| cow.into_owned())
}
impl Default for XenforoThreadExtractor {
    /// Delegates to `new()`; panics only if the hard-coded URL regex fails
    /// to compile, which would be a programming error.
    fn default() -> Self {
        Self::new().expect("Failed to create XenforoThreadExtractor")
    }
}
// Manual field-by-field Clone. NOTE(review): every field appears to be Clone
// itself, so `#[derive(Clone)]` on the struct would likely be equivalent —
// confirm and simplify.
impl Clone for XenforoThreadExtractor {
    fn clone(&self) -> Self {
        Self {
            pattern: self.pattern.clone(),
            category: self.category.clone(),
            subcategory: self.subcategory.clone(),
            root_url: self.root_url.clone(),
            domain: self.domain.clone(),
            thread_path: self.thread_path.clone(),
            thread_id: self.thread_id.clone(),
            page: self.page,
            cookies: self.cookies.clone(),
        }
    }
}
#[async_trait]
impl Extractor for XenforoThreadExtractor {
    fn category(&self) -> &str { &self.category }
    fn subcategory(&self) -> &str { &self.subcategory }
    fn root(&self) -> &str { &self.root_url }
    fn pattern(&self) -> &Regex { &self.pattern }
    fn clone_extractor(&self) -> Box<dyn Extractor> { Box::new(self.clone()) }

    /// Parse the matched URL into domain / thread path / thread id / page.
    ///
    /// NOTE(review): returns Ok even when the URL does not match the pattern
    /// (all fields stay None); items() then fails with NotInitialized —
    /// confirm this is the intended contract for Extractor::initialize.
    async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
        if let Some(captures) = self.pattern.captures(&m.url) {
            if let Some(domain) = captures.get(1) {
                self.domain = Some(domain.as_str().to_string());
                // Point the root at the forum the user actually requested.
                self.root_url = root_for_domain(domain.as_str());
            }
            if let Some(path) = captures.get(2) {
                self.thread_path = Some(path.as_str().to_string());
            }
            if let Some(id) = captures.get(3) {
                self.thread_id = Some(id.as_str().to_string());
            }
            if let Some(page) = captures.get(4) {
                // Unparsable page numbers fall back to page 1.
                self.page = Some(page.as_str().parse::<i64>().unwrap_or(1));
            }
        }
        log::info!("Initialized XenForo thread extractor: path={:?} id={:?} page={:?}",
            self.thread_path, self.thread_id, self.page);
        Ok(())
    }

    async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
        self.extract_thread().await
    }

    /// Install login cookies (e.g. xf_user / xf_session), replacing any
    /// previously set jar.
    fn set_cookies(&mut self, cookies: HashMap<String, String>) {
        self.cookies = cookies;
        log::debug!("XenForo cookies set: {} entries", self.cookies.len());
    }
}
// ============================================================================
// XenforoPostExtractor
// ============================================================================
/// Extracts media from a single XenForo post.
///
/// The stray `forum_id` and `client: HttpClient` fields were merge residue:
/// neither `new()` nor `clone()` initializes them and nothing reads them
/// (requests build a fresh client via `create_client()`), so they are removed.
pub struct XenforoPostExtractor {
    pattern: Regex,                    // matches .../posts/<id> URLs
    category: String,                  // always "xenforo"
    subcategory: String,               // always "post"
    root_url: String,                  // scheme+host(+path prefix) of the forum
    domain: Option<String>,            // domain captured from the input URL
    post_id: Option<String>,           // numeric post id from the input URL
    post_url_prefix: Option<String>,   // path prefix captured before the id
    cookies: HashMap<String, String>,  // login cookies for authenticated fetches
}
impl XenforoPostExtractor {
@@ -48,27 +470,79 @@ impl XenforoPostExtractor {
category: "xenforo".to_string(),
subcategory: "post".to_string(),
root_url: "https://simpcity.cr".to_string(),
domain: None,
post_id: None,
client: HttpClient::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
.build()
.map_err(|e| ExtractorError::ConfigError(e.to_string()))?,
post_url_prefix: None,
cookies: HashMap::new(),
})
}
    /// Build a fresh HTTP client with a browser-like User-Agent, a 30 s
    /// timeout and bounded redirects.
    // NOTE(review): identical to XenforoThreadExtractor::create_client —
    // consider extracting one shared helper.
    fn create_client(&self) -> Result<reqwest::Client, ExtractorError> {
        reqwest::Client::builder()
            .user_agent(USER_AGENT)
            .timeout(std::time::Duration::from_secs(30))
            .redirect(reqwest::redirect::Policy::limited(10))
            .build()
            .map_err(|e| ExtractorError::ConfigError(e.to_string()))
    }
async fn extract_post(&self) -> Result<Vec<Message>, ExtractorError> {
let post_id = self.post_id.as_ref()
.ok_or_else(|| ExtractorError::NotInitialized("post_id not set".to_string()))?;
log::info!("Extracting XenForo post: {}", post_id);
let mut messages = Vec::new();
// Fetch the post page
let url = format!("{}/posts/{}/", self.root_url, post_id);
let client = self.create_client()?;
let mut request = client.get(&url);
if !self.cookies.is_empty() {
request = request.header("Cookie", cookie_header(&self.cookies));
}
let response = request.send().await
.map_err(ExtractorError::RequestFailed)?;
let status = response.status();
if !status.is_success() {
return Err(ExtractorError::HttpError(format!("HTTP {}", status.as_u16())));
}
let html = response.text().await
.map_err(|e| ExtractorError::ParseError(e.to_string()))?;
let mut messages = Vec::new();
let mut seen_urls: HashSet<String> = HashSet::new();
// Directory message
let mut dir_msg = Message::directory("");
dir_msg.metadata.insert("post_id".to_string(), serde_json::json!(post_id.parse::<i64>().unwrap_or(0)));
dir_msg.metadata.insert("post_id".to_string(), serde_json::json!(post_id));
messages.push(dir_msg);
log::info!("Found XenForo post {}", post_id);
// Try to find just the target post
let posts = extract_posts(&html);
let target_html = posts.iter()
.find(|(id, _)| id == post_id)
.map(|(_, content)| content.as_str())
.unwrap_or(&html);
let media_urls = extract_media_from_html(target_html, &self.root_url);
for media_url in media_urls {
if seen_urls.contains(&media_url) {
continue;
}
seen_urls.insert(media_url.clone());
let msg = Message::url(&media_url)
.with_metadata("post_id", serde_json::json!(post_id));
if let Some(filename) = url_filename(&media_url) {
messages.push(msg.with_filename(filename));
} else {
messages.push(msg);
}
}
log::info!("Extracted {} media URLs from post {}", messages.len() - 1, post_id);
Ok(messages)
}
}
@@ -86,11 +560,10 @@ impl Clone for XenforoPostExtractor {
category: self.category.clone(),
subcategory: self.subcategory.clone(),
root_url: self.root_url.clone(),
domain: self.domain.clone(),
post_id: self.post_id.clone(),
client: HttpClient::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
.build()
.expect("Failed to create HTTP client"),
post_url_prefix: self.post_url_prefix.clone(),
cookies: self.cookies.clone(),
}
}
}
@@ -105,7 +578,14 @@ impl Extractor for XenforoPostExtractor {
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
if let Some(captures) = self.pattern.captures(&m.url) {
if let Some(id) = captures.get(2) {
if let Some(domain) = captures.get(1) {
self.domain = Some(domain.as_str().to_string());
self.root_url = root_for_domain(domain.as_str());
}
if let Some(prefix) = captures.get(2) {
self.post_url_prefix = Some(prefix.as_str().to_string());
}
if let Some(id) = captures.get(3) {
self.post_id = Some(id.as_str().to_string());
}
}
@@ -115,91 +595,24 @@ impl Extractor for XenforoPostExtractor {
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
self.extract_post().await
}
}
impl XenforoThreadExtractor {
pub fn new() -> Result<Self, ExtractorError> {
let pattern = Regex::new(
r"(?:https?://)?(?:www\.)?(simpcity\.cr|simpcity\.su|nudostar\.com/forum|allthefallen\.moe/forum|celebforum\.to|titsintops\.com/phpBB2|forums\.socialmediagirls\.com)(/(?:index\.php\?)?threads/(?:[^/?#]+\.)?(\d+))(?:/page-(\d+))?"
).map_err(|e| ExtractorError::ConfigError(e.to_string()))?;
Ok(Self {
pattern,
category: "xenforo".to_string(),
subcategory: "thread".to_string(),
root_url: "https://simpcity.cr".to_string(),
thread_id: None,
page: None,
client: HttpClient::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
.build()
.map_err(|e| ExtractorError::ConfigError(e.to_string()))?,
})
}
async fn extract_thread(&self) -> Result<Vec<Message>, ExtractorError> {
let thread_id = self.thread_id.as_ref()
.ok_or_else(|| ExtractorError::NotInitialized("thread_id not set".to_string()))?;
log::info!("Extracting XenForo thread: {}", thread_id);
let mut messages = Vec::new();
let mut dir_msg = Message::directory("");
dir_msg.metadata.insert("thread_id".to_string(), serde_json::json!(thread_id.parse::<i64>().unwrap_or(0)));
messages.push(dir_msg);
log::info!("Found XenForo thread {}", thread_id);
Ok(messages)
fn set_cookies(&mut self, cookies: HashMap<String, String>) {
self.cookies = cookies;
}
}
impl Default for XenforoThreadExtractor {
fn default() -> Self {
Self::new().expect("Failed to create XenforoThreadExtractor")
}
}
// ============================================================================
// XenforoForumExtractor
// ============================================================================
impl Clone for XenforoThreadExtractor {
fn clone(&self) -> Self {
Self {
pattern: self.pattern.clone(),
category: self.category.clone(),
subcategory: self.subcategory.clone(),
root_url: self.root_url.clone(),
thread_id: self.thread_id.clone(),
page: self.page.clone(),
client: HttpClient::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
.build()
.expect("Failed to create HTTP client"),
}
}
}
#[async_trait]
impl Extractor for XenforoThreadExtractor {
fn category(&self) -> &str { &self.category }
fn subcategory(&self) -> &str { &self.subcategory }
fn root(&self) -> &str { &self.root_url }
fn pattern(&self) -> &Regex { &self.pattern }
fn clone_extractor(&self) -> Box<dyn Extractor> { Box::new(self.clone()) }
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
if let Some(captures) = self.pattern.captures(&m.url) {
if let Some(id) = captures.get(2) {
self.thread_id = Some(id.as_str().to_string());
}
if let Some(page) = captures.get(3) {
self.page = Some(page.as_str().parse::<i64>().unwrap_or(1));
}
}
Ok(())
}
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
self.extract_thread().await
}
/// Placeholder extractor for XenForo forum (thread-listing) pages; crawling
/// is not implemented yet, so its items() currently yields nothing.
pub struct XenforoForumExtractor {
    pattern: Regex,                    // matches forum-index URLs
    category: String,                  // always "xenforo"
    subcategory: String,               // always "forum"
    root_url: String,                  // scheme+host(+path prefix) of the forum
    domain: Option<String>,            // domain captured from the input URL
    forum_path: Option<String>,        // captured forum path
    cookies: HashMap<String, String>,  // login cookies
}
impl XenforoForumExtractor {
@@ -213,11 +626,9 @@ impl XenforoForumExtractor {
category: "xenforo".to_string(),
subcategory: "forum".to_string(),
root_url: "https://simpcity.cr".to_string(),
forum_id: None,
client: HttpClient::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
.build()
.map_err(|e| ExtractorError::ConfigError(e.to_string()))?,
domain: None,
forum_path: None,
cookies: HashMap::new(),
})
}
}
@@ -235,11 +646,9 @@ impl Clone for XenforoForumExtractor {
category: self.category.clone(),
subcategory: self.subcategory.clone(),
root_url: self.root_url.clone(),
forum_id: self.forum_id.clone(),
client: HttpClient::builder()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
.build()
.expect("Failed to create HTTP client"),
domain: self.domain.clone(),
forum_path: self.forum_path.clone(),
cookies: self.cookies.clone(),
}
}
}
@@ -254,17 +663,25 @@ impl Extractor for XenforoForumExtractor {
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
if let Some(captures) = self.pattern.captures(&m.url) {
if let Some(id) = captures.get(1) {
self.forum_id = Some(id.as_str().to_string());
if let Some(domain) = captures.get(1) {
self.domain = Some(domain.as_str().to_string());
self.root_url = root_for_domain(domain.as_str());
}
if let Some(path) = captures.get(2) {
self.forum_path = Some(path.as_str().to_string());
}
}
Ok(())
}
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError> {
log::info!("Extracting XenForo forum");
log::info!("XenForo forum extractor not yet implemented");
Ok(vec![])
}
    /// Install login cookies, replacing (not merging) any previous jar.
    fn set_cookies(&mut self, cookies: HashMap<String, String>) {
        self.cookies = cookies;
    }
}
#[cfg(test)]
@@ -282,4 +699,116 @@ mod tests {
let extractor = XenforoThreadExtractor::new().unwrap();
assert!(extractor.pattern.is_match("https://simpcity.cr/threads/TITLE.12345/"));
}
#[test]
fn test_thread_pattern_with_page() {
let extractor = XenforoThreadExtractor::new().unwrap();
let url = "https://simpcity.cr/threads/dimeestevez.39618/page-2";
assert!(extractor.pattern.is_match(url));
let caps = extractor.pattern.captures(url).unwrap();
assert_eq!(caps.get(1).unwrap().as_str(), "simpcity.cr");
assert_eq!(caps.get(2).unwrap().as_str(), "/threads/dimeestevez.39618");
assert_eq!(caps.get(3).unwrap().as_str(), "39618");
assert_eq!(caps.get(4).unwrap().as_str(), "2");
}
#[test]
fn test_extract_media_from_html() {
let html = r#"
<img src="https://example.com/image1.jpg" class="bbImage " loading="lazy" />
<video src="https://example.com/video.mp4"></video>
<a href="https://example.com/attachments/file.zip">Download</a>
"#;
let urls = extract_media_from_html(html, "https://simpcity.cr");
assert_eq!(urls.len(), 3);
assert!(urls.contains(&"https://example.com/image1.jpg".to_string()));
assert!(urls.contains(&"https://example.com/video.mp4".to_string()));
assert!(urls.contains(&"https://example.com/attachments/file.zip".to_string()));
}
#[test]
fn test_extract_media_skips_smilies() {
let html = r#"
<img src="https://simpcity.cr/styles/emoji.png" class="bbImage" />
<img src="https://example.com/real-image.jpg" class="bbImage " loading="lazy" />
"#;
let urls = extract_media_from_html(html, "https://simpcity.cr");
assert_eq!(urls.len(), 1);
assert_eq!(urls[0], "https://example.com/real-image.jpg");
}
#[test]
fn test_upgrade_thumbnail() {
assert_eq!(
upgrade_thumbnail("https://simp1.selti-delivery.ru/images/test.md.jpg"),
"https://simp1.selti-delivery.ru/images/test.jpg"
);
assert_eq!(
upgrade_thumbnail("https://example.com/image.jpg"),
"https://example.com/image.jpg"
);
}
#[test]
fn test_extract_posts_from_real_html() {
let html = r#"
<article class="message" data-content="post-111" id="js-post-111">
<article class="message-body js-selectToQuote">
<img src="https://cdn.example.com/img1.jpg" class="bbImage " />
</article>
</article>
<article class="message" data-content="post-222" id="js-post-222">
<article class="message-body js-selectToQuote">
<img src="https://cdn.example.com/img2.jpg" class="bbImage " />
</article>
</article>
"#;
let posts = extract_posts(html);
assert_eq!(posts.len(), 2);
assert_eq!(posts[0].0, "111");
assert_eq!(posts[1].0, "222");
// Each post should yield its own image
let urls1 = extract_media_from_html(&posts[0].1, "https://simpcity.cr");
assert_eq!(urls1.len(), 1);
assert!(urls1[0].contains("img1.jpg"));
let urls2 = extract_media_from_html(&posts[1].1, "https://simpcity.cr");
assert_eq!(urls2.len(), 1);
assert!(urls2[0].contains("img2.jpg"));
}
#[test]
fn test_find_next_page() {
let html = r#"<a href="/threads/test.123/page-2" class="pageNav-jump pageNav-jump--next">Next</a>"#;
assert_eq!(find_next_page(html), Some("/threads/test.123/page-2".to_string()));
}
#[test]
fn test_find_next_page_none() {
let html = r#"<div>no pagination here</div>"#;
assert_eq!(find_next_page(html), None);
}
#[test]
fn test_extract_thread_title() {
let html = r#"<h1 class="p-title-value">Thread Title Here</h1>"#;
assert_eq!(extract_thread_title(html), Some("Thread Title Here".to_string()));
}
#[test]
fn test_url_filename() {
assert_eq!(
url_filename("https://example.com/path/to/image.jpg"),
Some("image.jpg".to_string())
);
assert_eq!(url_filename("https://example.com/"), None);
}
#[test]
fn test_root_for_domain() {
assert_eq!(root_for_domain("simpcity.cr"), "https://simpcity.cr");
assert_eq!(root_for_domain("nudostar.com/forum"), "https://nudostar.com/forum");
}
}

View File

@@ -86,6 +86,17 @@ fn write_page_dump(url: &str, items: &[Message]) {
let _ = std::fs::write(path, out);
}
/// Extract a usable filename from a URL path.
///
/// Returns `None` when the URL does not parse, the path ends in `/`, or the
/// last segment has no extension dot; otherwise the percent-decoded segment.
// NOTE(review): duplicates `url_filename` in the XenForo extractor —
// consider sharing one helper.
fn url_to_filename(url: &str) -> Option<String> {
    let parsed = url::Url::parse(url).ok()?;
    let path = parsed.path();
    let segment = path.rsplit('/').next()?;
    if segment.is_empty() || !segment.contains('.') {
        return None;
    }
    urlencoding::decode(segment).ok().map(|s| s.into_owned())
}
fn render_filename(pattern: Option<&str>, index: usize, item: &Message) -> String {
if let Some(template) = pattern {
let ext = item.extension().unwrap_or_else(|| "bin".to_string());
@@ -1092,9 +1103,14 @@ fn main() {
}
}
} else if let Some(ref browser) = args.cookies_from_browser {
match gallery_dl::extract_browser_cookies(browser, None) {
// Extract the domain from input URLs to filter browser cookies
let domain_filter: Option<String> = args.urls.first()
.and_then(|u| url::Url::parse(u).ok())
.and_then(|u| u.host_str().map(|h| h.to_string()));
match gallery_dl::extract_browser_cookies(browser, domain_filter.as_deref()) {
Ok(c) => {
log::info!("Extracted {} cookies from browser '{}'", c.len(), browser);
log::info!("Extracted {} cookies from browser '{}' (domain filter: {:?})", c.len(), browser, domain_filter);
Some(c)
}
Err(e) => {
@@ -1644,36 +1660,66 @@ fn main() {
let mut metadata_by_url: HashMap<String, HashMap<String, serde_json::Value>> =
HashMap::new();
// Determine download directory: CLI arg > config > default
let download_dir = args.directory.clone()
// Determine base download directory: CLI arg > config > default (Pictures/gallery-dl)
let base_dir = args.directory.clone()
.or_else(|| args.destination.clone())
.or_else(|| config.downloader.directory.clone())
.unwrap_or_else(|| PathBuf::from("."));
.unwrap_or_else(|| {
dirs::picture_dir()
.unwrap_or_else(|| PathBuf::from("."))
.join("gallery-dl")
});
// Extract directory metadata from the first Directory message
// to build subdirectory path: {category}/{title}/
let mut dir_category = String::new();
let mut dir_title = String::new();
for item in items.iter() {
if matches!(item.kind, MessageKind::Directory) {
if let Some(cat) = item.metadata.get("category") {
dir_category = cat.as_str().unwrap_or("").to_string();
}
if let Some(title) = item.metadata.get("title") {
dir_title = title.as_str().unwrap_or("").to_string();
}
break;
}
}
// Build the download directory with subdirectories
let download_dir = if !dir_category.is_empty() || !dir_title.is_empty() {
let cat = if dir_category.is_empty() { "other".to_string() } else {
sanitize_filename(&dir_category, args.restrict_filenames, true)
};
let title = if dir_title.is_empty() { "untitled".to_string() } else {
sanitize_filename(&dir_title, args.restrict_filenames, true)
};
base_dir.join(cat).join(title)
} else {
base_dir.clone()
};
for (j, item) in items.iter().enumerate() {
if !matches!(item.kind, MessageKind::Url | MessageKind::Queue) {
println!(" [{}] Skipping non-download message ({:?})", j + 1, item.kind);
continue;
}
let mut template_pattern = args
.rename_to
.as_deref()
.or(args.rename.as_deref())
.or(args.filename.as_deref())
.or(config.downloader.filename.as_deref());
if template_pattern.is_none() {
template_pattern = Some("{num}.{ext}");
}
// Create a simple destination path based on the URL
// In a full implementation, this would use path templates
let filename = render_filename(
template_pattern,
j,
item,
);
// Use the extractor-provided filename, or derive from URL, or fall back to template
let filename = if let Some(ref f) = item.filename {
f.clone()
} else if let Some(f) = url_to_filename(&item.url) {
f
} else {
let template_pattern = args
.rename_to
.as_deref()
.or(args.rename.as_deref())
.or(args.filename.as_deref())
.or(config.downloader.filename.as_deref())
.unwrap_or("{num}.{ext}");
render_filename(Some(template_pattern), j, item)
};
let filename = sanitize_filename(
&filename,
args.restrict_filenames,