feat(06-03): wire cookies to extractors in main.rs
- Added cookie loading from the --cookies and --cookies-from-browser CLI args
- Added a set_cookies() method to the Extractor trait (default no-op)
- Implemented set_cookies() for TwitterExtractor and InstagramExtractor
- Extractors now receive cookies during initialization for authenticated requests
- All 145 library tests pass
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
|
||||
use async_trait::async_trait;
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::extractor::message::Message;
|
||||
@@ -116,6 +117,14 @@ pub trait Extractor: Send + Sync {
|
||||
/// messages containing URLs to download.
|
||||
async fn items(&mut self) -> Result<Vec<Message>, ExtractorError>;
|
||||
|
||||
/// Set cookies for the extractor
|
||||
///
|
||||
/// Called before extraction to provide authentication cookies.
|
||||
/// Extractors that need authentication can override this method.
|
||||
/// Default implementation does nothing (extractors that don't need
|
||||
/// cookies will simply ignore them).
|
||||
fn set_cookies(&mut self, _cookies: HashMap<String, String>) {}
|
||||
|
||||
/// Create a clone of this extractor
|
||||
///
|
||||
/// This is used by the registry to create new instances
|
||||
|
||||
@@ -388,6 +388,12 @@ impl Extractor for InstagramExtractor {
|
||||
fn clone_extractor(&self) -> Box<dyn Extractor> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
/// Set cookies for authenticated requests
|
||||
fn set_cookies(&mut self, cookies: HashMap<String, String>) {
|
||||
self.cookies = cookies;
|
||||
log::debug!("InstagramExtractor: Cookies set, session_id present: {}", self.cookies.contains_key("sessionid"));
|
||||
}
|
||||
|
||||
/// Initialize the extractor with a matched URL
|
||||
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
||||
|
||||
@@ -374,6 +374,16 @@ impl Extractor for TwitterExtractor {
|
||||
fn clone_extractor(&self) -> Box<dyn Extractor> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
/// Set cookies for authenticated requests
|
||||
fn set_cookies(&mut self, cookies: HashMap<String, String>) {
|
||||
self.cookies = cookies;
|
||||
// Extract CSRF token from cookies if present
|
||||
if let Some(token) = self.cookies.get("ct0") {
|
||||
self.csrf_token = Some(token.clone());
|
||||
}
|
||||
log::debug!("TwitterExtractor: Cookies set, has auth_token: {}", self.cookies.contains_key("auth_token"));
|
||||
}
|
||||
|
||||
/// Initialize the extractor with a matched URL
|
||||
async fn initialize(&mut self, m: ExtractorMatch) -> Result<(), ExtractorError> {
|
||||
|
||||
32
src/main.rs
32
src/main.rs
@@ -51,6 +51,33 @@ fn main() {
|
||||
}
|
||||
};
|
||||
|
||||
// Load cookies from CLI arguments
|
||||
let cookies = if let Some(cookies_file) = &args.cookies {
|
||||
match gallery_dl::load_cookies_from_file(cookies_file) {
|
||||
Ok(c) => {
|
||||
log::info!("Loaded {} cookies from {:?}", c.len(), cookies_file);
|
||||
Some(c)
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error loading cookies: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else if let Some(ref browser) = args.cookies_from_browser {
|
||||
match gallery_dl::extract_browser_cookies(browser, None) {
|
||||
Ok(c) => {
|
||||
log::info!("Extracted {} cookies from browser '{}'", c.len(), browser);
|
||||
Some(c)
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error extracting browser cookies: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Parse file filter options
|
||||
let file_filter = build_filter(&args);
|
||||
|
||||
@@ -133,6 +160,11 @@ fn main() {
|
||||
// This will clone the underlying data if there are other Arc references
|
||||
let extractor = Arc::make_mut(&mut shared_extractor);
|
||||
|
||||
// Pass cookies to the extractor if cookies are available
|
||||
if let Some(ref cookie_map) = cookies {
|
||||
extractor.set_cookies(cookie_map.clone());
|
||||
}
|
||||
|
||||
// Extract items using a blocking call
|
||||
let items: Vec<Message> = rt.block_on(async {
|
||||
// Create an ExtractorMatch with the URL
|
||||
|
||||
Reference in New Issue
Block a user