diff --git a/.github/workflows/executables.yml b/.github/workflows/executables.yml index 8a7313d0..2954d6f0 100644 --- a/.github/workflows/executables.yml +++ b/.github/workflows/executables.yml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: os: ["windows-latest", "ubuntu-latest", "macOS-latest"] - python-version: ["3.9"] + python-version: ["3.10"] architecture: ["x64"] include: - os: "windows-2019" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 74b6c26e..e844be4a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,12 +28,16 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies + env: + PYV: ${{ matrix.python-version }} run: | pip install -r requirements.txt + pip install flake8 + pip install youtube-dl + if [[ "$PYV" != "3.4" && "$PYV" != "3.5" ]]; then pip install yt-dlp; fi - name: Lint with flake8 run: | - pip install flake8 flake8 . - name: Run tests diff --git a/CHANGELOG.md b/CHANGELOG.md index 994d5f0e..f41bec99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,69 @@ # Changelog +## 1.22.0 - 2022-05-25 +### Additions +- [gelbooru_v01] add `favorite` extractor ([#2546](https://github.com/mikf/gallery-dl/issues/2546)) +- [Instagram] add `tagged_users` to keywords for stories ([#2582](https://github.com/mikf/gallery-dl/issues/2582), [#2584](https://github.com/mikf/gallery-dl/issues/2584)) +- [lolisafe] implement `domain` option ([#2575](https://github.com/mikf/gallery-dl/issues/2575)) +- [naverwebtoon] support (best)challenge comics ([#2542](https://github.com/mikf/gallery-dl/issues/2542)) +- [nijie] support /history_nuita.php listings ([#2541](https://github.com/mikf/gallery-dl/issues/2541)) +- [pixiv] provide more data when `metadata` is enabled ([#2594](https://github.com/mikf/gallery-dl/issues/2594)) +- [shopify] support several more sites by default ([#2089](https://github.com/mikf/gallery-dl/issues/2089)) +- [twitter] extract alt texts as `description` 
([#2617](https://github.com/mikf/gallery-dl/issues/2617)) +- [twitter] recognize vxtwitter URLs ([#2621](https://github.com/mikf/gallery-dl/issues/2621)) +- [weasyl] implement `metadata` option ([#2610](https://github.com/mikf/gallery-dl/issues/2610)) +- implement `--cookies-from-browser` ([#1606](https://github.com/mikf/gallery-dl/issues/1606)) +- implement `output.colors` options ([#2532](https://github.com/mikf/gallery-dl/issues/2532)) +- implement string literals in replacement fields +- support using extended format strings for archive keys +### Changes +- [foolfuuka] match 4chan filenames ([#2577](https://github.com/mikf/gallery-dl/issues/2577)) +- [pixiv] implement `include` option + - provide `avatar`/`background` downloads as separate extractors ([#2495](https://github.com/mikf/gallery-dl/issues/2495)) +- [twitter] use a better strategy for user URLs +- [twitter] disable `cards` by default +- delay directory creation ([#2461](https://github.com/mikf/gallery-dl/issues/2461), [#2474](https://github.com/mikf/gallery-dl/issues/2474)) +- flush writes to stdout/stderr ([#2529](https://github.com/mikf/gallery-dl/issues/2529)) +- build executables on GitHub Actions with Python 3.10 +### Fixes +- [artstation] use `"browser": "firefox"` by default ([#2527](https://github.com/mikf/gallery-dl/issues/2527)) +- [imgur] prevent exception with empty albums ([#2557](https://github.com/mikf/gallery-dl/issues/2557)) +- [instagram] report redirects to captcha challenges ([#2543](https://github.com/mikf/gallery-dl/issues/2543)) +- [khinsider] fix metadata extraction ([#2611](https://github.com/mikf/gallery-dl/issues/2611)) +- [mangafox] send Referer headers ([#2592](https://github.com/mikf/gallery-dl/issues/2592)) +- [mangahere] send Referer headers ([#2592](https://github.com/mikf/gallery-dl/issues/2592)) +- [mangasee] use randomly generated PHPSESSID cookie ([#2560](https://github.com/mikf/gallery-dl/issues/2560)) +- [pixiv] make retrieving ugoira metadata non-fatal 
([#2562](https://github.com/mikf/gallery-dl/issues/2562)) +- [readcomiconline] update deobfuscation code ([#2481](https://github.com/mikf/gallery-dl/issues/2481)) +- [realbooru] fix extraction ([#2530](https://github.com/mikf/gallery-dl/issues/2530)) +- [vk] handle photos without width/height info ([#2535](https://github.com/mikf/gallery-dl/issues/2535)) +- [vk] fix user ID extraction ([#2535](https://github.com/mikf/gallery-dl/issues/2535)) +- [webtoons] extract real episode numbers ([#2591](https://github.com/mikf/gallery-dl/issues/2591)) +- create missing directories for archive files ([#2597](https://github.com/mikf/gallery-dl/issues/2597)) +- detect circular references with `-K` ([#2609](https://github.com/mikf/gallery-dl/issues/2609)) +- replace "\f" in `--filename` arguments with a form feed character ([#2396](https://github.com/mikf/gallery-dl/issues/2396)) +### Removals +- [gelbooru_v01] remove tlb.booru.org from supported domains + +## 1.21.2 - 2022-04-27 +### Additions +- [deviantart] implement `pagination` option ([#2488](https://github.com/mikf/gallery-dl/issues/2488)) +- [pixiv] implement `background` option ([#623](https://github.com/mikf/gallery-dl/issues/623), [#1124](https://github.com/mikf/gallery-dl/issues/1124), [#2495](https://github.com/mikf/gallery-dl/issues/2495)) +- [postprocessor:ugoira] report ffmpeg/mkvmerge errors ([#2487](https://github.com/mikf/gallery-dl/issues/2487)) +### Fixes +- [cyberdrop] match cyberdrop.to URLs ([#2496](https://github.com/mikf/gallery-dl/issues/2496)) +- [e621] fix 403 errors ([#2533](https://github.com/mikf/gallery-dl/issues/2533)) +- [issuu] fix extraction ([#2483](https://github.com/mikf/gallery-dl/issues/2483)) +- [mangadex] download from available chapters despite `externalUrl` ([#2503](https://github.com/mikf/gallery-dl/issues/2503)) +- [photovogue] update domain and api endpoint ([#2494](https://github.com/mikf/gallery-dl/issues/2494)) +- [sexcom] add fallback for empty files 
([#2485](https://github.com/mikf/gallery-dl/issues/2485)) +- [twitter] improve syndication video selection ([#2354](https://github.com/mikf/gallery-dl/issues/2354)) +- [twitter] fix various syndication issues ([#2499](https://github.com/mikf/gallery-dl/issues/2499), [#2354](https://github.com/mikf/gallery-dl/issues/2354)) +- [vk] fix extraction ([#2512](https://github.com/mikf/gallery-dl/issues/2512)) +- [weibo] fix infinite retries for deleted accounts ([#2521](https://github.com/mikf/gallery-dl/issues/2521)) +- [postprocessor:ugoira] use compatible paths with mkvmerge ([#2487](https://github.com/mikf/gallery-dl/issues/2487)) +- [postprocessor:ugoira] do not auto-select the `image2` demuxer ([#2492](https://github.com/mikf/gallery-dl/issues/2492)) + ## 1.21.1 - 2022-04-08 ### Additions - [gofile] add gofile.io extractor ([#2364](https://github.com/mikf/gallery-dl/issues/2364)) diff --git a/README.rst b/README.rst index 4359c990..e5337030 100644 --- a/README.rst +++ b/README.rst @@ -65,8 +65,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/docs/configuration.rst b/docs/configuration.rst index 9cb2b7f9..4dc89eb3 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -399,16 +399,19 @@ Description extractor.*.cookies ------------------- Type - |Path|_ or ``object`` + |Path|_ or ``object`` or ``list`` Default ``null`` Description - Source to read additional cookies from. Either as + Source to read additional cookies from. This can be - * the |Path|_ to a Mozilla/Netscape format cookies.txt file or - * a JSON ``object`` specifying cookies as a name-to-value mapping + * The |Path|_ to a Mozilla/Netscape format cookies.txt file - Example: + .. 
code:: json + + "~/.local/share/cookies-instagram-com.txt" + + * An ``object`` specifying cookies as name-value pairs .. code:: json @@ -418,6 +421,17 @@ Description "isAdult" : "1" } + * A ``list`` with up to 3 entries specifying a browser profile. + + * The first entry is the browser name + * The optional second entry is a profile name or an absolute path to a profile directory + * The optional third entry is the keyring to retrieve passwords for decrypting cookies from + + .. code:: json + + ["firefox"] + ["chromium", "Private", "kwallet"] + extractor.*.cookies-update -------------------------- @@ -613,9 +627,7 @@ Type Example ``"{id}_{offset}"`` Description - An alternative `format string`__ to build archive IDs with. - -.. __: https://docs.python.org/3/library/string.html#format-string-syntax + An alternative `format string`_ to build archive IDs with. extractor.*.archive-prefix @@ -865,6 +877,21 @@ Description Download embedded videos hosted on https://www.blogger.com/ +extractor.cyberdrop.domain +-------------------------- +Type + ``string`` +Default + ``"auto"`` +Example + ``"cyberdrop.to"`` +Description + Specifies the domain used by ``cyberdrop`` regardless of input URL. + + Setting this option to ``"auto"`` + uses the same domain as a given input URL. + + extractor.danbooru.external --------------------------- Type @@ -1077,6 +1104,19 @@ Description everything else (archives, etc.). +extractor.deviantart.pagination +------------------------------- +Type + ``string`` +Default + ``"api"`` +Description + Controls when to stop paginating over API results. + + * ``"api"``: Trust the API and stop when ``has_more`` is ``false``. + * ``"manual"``: Disregard ``has_more`` and only stop when a batch of results is empty. + + extractor.deviantart.refresh-token ---------------------------------- Type @@ -1506,6 +1546,20 @@ Description the first in the list gets chosen (usually `mp3`). 
+extractor.lolisafe.domain +------------------------- +Type + ``string`` +Default + ``"auto"`` +Description + Specifies the domain used by a ``lolisafe`` extractor + regardless of input URL. + + Setting this option to ``"auto"`` + uses the same domain as a given input URL. + + extractor.luscious.gif ---------------------- Type @@ -1637,7 +1691,7 @@ Description extractor.nijie.include ----------------------------- +----------------------- Type ``string`` or ``list`` of ``strings`` Default @@ -1647,7 +1701,7 @@ Description when processing a user profile. Possible values are - ``"illustration"``, ``"doujin"``, ``"favorite"``. + ``"illustration"``, ``"doujin"``, ``"favorite"``, ``"nuita"``. You can use ``"all"`` instead of listing all values separately. @@ -1765,18 +1819,28 @@ Description Download from video pins. -extractor.pixiv.user.avatar ---------------------------- +extractor.pixiv.include +----------------------- Type - ``bool`` + * ``string`` + * ``list`` of ``strings`` Default - ``false`` + ``"artworks"`` +Example + * ``"avatar,background,artworks"`` + * ``["avatar", "background", "artworks"]`` Description - Download user avatars. + A (comma-separated) list of subcategories to include + when processing a user profile. + + Possible values are + ``"artworks"``, ``"avatar"``, ``"background"``, ``"favorite"``. + + It is possible to use ``"all"`` instead of listing all values separately. -extractor.pixiv.user.metadata ------------------------------ +extractor.pixiv.artworks.metadata +--------------------------------- Type ``bool`` Default @@ -1874,6 +1938,19 @@ Description * ``"wait``: Ask the user to solve the CAPTCHA and wait. +extractor.readcomiconline.quality +--------------------------------- +Type + ``string`` +Default + ``"auto"`` +Description + Sets the ``quality`` query parameter of issue pages. (``"lq"`` or ``"hq"``) + + ``"auto"`` uses the quality parameter of the input URL + or ``"hq"`` if not present. 
+ + extractor.reddit.comments ------------------------- Type @@ -2147,7 +2224,7 @@ extractor.twitter.cards Type ``bool`` or ``string`` Default - ``true`` + ``false`` Description Controls how to handle `Twitter Cards `__. @@ -2289,6 +2366,7 @@ Description Special values: * ``"timeline"``: ``https://twitter.com/i/user/{rest_id}`` + * ``"tweets"``: ``https://twitter.com/id:{rest_id}/tweets`` * ``"media"``: ``https://twitter.com/id:{rest_id}/media`` Note: To allow gallery-dl to follow custom URL formats, set the blacklist__ @@ -2358,6 +2436,20 @@ Description to use your account's browsing settings and filters. +extractor.weasyl.metadata +------------------------- +Type + ``bool`` +Default + ``false`` +Description + | Fetch extra submission metadata during gallery downloads. + | (``comments``, ``description``, ``favorites``, ``folder_name``, + ``tags``, ``views``) + + Note: This requires 1 additional HTTP request per submission. + + extractor.weibo.retweets ------------------------ Type @@ -2843,6 +2935,19 @@ Description with a display width greater than 1. +output.colors +------------- +Type + ``object`` +Default + ``{"success": "1;32", "skip": "2"}`` +Description + Controls the `ANSI colors `__ + used with |mode: color|__ for successfully downloaded or skipped files. + +.. __: `output.mode`_ + + output.skip ----------- Type @@ -3275,12 +3380,11 @@ Default Description FFmpeg demuxer to read and process input files with. Possible values are - * "`concat `_" (inaccurate frame timecodes) - * "`image2 `_" (accurate timecodes, not usable on Windows) + * "`concat `_" (inaccurate frame timecodes for non-uniform frame delays) + * "`image2 `_" (accurate timecodes, requires nanosecond file timestamps, i.e. no Windows or macOS) * "mkvmerge" (accurate timecodes, only WebM or MKV, requires `mkvmerge `__) - `"auto"` will select `mkvmerge` if possible and fall back to `image2` or - `concat` depending on the local operating system. 
+ `"auto"` will select `mkvmerge` if available and fall back to `concat` otherwise. ugoira.ffmpeg-location @@ -3791,6 +3895,7 @@ Description .. |Postprocessor Configuration| replace:: ``Postprocessor Configuration`` .. |strptime| replace:: strftime() and strptime() Behavior .. |postprocessors| replace:: ``postprocessors`` +.. |mode: color| replace:: ``"mode": "color"`` .. _base-directory: `extractor.*.base-directory`_ .. _date-format: `extractor.*.date-format`_ diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 063ce91a..9e1eb4d2 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -305,9 +305,13 @@ "mode": "auto", "progress": true, "shorten": true, + "colors": { + "success": "1;32", + "skip" : "2" + }, "skip": true, "log": "[{name}][{levelname}] {message}", "logfile": null, "unsupportedfile": null } -} \ No newline at end of file +} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index ea1c7f8a..2a8f0b57 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -538,7 +538,7 @@ Consider all sites to be NSFW unless otherwise known. nijie https://nijie.info/ - Doujin, Favorites, Illustrations, individual Images, User Profiles + Doujin, Favorites, Illustrations, individual Images, Nuita History, User Profiles Required @@ -567,7 +567,7 @@ Consider all sites to be NSFW unless otherwise known. PhotoVogue - https://www.vogue.it/en/photovogue/ + https://www.vogue.com/photovogue/ User Profiles @@ -604,7 +604,7 @@ Consider all sites to be NSFW unless otherwise known. Pixiv https://www.pixiv.net/ - Favorites, Follows, pixiv.me Links, pixivision, Rankings, Search Results, Sketch, User Profiles, individual Images + Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, pixivision, Rankings, Search Results, Sketch, User Profiles, individual Images OAuth @@ -930,37 +930,31 @@ Consider all sites to be NSFW unless otherwise known. 
The /co/llection https://the-collection.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches Illusion Game Cards https://illusioncards.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches All girl https://allgirl.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches Draw Friends https://drawfriends.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches /v/idyart https://vidyart.booru.org/ - Posts, Tag Searches - - - - The Loud Booru - https://tlb.booru.org/ - Posts, Tag Searches + Favorites, Posts, Tag Searches @@ -1187,12 +1181,36 @@ Consider all sites to be NSFW unless otherwise known. Shopify Instances + + Chelseacrew + https://chelseacrew.com/ + Collections, Products + + Fashion Nova https://www.fashionnova.com/ Collections, Products + + Loungeunderwear + https://loungeunderwear.com/ + Collections, Products + + + + Michaelscameras + https://michaels.com.au/ + Collections, Products + + + + Modcloth + https://modcloth.com/ + Collections, Products + + Omg Miami Swimwear https://www.omgmiamiswimwear.com/ @@ -1200,14 +1218,26 @@ Consider all sites to be NSFW unless otherwise known. 
- Windsorstore - https://www.windsorstore.com/ + Pinupgirlclothing + https://pinupgirlclothing.com/ Collections, Products - Loungeunderwear - https://loungeunderwear.com/ + Raidlondon + https://www.raidlondon.com/ + Collections, Products + + + + Unique-vintage + https://www.unique-vintage.com/ + Collections, Products + + + + Windsorstore + https://www.windsorstore.com/ Collections, Products diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 72e2ca0a..082932c8 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -22,10 +22,13 @@ __version__ = version.__version__ def progress(urls, pformat): """Wrapper around urls to output a simple progress indicator""" if pformat is True: - pformat = "[{current}/{total}] {url}" + pformat = "[{current}/{total}] {url}\n" + else: + pformat += "\n" + pinfo = {"total": len(urls)} for pinfo["current"], pinfo["url"] in enumerate(urls, 1): - print(pformat.format_map(pinfo), file=sys.stderr) + output.stderr_write(pformat.format_map(pinfo)) yield pinfo["url"] @@ -119,9 +122,12 @@ def main(): if args.yamlfiles: config.load(args.yamlfiles, strict=True, fmt="yaml") if args.filename: - if args.filename == "/O": - args.filename = "{filename}.{extension}" - config.set("__global__", "filename", args.filename) + filename = args.filename + if filename == "/O": + filename = "{filename}.{extension}" + elif filename.startswith("\\f"): + filename = "\f" + filename[2:] + config.set("__global__", "filename", filename) if args.directory: config.set("__global__", "base-directory", args.directory) config.set("__global__", "directory", ()) @@ -131,6 +137,10 @@ def main(): config.set("__global__", "skip", f"abort:{args.abort}") if args.terminate: config.set("__global__", "skip", f"terminate:{args.terminate}") + if args.cookies_from_browser: + browser, _, profile = args.cookies_from_browser.partition(":") + browser, _, keyring = browser.partition("+") + config.set("__global__", "cookies", (browser, profile, keyring)) for opts in 
args.options: config.set(*opts) @@ -192,20 +202,23 @@ def main(): pass if args.list_modules: - for module_name in extractor.modules: - print(module_name) + extractor.modules.append("") + sys.stdout.write("\n".join(extractor.modules)) + elif args.list_extractors: + write = sys.stdout.write + fmt = "{}\n{}\nCategory: {} - Subcategory: {}{}\n\n".format + for extr in extractor.extractors(): if not extr.__doc__: continue - print(extr.__name__) - print(extr.__doc__) - print("Category:", extr.category, - "- Subcategory:", extr.subcategory) test = next(extr._get_tests(), None) - if test: - print("Example :", test[0]) - print() + write(fmt( + extr.__name__, extr.__doc__, + extr.category, extr.subcategory, + "\nExample : " + test[0] if test else "", + )) + elif args.clear_cache: from . import cache log = logging.getLogger("cache") diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py new file mode 100644 index 00000000..22cb0528 --- /dev/null +++ b/gallery_dl/aes.py @@ -0,0 +1,641 @@ +# -*- coding: utf-8 -*- + +# This is a slightly modified version of yt-dlp's aes module. 
+# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/aes.py + +import struct +import binascii +from math import ceil + +try: + from Cryptodome.Cipher import AES as Cryptodome_AES +except ImportError: + try: + from Crypto.Cipher import AES as Cryptodome_AES + except ImportError: + Cryptodome_AES = None + + +if Cryptodome_AES: + def aes_cbc_decrypt_bytes(data, key, iv): + """Decrypt bytes with AES-CBC using pycryptodome""" + return Cryptodome_AES.new( + key, Cryptodome_AES.MODE_CBC, iv).decrypt(data) + + def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): + """Decrypt bytes with AES-GCM using pycryptodome""" + return Cryptodome_AES.new( + key, Cryptodome_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) +else: + def aes_cbc_decrypt_bytes(data, key, iv): + """Decrypt bytes with AES-CBC using native implementation""" + return intlist_to_bytes(aes_cbc_decrypt( + bytes_to_intlist(data), + bytes_to_intlist(key), + bytes_to_intlist(iv), + )) + + def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): + """Decrypt bytes with AES-GCM using native implementation""" + return intlist_to_bytes(aes_gcm_decrypt_and_verify( + bytes_to_intlist(data), + bytes_to_intlist(key), + bytes_to_intlist(tag), + bytes_to_intlist(nonce), + )) + + +bytes_to_intlist = list + + +def intlist_to_bytes(xs): + if not xs: + return b"" + return struct.pack("%dB" % len(xs), *xs) + + +def unpad_pkcs7(data): + return data[:-data[-1]] + + +BLOCK_SIZE_BYTES = 16 + + +def aes_ecb_encrypt(data, key, iv=None): + """ + Encrypt with aes in ECB mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_encrypt(block, expanded_key) + encrypted_data = 
encrypted_data[:len(data)] + + return encrypted_data + + +def aes_ecb_decrypt(data, key, iv=None): + """ + Decrypt with aes in ECB mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_decrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + +def aes_ctr_decrypt(data, key, iv): + """ + Decrypt with aes in counter mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte initialization vector + @returns {int[]} decrypted data + """ + return aes_ctr_encrypt(data, key, iv) + + +def aes_ctr_encrypt(data, key, iv): + """ + Encrypt with aes in counter mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte initialization vector + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + counter = iter_vector(iv) + + encrypted_data = [] + for i in range(block_count): + counter_block = next(counter) + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block += [0] * (BLOCK_SIZE_BYTES - len(block)) + + cipher_counter_block = aes_encrypt(counter_block, expanded_key) + encrypted_data += xor(block, cipher_counter_block) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + +def aes_cbc_decrypt(data, key, iv): + """ + Decrypt with aes in CBC mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = 
int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data = [] + previous_cipher_block = iv + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block += [0] * (BLOCK_SIZE_BYTES - len(block)) + + decrypted_block = aes_decrypt(block, expanded_key) + decrypted_data += xor(decrypted_block, previous_cipher_block) + previous_cipher_block = block + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + + +def aes_cbc_encrypt(data, key, iv): + """ + Encrypt with aes in CBC mode. Using PKCS#7 padding + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + previous_cipher_block = iv + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + remaining_length = BLOCK_SIZE_BYTES - len(block) + block += [remaining_length] * remaining_length + mixed_block = xor(block, previous_cipher_block) + + encrypted_block = aes_encrypt(mixed_block, expanded_key) + encrypted_data += encrypted_block + + previous_cipher_block = encrypted_block + + return encrypted_data + + +def aes_gcm_decrypt_and_verify(data, key, tag, nonce): + """ + Decrypt with aes in GBM mode and checks authenticity using tag + + @param {int[]} data cipher + @param {int[]} key 16-Byte cipher key + @param {int[]} tag authentication tag + @param {int[]} nonce IV (recommended 12-Byte) + @returns {int[]} decrypted data + """ + + # XXX: check aes, gcm param + + hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) + + if len(nonce) == 12: + j0 = nonce + [0, 0, 0, 1] + else: + fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % \ + BLOCK_SIZE_BYTES + 8 + ghash_in = nonce + [0] * fill + bytes_to_intlist( + (8 * len(nonce)).to_bytes(8, "big")) + j0 = ghash(hash_subkey, ghash_in) + 
+ # TODO: add nonce support to aes_ctr_decrypt + + # nonce_ctr = j0[:12] + iv_ctr = inc(j0) + + decrypted_data = aes_ctr_decrypt( + data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) + + pad_len = len(data) // 16 * 16 + s_tag = ghash( + hash_subkey, + data + + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) + # pad + bytes_to_intlist( + (0 * 8).to_bytes(8, "big") + # length of associated data + ((len(data) * 8).to_bytes(8, "big")) # length of data + ) + ) + + if tag != aes_ctr_encrypt(s_tag, key, j0): + raise ValueError("Mismatching authentication tag") + + return decrypted_data + + +def aes_encrypt(data, expanded_key): + """ + Encrypt one block with aes + + @param {int[]} data 16-Byte state + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte cipher + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + for i in range(1, rounds + 1): + data = sub_bytes(data) + data = shift_rows(data) + if i != rounds: + data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX)) + data = xor(data, expanded_key[ + i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) + + return data + + +def aes_decrypt(data, expanded_key): + """ + Decrypt one block with aes + + @param {int[]} data 16-Byte cipher + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte state + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + for i in range(rounds, 0, -1): + data = xor(data, expanded_key[ + i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) + if i != rounds: + data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) + data = shift_rows_inv(data) + data = sub_bytes_inv(data) + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + + return data + + +def aes_decrypt_text(data, password, key_size_bytes): + """ + Decrypt text + - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter + - The cipher key is retrieved by encrypting the first 16 Byte of 
'password' + with the first 'key_size_bytes' Bytes from 'password' + (if necessary filled with 0's) + - Mode of operation is 'counter' + + @param {str} data Base64 encoded string + @param {str,unicode} password Password (will be encoded with utf-8) + @param {int} key_size_bytes Possible values: 16 for 128-Bit, + 24 for 192-Bit, or + 32 for 256-Bit + @returns {str} Decrypted data + """ + NONCE_LENGTH_BYTES = 8 + + data = bytes_to_intlist(binascii.a2b_base64(data)) + password = bytes_to_intlist(password.encode("utf-8")) + + key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * \ + (key_size_bytes // BLOCK_SIZE_BYTES) + + nonce = data[:NONCE_LENGTH_BYTES] + cipher = data[NONCE_LENGTH_BYTES:] + + return intlist_to_bytes(aes_ctr_decrypt( + cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES) + )) + + +RCON = ( + 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, +) + +SBOX = ( + 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, + 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, + 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, + 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, + 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, + 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, + 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, + 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, + 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, + 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, + 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, 
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, + 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, + 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, + 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, + 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, + 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, + 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16, +) + +SBOX_INV = ( + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 
0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +) + +MIX_COLUMN_MATRIX = ( + (0x2, 0x3, 0x1, 0x1), + (0x1, 0x2, 0x3, 0x1), + (0x1, 0x1, 0x2, 0x3), + (0x3, 0x1, 0x1, 0x2), +) + +MIX_COLUMN_MATRIX_INV = ( + (0xE, 0xB, 0xD, 0x9), + (0x9, 0xE, 0xB, 0xD), + (0xD, 0x9, 0xE, 0xB), + (0xB, 0xD, 0x9, 0xE), +) + +RIJNDAEL_EXP_TABLE = ( + 0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, + 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, + 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, + 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, + 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, + 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, + 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, + 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, + 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, + 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, + 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, + 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, + 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, + 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, + 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, + 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, + 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, + 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, + 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, + 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, + 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, + 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, + 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, + 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, + 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, + 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, + 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, + 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, + 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, + 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 
0x17, + 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, + 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01, +) + +RIJNDAEL_LOG_TABLE = ( + 0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, + 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, + 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, + 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, + 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, + 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, + 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, + 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, + 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, + 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, + 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, + 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, + 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, + 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, + 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, + 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, + 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, + 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, + 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, + 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, + 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, + 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, + 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, + 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, + 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, + 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, + 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, + 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, + 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, + 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, + 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, + 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07, +) + + +def key_expansion(data): + """ + Generate key schedule + + @param {int[]} data 16/24/32-Byte cipher key + @returns {int[]} 176/208/240-Byte expanded key + """ + data = data[:] # copy + rcon_iteration = 1 + key_size_bytes = len(data) + 
def iter_mix_columns(data, matrix):
    """Yield the bytes of a 16-byte state after a MixColumns-style step.

    The state is processed as four consecutive 4-byte columns
    (offsets 0, 4, 8 and 12). For every column one value per row of
    'matrix' is produced, i.e. 16 values in total.

    @param {int[]} data    state bytes; indices 0..15 are read
    @param {int[][]} matrix  rows of four coefficients each
                             (e.g. MIX_COLUMN_MATRIX or its inverse)
    @yields {int}          mixed bytes in column-major order
    """
    exp_table = RIJNDAEL_EXP_TABLE
    log_table = RIJNDAEL_LOG_TABLE

    for i in (0, 4, 8, 12):
        # slice each column once instead of once per coefficient
        column = data[i:i + 4]
        for row in matrix:
            mixed = 0
            for j in range(4):
                value = column[j]
                factor = row[j]
                # a zero operand contributes nothing to the XOR sum and
                # must not be looked up in the logarithm table
                if value and factor:
                    mixed ^= exp_table[
                        (log_table[value] + log_table[factor]) % 0xFF
                    ]
            yield mixed
def ghash(subkey, data):
    """Fold 'data' block by block into a single hash block.

    NIST SP 800-38D, Algorithm 2

    @param {int[]} subkey  hash subkey, passed to block_product()
    @param {int[]} data    input whose length is a multiple of
                           BLOCK_SIZE_BYTES (may be empty)
    @returns {int[]}       BLOCK_SIZE_BYTES-long result;
                           all zeros for empty input
    @raises ValueError     if len(data) is not a multiple of
                           BLOCK_SIZE_BYTES
    """
    if len(data) % BLOCK_SIZE_BYTES:
        # the check accepts any whole number of blocks, so say so
        raise ValueError(
            "Length of data should be a multiple of %d bytes"
            % BLOCK_SIZE_BYTES)

    last_y = [0] * BLOCK_SIZE_BYTES
    for i in range(0, len(data), BLOCK_SIZE_BYTES):
        block = data[i: i + BLOCK_SIZE_BYTES]
        last_y = block_product(xor(last_y, block), subkey)

    return last_y
def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
    """Load the cookies of a Chromium-based browser into 'cookiejar'.

    Reads every row of the 'cookies' table from a copy of the browser's
    cookie database, decrypts encrypted values with the platform-specific
    decryptor and adds one Cookie object per readable row.

    cookiejar:    object providing set_cookie() and len()
    browser_name: key understood by _get_chromium_based_browser_settings()
    profile:      profile name, profile path, or None
    keyring:      keyring name forwarded to get_cookie_decryptor()
    """
    # 'expires_utc' counts microseconds since 1601-01-01 00:00 UTC;
    # this is the number of seconds between that epoch and 1970-01-01
    epoch_delta = 11644473600

    config = _get_chromium_based_browser_settings(browser_name)

    with _chrome_cookies_database(profile, config) as db:

        # return TEXT columns as bytes and decode them explicitly below
        db.text_factory = bytes
        decryptor = get_cookie_decryptor(
            config["directory"], config["keyring"], keyring=keyring)

        try:
            rows = db.execute(
                "SELECT host_key, name, value, encrypted_value, path, "
                "expires_utc, is_secure FROM cookies")
        except sqlite3.OperationalError:
            # no 'is_secure' column; retry with the column named 'secure'
            # (presumably an older database schema)
            rows = db.execute(
                "SELECT host_key, name, value, encrypted_value, path, "
                "expires_utc, secure FROM cookies")

        set_cookie = cookiejar.set_cookie
        failed_cookies = unencrypted_cookies = 0

        for domain, name, value, enc_value, path, expires, secure in rows:

            if not value and enc_value:  # encrypted
                value = decryptor.decrypt(enc_value)
                if value is None:
                    failed_cookies += 1
                    continue
            else:
                value = value.decode()
                unencrypted_cookies += 1

            if expires:
                # convert to a POSIX timestamp as expected by Cookie()
                expires = int(expires) // 1000000 - epoch_delta
            else:
                # 0 / NULL: no expiration date stored
                expires = None

            domain = domain.decode()
            path = path.decode()
            name = name.decode()

            set_cookie(Cookie(
                0, name, value, None, False,
                domain, bool(domain), domain.startswith("."),
                path, bool(path), secure, expires, False, None, None, {},
            ))

    if failed_cookies > 0:
        failed_message = " ({} could not be decrypted)".format(failed_cookies)
    else:
        failed_message = ""

    logger.info("Extracted %s cookies from %s%s",
                len(cookiejar), browser_name, failed_message)
    counts = decryptor.cookie_counts.copy()
    counts["unencrypted"] = unencrypted_cookies
    logger.debug("cookie version breakdown: %s", counts)
"linux2"): + return os.path.expanduser("~/.mozilla/firefox") + if sys.platform == "win32": + return os.path.expandvars(R"%APPDATA%\Mozilla\Firefox\Profiles") + if sys.platform == "darwin": + return os.path.expanduser("~/Library/Application Support/Firefox") + raise ValueError("unsupported platform '{}'".format(sys.platform)) + + +# -------------------------------------------------------------------- +# safari + +def _safari_cookies_database(): + try: + path = os.path.expanduser("~/Library/Cookies/Cookies.binarycookies") + return open(path, "rb") + except FileNotFoundError: + logger.debug("Trying secondary cookie location") + path = os.path.expanduser("~/Library/Containers/com.apple.Safari/Data" + "/Library/Cookies/Cookies.binarycookies") + return open(path, "rb") + + +def _safari_parse_cookies_header(data): + p = DataParser(data) + p.expect_bytes(b"cook", "database signature") + number_of_pages = p.read_uint(big_endian=True) + page_sizes = [p.read_uint(big_endian=True) + for _ in range(number_of_pages)] + return page_sizes, p.cursor + + +def _safari_parse_cookies_page(data, jar): + p = DataParser(data) + p.expect_bytes(b"\x00\x00\x01\x00", "page signature") + number_of_cookies = p.read_uint() + record_offsets = [p.read_uint() for _ in range(number_of_cookies)] + if number_of_cookies == 0: + logger.debug("a cookies page of size %s has no cookies", len(data)) + return + + p.skip_to(record_offsets[0], "unknown page header field") + + for i, record_offset in enumerate(record_offsets): + p.skip_to(record_offset, "space between records") + record_length = _safari_parse_cookies_record( + data[record_offset:], jar) + p.read_bytes(record_length) + p.skip_to_end("space in between pages") + + +def _safari_parse_cookies_record(data, cookiejar): + p = DataParser(data) + record_size = p.read_uint() + p.skip(4, "unknown record field 1") + flags = p.read_uint() + is_secure = bool(flags & 0x0001) + p.skip(4, "unknown record field 2") + domain_offset = p.read_uint() + name_offset = 
def _chrome_cookies_database(profile, config):
    """Return a DatabaseCopy for the 'Cookies' file of a Chromium browser.

    profile: None (search the whole browser directory), a filesystem
             path, or a profile name relative to config["directory"]
    config:  settings dict with "browser", "directory" and "profiles"
             keys; config["directory"] is overwritten when 'profile'
             is a path

    Raises FileNotFoundError if no 'Cookies' file exists below the
    selected directory.
    """
    if profile is None:
        search_root = config["directory"]
    elif _is_path(profile):
        search_root = profile
        # for browsers with profiles, the browser directory is taken to
        # be the parent of the given profile path
        config["directory"] = (os.path.dirname(profile)
                               if config["profiles"] else profile)
    elif config["profiles"]:
        search_root = os.path.join(config["directory"], profile)
    else:
        logger.warning("%s does not support profiles", config["browser"])
        search_root = config["directory"]

    path = _find_most_recently_used_file(search_root, "Cookies")
    if path is None:
        raise FileNotFoundError("Unable to find {} cookies database in "
                                "'{}'".format(config["browser"], search_root))

    logger.debug("Extracting cookies from %s", path)
    return DatabaseCopy(path)
browser_dir = { + "brave" : join(config, "BraveSoftware/Brave-Browser"), + "chrome" : join(config, "google-chrome"), + "chromium": join(config, "chromium"), + "edge" : join(config, "microsoft-edge"), + "opera" : join(config, "opera"), + "vivaldi" : join(config, "vivaldi"), + }[browser_name] + + elif sys.platform == "win32": + appdata_local = os.path.expandvars("%LOCALAPPDATA%") + appdata_roaming = os.path.expandvars("%APPDATA%") + browser_dir = { + "brave" : join(appdata_local, + R"BraveSoftware\Brave-Browser\User Data"), + "chrome" : join(appdata_local, R"Google\Chrome\User Data"), + "chromium": join(appdata_local, R"Chromium\User Data"), + "edge" : join(appdata_local, R"Microsoft\Edge\User Data"), + "opera" : join(appdata_roaming, R"Opera Software\Opera Stable"), + "vivaldi" : join(appdata_local, R"Vivaldi\User Data"), + }[browser_name] + + elif sys.platform == "darwin": + appdata = os.path.expanduser("~/Library/Application Support") + browser_dir = { + "brave" : join(appdata, "BraveSoftware/Brave-Browser"), + "chrome" : join(appdata, "Google/Chrome"), + "chromium": join(appdata, "Chromium"), + "edge" : join(appdata, "Microsoft Edge"), + "opera" : join(appdata, "com.operasoftware.Opera"), + "vivaldi" : join(appdata, "Vivaldi"), + }[browser_name] + + else: + raise ValueError("unsupported platform '{}'".format(sys.platform)) + + # Linux keyring names can be determined by snooping on dbus + # while opening the browser in KDE: + # dbus-monitor "interface="org.kde.KWallet"" "type=method_return" + keyring_name = { + "brave" : "Brave", + "chrome" : "Chrome", + "chromium": "Chromium", + "edge" : "Microsoft Edge" if sys.platform == "darwin" else + "Chromium", + "opera" : "Opera" if sys.platform == "darwin" else "Chromium", + "vivaldi" : "Vivaldi" if sys.platform == "darwin" else "Chrome", + }[browser_name] + + browsers_without_profiles = {"opera"} + + return { + "browser" : browser_name, + "directory": browser_dir, + "keyring" : keyring_name, + "profiles" : browser_name 
class ChromeCookieDecryptor:
    """
    Interface of the platform-specific Chrome cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
            - v11: AES-CBC encrypted with an OS protected key (keyring)
            - v11 keys can be stored in various places depending on the
              active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring)
              and more key derivation iterations than linux
            - not v10: "old data" stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads
          /main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads
          /main/components/os_crypt/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    def decrypt(self, encrypted_value):
        """Decrypt the raw 'encrypted_value' of a cookie.

        Must be overridden; this base implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Must be implemented by sub classes")

    @property
    def cookie_counts(self):
        """Per-version counters of the cookies passed to decrypt().

        Must be overridden; this base implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Must be implemented by sub classes")
= {"v10": 0, "v11": 0, "other": 0} + + @staticmethod + def derive_key(password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads + # /main/components/os_crypt/os_crypt_linux.cc + return pbkdf2_sha1(password, salt=b"saltysalt", + iterations=1, key_length=16) + + @property + def cookie_counts(self): + return self._cookie_counts + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b"v10": + self._cookie_counts["v10"] += 1 + return _decrypt_aes_cbc(ciphertext, self._v10_key) + + elif version == b"v11": + self._cookie_counts["v11"] += 1 + if self._v11_key is None: + logger.warning("cannot decrypt v11 cookies: no key found") + return None + return _decrypt_aes_cbc(ciphertext, self._v11_key) + + else: + self._cookie_counts["other"] += 1 + return None + + +class MacChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name): + password = _get_mac_keyring_password(browser_keyring_name) + self._v10_key = None if password is None else self.derive_key(password) + self._cookie_counts = {"v10": 0, "other": 0} + + @staticmethod + def derive_key(password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads + # /main/components/os_crypt/os_crypt_mac.mm + return pbkdf2_sha1(password, salt=b"saltysalt", + iterations=1003, key_length=16) + + @property + def cookie_counts(self): + return self._cookie_counts + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b"v10": + self._cookie_counts["v10"] += 1 + if self._v10_key is None: + logger.warning("cannot decrypt v10 cookies: no key found") + return None + + return _decrypt_aes_cbc(ciphertext, self._v10_key) + + else: + self._cookie_counts["other"] += 1 + # other prefixes are considered "old data", + # which were stored as plaintext + # https://chromium.googlesource.com/chromium/src/+/refs/heads + # 
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookies of Chromium-based browsers on Windows.

    Values prefixed with b"v10" are AES-GCM encrypted with the key
    obtained from _get_windows_v10_key(); every other value is passed
    to DPAPI directly.
    """

    def __init__(self, browser_root):
        self._v10_key = _get_windows_v10_key(browser_root)
        self._cookie_counts = {"v10": 0, "other": 0}

    @property
    def cookie_counts(self):
        """Number of cookies seen so far, keyed by "v10" and "other"."""
        return self._cookie_counts

    def decrypt(self, encrypted_value):
        """Return the decrypted cookie value as str, or None on failure."""
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b"v10":
            self._cookie_counts["v10"] += 1
            if self._v10_key is None:
                logger.warning("cannot decrypt v10 cookies: no key found")
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads
            #   /main/components/os_crypt/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # layout: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[
                nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(
                ciphertext, self._v10_key, nonce, authentication_tag)

        else:
            self._cookie_counts["other"] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads
            #   /main/components/os_crypt/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value)
            if plaintext is None:
                # DPAPI failure was already logged; report this cookie as
                # undecryptable instead of calling .decode() on None
                return None
            try:
                return plaintext.decode()
            except UnicodeDecodeError:
                logger.warning("failed to decrypt cookie (DPAPI) because "
                               "UTF-8 decoding failed")
                return None
def _get_kwallet_password(browser_keyring_name):
    """Read the '<name> Safe Storage' password from KWallet.

    Runs the 'kwallet-query' command line tool against the wallet
    returned by _get_kwallet_network_wallet().

    Returns the password as bytes (without a trailing newline), or b""
    when kwallet-query is missing, fails, or reports that the entry
    could not be read.
    """
    logger.debug("using kwallet-query to obtain password from kwallet")

    if shutil.which("kwallet-query") is None:
        logger.error(
            "kwallet-query command not found. KWallet and kwallet-query "
            "must be installed to read from KWallet. kwallet-query should be "
            "included in the kwallet package for your distribution")
        return b""

    network_wallet = _get_kwallet_network_wallet()

    try:
        proc, stdout = Popen_communicate(
            "kwallet-query",
            "--read-password", browser_keyring_name + " Safe Storage",
            "--folder", browser_keyring_name + " Keys",
            network_wallet,
        )

        if proc.returncode != 0:
            # lazy %-style arguments, like every other log call here
            logger.error("kwallet-query failed with return code %s. "
                         "Please consult the kwallet-query man page "
                         "for details", proc.returncode)
            return b""

        if stdout.lower().startswith(b"failed to read"):
            logger.debug("Failed to read password from kwallet. "
                         "Using empty string instead")
            # This sometimes occurs in KDE because chrome does not check
            # hasEntry and instead just tries to read the value (which
            # kwallet returns "") whereas kwallet-query checks hasEntry.
            # To verify this:
            # dbus-monitor "interface="org.kde.KWallet"" "type=method_return"
            # while starting chrome.
            # This may be a bug, as the intended behaviour is to generate a
            # random password and store it, but that doesn't matter here.
            return b""
        else:
            logger.debug("password found")
            if stdout[-1:] == b"\n":
                stdout = stdout[:-1]
            return stdout
    except Exception as exc:
        logger.warning("exception running kwallet-query (%s: %s)",
                       exc.__class__.__name__, exc)
        return b""
def _get_mac_keyring_password(browser_keyring_name):
    """Read the '<name> Safe Storage' password from the macOS keychain.

    Runs 'security find-generic-password' and returns its output as
    bytes (without a trailing newline). Returns None when the command
    exits with a non-zero status or cannot be run at all.
    """
    logger.debug("using find-generic-password to obtain "
                 "password from OSX keychain")
    try:
        proc, stdout = Popen_communicate(
            "security", "find-generic-password",
            "-w",  # write password to stdout
            "-a", browser_keyring_name,  # match "account"
            "-s", browser_keyring_name + " Safe Storage",  # match "service"
        )

        if proc.returncode != 0:
            # without this check a failed lookup would return b"" and a
            # key would be derived from an empty password
            logger.warning("find-generic-password failed "
                           "with return code %s", proc.returncode)
            return None

        if stdout[-1:] == b"\n":
            stdout = stdout[:-1]
        return stdout
    except Exception as exc:
        logger.warning("exception running find-generic-password (%s: %s)",
                       exc.__class__.__name__, exc)
        return None
class DatabaseCopy():
    """Context manager yielding a connection to a private database copy.

    On entry the file at 'path' is copied into a fresh temporary
    directory and opened with sqlite3; on exit the connection is closed
    and the temporary directory is removed again.
    """

    def __init__(self, path):
        self.path = path
        self.database = None
        self.directory = None

    def __enter__(self):
        try:
            tmpdir = tempfile.TemporaryDirectory(prefix="gallery-dl-")
            self.directory = tmpdir
            destination = os.path.join(tmpdir.name, "copy.sqlite")
            shutil.copyfile(self.path, destination)
            connection = sqlite3.connect(
                destination, isolation_level=None, check_same_thread=False)
            self.database = connection
        except BaseException:
            # do not leave the temporary directory behind on failure
            if self.directory:
                self.directory.cleanup()
            raise
        return connection

    def __exit__(self, exc, value, tb):
        self.database.close()
        self.directory.cleanup()
+DE_CINNAMON = "cinnamon" +DE_GNOME = "gnome" +DE_KDE = "kde" +DE_PANTHEON = "pantheon" +DE_UNITY = "unity" +DE_XFCE = "xfce" + + +""" +https://chromium.googlesource.com/chromium/src/+/refs/heads +/main/components/os_crypt/key_storage_util_linux.h - SelectedLinuxBackend +""" +KEYRING_KWALLET = "kwallet" +KEYRING_GNOMEKEYRING = "gnomekeyring" +KEYRING_BASICTEXT = "basictext" +SUPPORTED_KEYRINGS = {"kwallet", "gnomekeyring", "basictext"} + + +def _get_linux_desktop_environment(env): + """ + Ref: https://chromium.googlesource.com/chromium/src/+/refs/heads + /main/base/nix/xdg_util.cc - GetDesktopEnvironment + """ + xdg_current_desktop = env.get("XDG_CURRENT_DESKTOP") + desktop_session = env.get("DESKTOP_SESSION") + + if xdg_current_desktop: + xdg_current_desktop = (xdg_current_desktop.partition(":")[0] + .strip().lower()) + + if xdg_current_desktop == "unity": + if desktop_session and "gnome-fallback" in desktop_session: + return DE_GNOME + else: + return DE_UNITY + elif xdg_current_desktop == "gnome": + return DE_GNOME + elif xdg_current_desktop == "x-cinnamon": + return DE_CINNAMON + elif xdg_current_desktop == "kde": + return DE_KDE + elif xdg_current_desktop == "pantheon": + return DE_PANTHEON + elif xdg_current_desktop == "xfce": + return DE_XFCE + + if desktop_session: + if desktop_session in ("mate", "gnome"): + return DE_GNOME + if "kde" in desktop_session: + return DE_KDE + if "xfce" in desktop_session: + return DE_XFCE + + if "GNOME_DESKTOP_SESSION_ID" in env: + return DE_GNOME + if "KDE_FULL_SESSION" in env: + return DE_KDE + return DE_OTHER + + +def _mac_absolute_time_to_posix(timestamp): + return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + + timedelta(seconds=timestamp)).timestamp()) + + +def pbkdf2_sha1(password, salt, iterations, key_length): + return pbkdf2_hmac("sha1", password, salt, iterations, key_length) + + +def _decrypt_aes_cbc(ciphertext, key, initialization_vector=b" " * 16): + plaintext = aes.unpad_pkcs7( + 
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag):
    """Decrypt and verify an AES-GCM encrypted cookie value.

    Returns the plaintext decoded as str, or None (after logging a
    warning) when aes.aes_gcm_decrypt_and_verify_bytes() raises
    ValueError or when the plaintext cannot be decoded.
    """
    # NOTE: decryption and decoding are handled in two separate 'try'
    # blocks on purpose: UnicodeDecodeError is a subclass of ValueError,
    # so a combined handler would report decoding problems as MAC failures
    try:
        plaintext = aes.aes_gcm_decrypt_and_verify_bytes(
            ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning("failed to decrypt cookie (AES-GCM) because MAC check "
                       "failed. Possibly the key is wrong?")
        return None

    try:
        return plaintext.decode()
    except UnicodeDecodeError:
        logger.warning("failed to decrypt cookie (AES-GCM) because UTF-8 "
                       "decoding failed. Possibly the key is wrong?")
        return None
+ None, # pvReserved: must be NULL + None, # pPromptStruct: information about prompts to display + 0, # dwFlags + ctypes.byref(blob_out) # pDataOut + ) + if not ret: + logger.warning("failed to decrypt with DPAPI") + return None + + result = ctypes.string_at(blob_out.pbData, blob_out.cbData) + ctypes.windll.kernel32.LocalFree(blob_out.pbData) + return result + + +def _find_most_recently_used_file(root, filename): + # if there are multiple browser profiles, take the most recently used one + paths = [] + for curr_root, dirs, files in os.walk(root): + for file in files: + if file == filename: + paths.append(os.path.join(curr_root, file)) + if not paths: + return None + return max(paths, key=lambda path: os.lstat(path).st_mtime) + + +def _is_path(value): + return os.path.sep in value + + +def _parse_browser_specification(browser, profile=None, keyring=None): + if browser not in SUPPORTED_BROWSERS: + raise ValueError("unsupported browser '{}'".format(browser)) + if keyring and keyring not in SUPPORTED_KEYRINGS: + raise ValueError("unsupported keyring '{}'".format(keyring)) + if profile and _is_path(profile): + profile = os.path.expanduser(profile) + return browser, profile, keyring diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 5675081c..e686c708 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -20,6 +20,7 @@ class ArtstationExtractor(Extractor): filename_fmt = "{category}_{id}_{asset[id]}_{title}.{extension}" directory_fmt = ("{category}", "{userinfo[username]}") archive_fmt = "{asset[id]}" + browser = "firefox" root = "https://www.artstation.com" def __init__(self, match): diff --git a/gallery_dl/extractor/common.py 
b/gallery_dl/extractor/common.py index 95d335d5..c9da99f0 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -285,23 +285,29 @@ class Extractor(): cookiefile = util.expand_path(cookies) try: with open(cookiefile) as fp: - cookies = util.load_cookiestxt(fp) + util.cookiestxt_load(fp, self._cookiejar) except Exception as exc: self.log.warning("cookies: %s", exc) else: - self._update_cookies(cookies) self._cookiefile = cookiefile + elif isinstance(cookies, (list, tuple)): + from ..cookies import load_cookies + try: + load_cookies(self._cookiejar, cookies) + except Exception as exc: + self.log.warning("cookies: %s", exc) else: self.log.warning( - "expected 'dict' or 'str' value for 'cookies' option, " - "got '%s' (%s)", cookies.__class__.__name__, cookies) + "Expected 'dict', 'list', or 'str' value for 'cookies' " + "option, got '%s' (%s)", + cookies.__class__.__name__, cookies) def _store_cookies(self): """Store the session's cookiejar in a cookies.txt file""" if self._cookiefile and self.config("cookies-update", True): try: with open(self._cookiefile, "w") as fp: - util.save_cookiestxt(fp, self._cookiejar) + util.cookiestxt_store(fp, self._cookiejar) except OSError as exc: self.log.warning("cookies: %s", exc) @@ -582,18 +588,21 @@ class BaseExtractor(Extractor): def __init__(self, match): if not self.category: - for index, group in enumerate(match.groups()): - if group is not None: - if index: - self.category, self.root = self.instances[index-1] - if not self.root: - self.root = text.root_from_url(match.group(0)) - else: - self.root = group - self.category = group.partition("://")[2] - break + self._init_category(match) Extractor.__init__(self, match) + def _init_category(self, match): + for index, group in enumerate(match.groups()): + if group is not None: + if index: + self.category, self.root = self.instances[index-1] + if not self.root: + self.root = text.root_from_url(match.group(0)) + else: + self.root = group + self.category = 
group.partition("://")[2] + break + @classmethod def update(cls, instances): extra_instances = config._config.get(cls.basecategory + ":instances") diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index 6d6e1923..1afaac86 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -10,10 +10,10 @@ from . import lolisafe from .. import text -class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): +class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor): category = "cyberdrop" root = "https://cyberdrop.me" - pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)" + pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)" test = ( # images ("https://cyberdrop.me/a/keKRjm4t", { @@ -29,7 +29,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): }, }), # videos - ("https://cyberdrop.me/a/l8gIAXVD", { + ("https://cyberdrop.to/a/l8gIAXVD", { "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$", "count": 31, "keyword": { diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 710950a6..f21817e5 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -9,6 +9,7 @@ """Extractors for https://danbooru.donmai.us/ and other Danbooru instances""" from .common import BaseExtractor, Message +from ..version import __version__ from .. 
import text import datetime @@ -22,16 +23,7 @@ class DanbooruExtractor(BaseExtractor): per_page = 200 def __init__(self, match): - BaseExtractor.__init__(self, match) - - self.ugoira = self.config("ugoira", False) - self.external = self.config("external", False) - self.extended_metadata = self.config("metadata", False) - - username, api_key = self._get_auth_info() - if username: - self.log.debug("Using HTTP Basic Auth for user '%s'", username) - self.session.auth = (username, api_key) + self._init_category(match) instance = INSTANCES.get(self.category) or {} iget = instance.get @@ -43,6 +35,17 @@ class DanbooruExtractor(BaseExtractor): self.request_interval_min = iget("request-interval-min", 0.0) self._pools = iget("pools") + BaseExtractor.__init__(self, match) + + self.ugoira = self.config("ugoira", False) + self.external = self.config("external", False) + self.extended_metadata = self.config("metadata", False) + + username, api_key = self._get_auth_info() + if username: + self.log.debug("Using HTTP Basic Auth for user '%s'", username) + self.session.auth = (username, api_key) + def request(self, url, **kwargs): kwargs["headers"] = self.headers return BaseExtractor.request(self, url, **kwargs) @@ -144,7 +147,8 @@ INSTANCES = { "e621": { "root": None, "pattern": r"e(?:621|926)\.net", - "headers": {"User-Agent": "gallery-dl/1.14.0 (by mikf)"}, + "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format( + __version__)}, "pools": "sort", "page-limit": 750, "per-page": 320, diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index fda7220f..85ec0cf6 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -417,8 +417,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor): pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$" test = ( ("https://www.deviantart.com/shimoda7/gallery/", { - "pattern": r"https://(api-da\.wixmp\.com/_api/download/file" - 
r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)", + "pattern": r"https://(images-)?wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.(jpg|png)\?token=.+", "count": ">= 30", "keyword": { "allows_comments": bool, @@ -563,7 +563,8 @@ class DeviantartStashExtractor(DeviantartExtractor): pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" test = ( ("https://sta.sh/022c83odnaxc", { - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.png\?token=.+", "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f", "count": 1, }), @@ -574,7 +575,8 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # downloadable, but no "content" field (#307) ("https://sta.sh/024t4coz16mi", { - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.rar\?token=.+", "count": 1, }), # mixed folders and images (#659) @@ -863,8 +865,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), { "options": (("comments", True),), - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", "keyword": {"comments": list}, + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.jpg\?token=.+", }), # wixmp URL rewrite (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { @@ -878,8 +881,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), # Flash animation with GIF preview (#1731) ("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", { - "pattern": r"https://api-da\.wixmp\.com/_api/download" - r"/file\?downloadToken=.+", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.swf\?token=.+", "keyword": { "filename": "flash_comic_tutorial_by_yuumei-d3juatd", "extension": "swf", @@ -1015,6 +1018,7 @@ class DeviantartOAuthAPI(): self.folders = extractor.config("folders", False) self.metadata = extractor.extra or extractor.config("metadata", False) + 
self.strategy = extractor.config("pagination") self.client_id = extractor.config("client-id") if self.client_id: @@ -1306,14 +1310,20 @@ class DeviantartOAuthAPI(): self._folders(results) yield from results - if not data["has_more"]: + if not data["has_more"] and ( + self.strategy != "manual" or not results): return + if "next_cursor" in data: params["offset"] = None params["cursor"] = data["next_cursor"] - else: + elif data["next_offset"] is not None: params["offset"] = data["next_offset"] params["cursor"] = None + else: + if params.get("offset") is None: + return + params["offset"] = int(params["offset"]) + len(results) def _pagination_list(self, endpoint, params, key="results"): result = [] diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 4e11a510..713fc2a7 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -120,7 +120,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "date": "dt:2018-03-18 20:15:00", "eh_category": "Non-H", "expunged": False, - "favorites": "20", + "favorites": "21", "filecount": "4", "filesize": 1488978, "gid": 1200119, diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 04e5926b..093113d3 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for 4chan archives based on FoolFuuka""" +"""Extractors for FoolFuuka 4chan archives""" from .common import BaseExtractor, Message from .. 
import text @@ -16,6 +16,7 @@ import itertools class FoolfuukaExtractor(BaseExtractor): """Base extractor for FoolFuuka based boards/archives""" basecategory = "foolfuuka" + filename_fmt = "{timestamp_ms} {filename_media}.{extension}" archive_fmt = "{board[shortname]}_{num}_{timestamp}" external = "default" @@ -40,6 +41,9 @@ class FoolfuukaExtractor(BaseExtractor): post["filename"], _, post["extension"] = \ media["media"].rpartition(".") + post["filename_media"] = media["media_filename"].rpartition(".")[0] + post["timestamp_ms"] = text.parse_int( + media["media_orig"].rpartition(".")[0]) yield Message.Url, url, post def metadata(self): @@ -66,6 +70,7 @@ BASE_PATTERN = FoolfuukaExtractor.update({ }, "archivedmoe": { "root": "https://archived.moe", + "pattern": r"archived\.moe", }, "archiveofsins": { "root": "https://archiveofsins.com", @@ -73,12 +78,15 @@ BASE_PATTERN = FoolfuukaExtractor.update({ }, "b4k": { "root": "https://arch.b4k.co", + "pattern": r"arch\.b4k\.co", }, "desuarchive": { "root": "https://desuarchive.org", + "pattern": r"desuarchive\.org", }, "fireden": { "root": "https://boards.fireden.net", + "pattern": r"boards\.fireden\.net", }, "nyafuu": { "root": "https://archive.nyafuu.org", @@ -90,9 +98,11 @@ BASE_PATTERN = FoolfuukaExtractor.update({ }, "thebarchive": { "root": "https://thebarchive.com", + "pattern": r"thebarchive\.com", }, "wakarimasen": { "root": "https://archive.wakarimasen.moe", + "pattern": r"archive\.wakarimasen\.moe", }, }) @@ -101,7 +111,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): """Base extractor for threads on FoolFuuka based boards/archives""" subcategory = "thread" directory_fmt = ("{category}", "{board[shortname]}", - "{thread_num}{title:? 
- //}") + "{thread_num} {title|comment[:50]}") pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)" test = ( ("https://archive.4plebs.org/tg/thread/54059290", { diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index c09eb969..382cc259 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2021 Mike Fährmann +# Copyright 2016-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -41,6 +41,7 @@ class FoolslideExtractor(BaseExtractor): BASE_PATTERN = FoolslideExtractor.update({ "kireicake": { "root": "https://reader.kireicake.com", + "pattern": r"reader\.kireicake\.com", }, "powermanga": { "root": "https://read.powermanga.org", diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py index 541f454b..9c19664e 100644 --- a/gallery_dl/extractor/gelbooru_v01.py +++ b/gallery_dl/extractor/gelbooru_v01.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for Gelbooru v0.1 sites""" +"""Extractors for Gelbooru Beta 0.1.11 sites""" from . import booru from .. 
import text @@ -42,14 +42,43 @@ class GelbooruV01Extractor(booru.BooruExtractor): return post + def _pagination(self, url, begin, end): + pid = self.page_start + + while True: + page = self.request(url + str(pid)).text + + cnt = 0 + for post_id in text.extract_iter(page, begin, end): + yield self._parse_post(post_id) + cnt += 1 + + if cnt < self.per_page: + return + pid += self.per_page + BASE_PATTERN = GelbooruV01Extractor.update({ - "thecollection" : {"root": "https://the-collection.booru.org"}, - "illusioncardsbooru": {"root": "https://illusioncards.booru.org"}, - "allgirlbooru" : {"root": "https://allgirl.booru.org"}, - "drawfriends" : {"root": "https://drawfriends.booru.org"}, - "vidyart" : {"root": "https://vidyart.booru.org"}, - "theloudbooru" : {"root": "https://tlb.booru.org"}, + "thecollection": { + "root": "https://the-collection.booru.org", + "pattern": r"the-collection\.booru\.org", + }, + "illusioncardsbooru": { + "root": "https://illusioncards.booru.org", + "pattern": r"illusioncards\.booru\.org", + }, + "allgirlbooru": { + "root": "https://allgirl.booru.org", + "pattern": r"allgirl\.booru\.org", + }, + "drawfriends": { + "root": "https://drawfriends.booru.org", + "pattern": r"drawfriends\.booru\.org", + }, + "vidyart": { + "root": "https://vidyart.booru.org", + "pattern": r"vidyart\.booru\.org", + }, }) @@ -75,7 +104,6 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor): }), ("https://drawfriends.booru.org/index.php?page=post&s=list&tags=all"), ("https://vidyart.booru.org/index.php?page=post&s=list&tags=all"), - ("https://tlb.booru.org/index.php?page=post&s=list&tags=all"), ) def __init__(self, match): @@ -88,20 +116,42 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor): def posts(self): url = "{}/index.php?page=post&s=list&tags={}&pid=".format( self.root, self.tags) - pid = self.page_start + return self._pagination(url, 'class="thumb">") additional_data, pos = text.extract( @@ -349,13 +384,15 @@ class InstagramExtractor(Extractor): return 
data def _extract_profile_page(self, url): - data = self._extract_shared_data(url)["entry_data"] + page = self.request(url).text + data = self._extract_shared_data(page)["entry_data"] if "HttpErrorPage" in data: raise exception.NotFoundError("user") return data["ProfilePage"][0]["graphql"]["user"] def _extract_post_page(self, url): - data = self._extract_shared_data(url)["entry_data"] + page = self.request(url).text + data = self._extract_shared_data(page)["entry_data"] if "HttpErrorPage" in data: raise exception.NotFoundError("post") return data["PostPage"][0] @@ -524,7 +561,8 @@ class InstagramTagExtractor(InstagramExtractor): def posts(self): url = "{}/explore/tags/{}/".format(self.root, self.item) - page = self._extract_shared_data(url)["entry_data"]["TagPage"][0] + page = self._extract_shared_data( + self.request(url).text)["entry_data"]["TagPage"][0] if "data" in page: return self._pagination_sections(page["data"]["recent"]) @@ -718,8 +756,12 @@ class InstagramStoriesExtractor(InstagramExtractor): reel_id = "highlight:" + self.highlight_id else: url = "{}/stories/{}/".format(self.root, self.user) + with self.request(url, allow_redirects=False) as response: + if 300 <= response.status_code < 400: + return () + page = response.text try: - data = self._extract_shared_data(url)["entry_data"] + data = self._extract_shared_data(page)["entry_data"] user = data["StoriesPage"][0]["user"] except KeyError: return () diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py index 88d57e5c..ae4112b3 100644 --- a/gallery_dl/extractor/issuu.py +++ b/gallery_dl/extractor/issuu.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,7 +9,7 @@ """Extractors for https://issuu.com/""" from .common import GalleryExtractor, Extractor, Message -from .. 
import text, util +from .. import text import json @@ -22,33 +22,30 @@ class IssuuBase(): class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): """Extractor for a single publication""" subcategory = "publication" - directory_fmt = ("{category}", "{document[userName]}", - "{document[originalPublishDate]} {document[title]}") + directory_fmt = ("{category}", "{document[username]}", + "{document[date]:%Y-%m-%d} {document[title]}") filename_fmt = "{num:>03}.{extension}" - archive_fmt = "{document[id]}_{num}" + archive_fmt = "{document[publicationId]}_{num}" pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)" test = ("https://issuu.com/issuu/docs/motions-1-2019/", { "pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg", "count" : 36, "keyword": { "document": { - "access" : "public", - "articleStories": list, - "contentRating" : dict, + "access" : "PUBLIC", + "contentRating" : { + "isAdsafe" : True, + "isExplicit": False, + "isReviewed": True, + }, "date" : "dt:2019-09-16 00:00:00", "description" : "re:Motions, the brand new publication by I", - "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510", "documentName" : "motions-1-2019", - "downloadState" : "NOT_AVAILABLE", - "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510", - "isConverting" : False, - "isQuarantined" : False, - "lang" : "en", - "language" : "English", + "downloadable" : False, "pageCount" : 36, "publicationId" : "d99ec95935f15091b040cb8060f05510", "title" : "Motions by Issuu - Issue 1", - "userName" : "issuu", + "username" : "issuu", }, "extension": "jpg", "filename" : r"re:page_\d+", @@ -58,17 +55,18 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): def metadata(self, page): data = json.loads(text.extract( - page, 'window.__INITIAL_STATE__ =', ';\n')[0]) + page, '