1#!/usr/bin/env python3
2"""
3
4See EOF for license/metadata/notes as applicable
5"""
6
7# Imports:
8from __future__ import annotations
9
10# ##-- stdlib imports
11import datetime
12import enum
13import functools as ftz
14import itertools as itz
15import logging as logmod
16import pathlib as pl
17import re
18import time
19import types
20import weakref
21from uuid import UUID, uuid1
22
23# ##-- end stdlib imports
24
25# ##-- 3rd party imports
26import bibtexparser
27import bibtexparser.model as model
28from bibtexparser.middlewares.middleware import (BlockMiddleware, LibraryMiddleware)
29from jgdv import Proto, Mixin
30from jgdv.files.bookmarks import BookmarkCollection
31from jgdv.files.tags import TagFile
32from waybackpy import WaybackMachineSaveAPI
33
34# ##-- end 3rd party imports
35
36# ##-- 1st party imports
37import bibble._interface as API
38from . import _interface as FAPI
39from ._firefox import FirefoxController
40from bibble.util.mixins import FieldMatcher_m, EntrySkipper_m
41from bibble.util.middlecore import IdenBlockMiddleware
42
43# ##-- end 1st party imports
44
45# ##-- types
46# isort: off
47import abc
48import collections.abc
49from typing import TYPE_CHECKING, cast, assert_type, assert_never
50from typing import Generic, NewType
51# Protocols:
52from typing import Protocol, runtime_checkable
53# Typing Decorators:
54from typing import no_type_check, final, override, overload
55
56if TYPE_CHECKING:
57 from jgdv import Maybe
58 from typing import Final
59 from typing import ClassVar, Any, LiteralString
60 from typing import Never, Self, Literal
61 from typing import TypeGuard
62 from collections.abc import Iterable, Iterator, Callable, Generator
63 from collections.abc import Sequence, Mapping, MutableMapping, Hashable
64
65 from bibtexparser.library import Library
66
67##--|
68
69# isort: on
70# ##-- end types
71
##-- logging
# Module-level logger, named after this module (`__name__`).
# NOTE: the name `logging` here is a Logger *instance*; the stdlib logging
# module itself is imported above under the alias `logmod`.
logging = logmod.getLogger(__name__)
##-- end logging
75
76##--|
77
[docs]
78@Mixin(EntrySkipper_m)
79class OnlineDownloader(IdenBlockMiddleware):
80 """
81 if the entry is 'online', and it doesn't have a file associated with it,
82 download it as a pdf and add it to the entry
83 """
84 _whitelist = ("online", "blog")
85 _target_dir : pl.Path
86
87 def __init__(self, *, target:pl.Path, **kwargs):
88 super().__init__(**kwargs)
89 self._extra.setdefault("tqdm", True)
90 self.set_entry_skiplists(white=self._whitelist, black=[])
91 self._target_dir = target
92
99
[docs]
100 def transform_Entry(self, entry, library):
101 if self.should_skip_entry(entry, library):
102 return [entry]
103
104 match entry.get("url"), entry.get("file"):
105 case _, pl.Path()|str():
106 self._logger.info("Entry %s : Already has file", entry.key)
107 return entry
108 case None, _:
109 self._logger.warning("Entry %s : no url found", entry.key)
110 return entry
111 case model.Field(value=url), None:
112 safe_key = entry.key.replace(":","_")
113 dest = (self._target_dir / safe_key).with_suffix(".pdf")
114 FirefoxController.save_pdf(url, dest)
115 # add it to the entry
116 entry.set_field(model.Field("file", value=dest))
117
118 return [entry]