Source code for bibble.files.online

  1#!/usr/bin/env python3
  2"""
  3
  4See EOF for license/metadata/notes as applicable
  5"""
  6
  7# Imports:
  8from __future__ import annotations
  9
 10# ##-- stdlib imports
 11import datetime
 12import enum
 13import functools as ftz
 14import itertools as itz
 15import logging as logmod
 16import pathlib as pl
 17import re
 18import time
 19import types
 20import weakref
 21from uuid import UUID, uuid1
 22
 23# ##-- end stdlib imports
 24
 25# ##-- 3rd party imports
 26import bibtexparser
 27import bibtexparser.model as model
 28from bibtexparser.middlewares.middleware import (BlockMiddleware, LibraryMiddleware)
 29from jgdv import Proto, Mixin
 30from jgdv.files.bookmarks import BookmarkCollection
 31from jgdv.files.tags import TagFile
 32from waybackpy import WaybackMachineSaveAPI
 33
 34# ##-- end 3rd party imports
 35
 36# ##-- 1st party imports
 37import bibble._interface as API
 38from . import  _interface as FAPI
 39from ._firefox import FirefoxController
 40from bibble.util.mixins import FieldMatcher_m, EntrySkipper_m
 41from bibble.util.middlecore import IdenBlockMiddleware
 42
 43# ##-- end 1st party imports
 44
 45# ##-- types
 46# isort: off
 47import abc
 48import collections.abc
 49from typing import TYPE_CHECKING, cast, assert_type, assert_never
 50from typing import Generic, NewType
 51# Protocols:
 52from typing import Protocol, runtime_checkable
 53# Typing Decorators:
 54from typing import no_type_check, final, override, overload
 55
 56if TYPE_CHECKING:
 57    from jgdv import Maybe
 58    from typing import Final
 59    from typing import ClassVar, Any, LiteralString
 60    from typing import Never, Self, Literal
 61    from typing import TypeGuard
 62    from collections.abc import Iterable, Iterator, Callable, Generator
 63    from collections.abc import Sequence, Mapping, MutableMapping, Hashable
 64
 65    from bibtexparser.library import Library
 66
 67##--|
 68
 69# isort: on
 70# ##-- end types
 71
 72##-- logging
 73logging = logmod.getLogger(__name__)
 74##-- end logging
 75
 76##--|
 77
[docs] 78@Mixin(EntrySkipper_m) 79class OnlineDownloader(IdenBlockMiddleware): 80 """ 81 if the entry is 'online', and it doesn't have a file associated with it, 82 download it as a pdf and add it to the entry 83 """ 84 _whitelist = ("online", "blog") 85 _target_dir : pl.Path 86 87 def __init__(self, *, target:pl.Path, **kwargs): 88 super().__init__(**kwargs) 89 self._extra.setdefault("tqdm", True) 90 self.set_entry_skiplists(white=self._whitelist, black=[]) 91 self._target_dir = target 92
[docs] 93 def transform(self, library:Library) -> Library: 94 try: 95 result = super().transform(library) 96 return result 97 finally: 98 FirefoxController.close()
99
[docs] 100 def transform_Entry(self, entry, library): 101 if self.should_skip_entry(entry, library): 102 return [entry] 103 104 match entry.get("url"), entry.get("file"): 105 case _, pl.Path()|str(): 106 self._logger.info("Entry %s : Already has file", entry.key) 107 return entry 108 case None, _: 109 self._logger.warning("Entry %s : no url found", entry.key) 110 return entry 111 case model.Field(value=url), None: 112 safe_key = entry.key.replace(":","_") 113 dest = (self._target_dir / safe_key).with_suffix(".pdf") 114 FirefoxController.save_pdf(url, dest) 115 # add it to the entry 116 entry.set_field(model.Field("file", value=dest)) 117 118 return [entry]