Source code for bibble.files._firefox

  1#!/usr/bin/env python3
  2"""
  3
  4"""
  5# Imports:
  6from __future__ import annotations
  7
  8# ##-- stdlib imports
  9import datetime
 10import base64
 11import enum
 12import functools as ftz
 13import itertools as itz
 14import logging as logmod
 15import pathlib as pl
 16import re
 17import time
 18import types
 19import collections
 20import contextlib
 21import hashlib
 22from copy import deepcopy
 23from uuid import UUID, uuid1
 24from weakref import ref
 25import atexit # for @atexit.register
 26import faulthandler
 27# ##-- end stdlib imports
 28
 29from selenium.webdriver import Firefox, FirefoxOptions, FirefoxService
 30from selenium.webdriver.common.print_page_options import PrintOptions
 31import bibble._interface as API
 32from . import _interface as FAPI
 33# ##-- types
 34# isort: off
 35import abc
 36import collections.abc
 37from typing import TYPE_CHECKING, cast, assert_type, assert_never
 38from typing import Generic, NewType
 39# Protocols:
 40from typing import Protocol, runtime_checkable
 41# Typing Decorators:
 42from typing import no_type_check, final, override, overload
 43# from dataclasses import InitVar, dataclass, field
 44# from pydantic import BaseModel, Field, model_validator, field_validator, ValidationError
 45
 46if TYPE_CHECKING:
 47    from jgdv import Maybe
 48    from typing import Final
 49    from typing import ClassVar, Any, LiteralString
 50    from typing import Never, Self, Literal
 51    from typing import TypeGuard
 52    from collections.abc import Iterable, Iterator, Callable, Generator
 53    from collections.abc import Sequence, Mapping, MutableMapping, Hashable
 54
 55##--|
 56
 57# isort: on
 58# ##-- end types
 59
 60##-- logging
 61logging = logmod.getLogger(__name__)
 62##-- end logging
 63
 64# Vars:
 65
 66# Body:
 67
class FirefoxController:
    """ A static controller for starting and closing firefox via selenium.

    The live driver is cached on the class itself, under the attribute
    name given by FAPI.FF_DRIVER, so repeated calls share one browser.
    """

    @staticmethod
    def setup(*, opts:list, kwargs:dict) -> Firefox:
        """ Set up a selenium driven firefox for printing pages to pdf.

        Args:
            opts: command line arguments, each passed to
                FirefoxOptions.add_argument.
            kwargs: preference name -> value pairs, each passed to
                FirefoxOptions.set_preference.

        Returns:
            The cached driver if one exists, otherwise a newly started one.
            When a cached driver is reused, opts and kwargs are ignored.
        """
        existing = getattr(FirefoxController, FAPI.FF_DRIVER, None)
        if existing is not None:
            logging.info("Skipping Firefox Setup")
            return existing

        logging.info("Setting up headless Firefox")
        options = FirefoxOptions()
        for arg in opts:
            options.add_argument(arg)

        for pref, value in kwargs.items():
            options.set_preference(pref, value)

        service = FirefoxService(executable_path=FAPI.GECKO_DRIVER)
        driver = Firefox(options=options, service=service)
        driver.set_page_load_timeout(FAPI.LOAD_TIMEOUT)
        # Cache on the class so later setup() calls reuse this browser.
        setattr(FirefoxController, FAPI.FF_DRIVER, driver)
        return driver

    @staticmethod
    def close() -> None:
        """ Quit the cached firefox driver, if there is one.

        The cached reference is removed even when quit() raises, so that a
        later setup() starts a fresh browser instead of handing back a
        driver whose session has already been shut down.
        Calling this with no cached driver is a no-op.
        """
        driver = getattr(FirefoxController, FAPI.FF_DRIVER, None)
        if driver is None:
            return

        logging.info("Closing Firefox")
        try:
            driver.quit()
        finally:
            delattr(FirefoxController, FAPI.FF_DRIVER)

    @staticmethod
    def save_pdf(url:str, dest:pl.Path) -> None:
        """ Print a url to a pdf file using selenium.

        Args:
            url: the address to print. It is appended to FAPI.READER_PREFIX
                before being loaded.
            dest: the file to write. Must be a pathlib.Path ending in '.pdf'.

        Raises:
            FileNotFoundError: if dest is not a pathlib.Path, or its suffix
                is not '.pdf'.

        Nothing is loaded or written if dest already exists, and nothing is
        written if the browser returns an empty document.
        """
        if not isinstance(dest, pl.Path):
            raise FileNotFoundError("Destination to save pdf to is not a path", dest)

        if dest.suffix != ".pdf":
            raise FileNotFoundError("Destination isn't a pdf", dest)

        if dest.exists():
            logging.info("Destination already exists: %s", dest)
            return

        driver = FirefoxController.setup(opts=FAPI.SELENIUM_OPTS, kwargs=FAPI.SELENIUM_PREFS)
        logging.info("Saving: %s", url)
        print_ops = PrintOptions()
        # NOTE(review): selenium documents a `page_ranges` property; confirm
        # that assigning `page_range` has any effect on the printed output.
        print_ops.page_range = "all"

        driver.get(FAPI.READER_PREFIX + url)
        # Fixed wait after the load call returns.
        # NOTE(review): presumably gives the reader view time to render - confirm.
        time.sleep(FAPI.LOAD_TIMEOUT)
        pdf = driver.print_page(print_options=print_ops)
        pdf_bytes = base64.b64decode(pdf)

        if not pdf_bytes:
            logging.warning("No Bytes were downloaded")
            return

        logging.info("Saving to: %s", dest)
        dest.write_bytes(pdf_bytes)