Source code for bibble.fields.url_reader

  1#!/usr/bin/env python3
  2"""
  3
  4See EOF for license/metadata/notes as applicable
  5"""
  6
  7# Imports:
  8from __future__ import annotations
  9
 10# ##-- stdlib imports
 11import datetime
 12import enum
 13import functools as ftz
 14import itertools as itz
 15import logging as logmod
 16import pathlib as pl
 17import re
 18import time
 19import types
 20import weakref
 21from uuid import UUID, uuid1
 22
 23# ##-- end stdlib imports
 24
 25# ##-- 3rd party imports
 26from jgdv import Proto, Mixin
 27import bibtexparser
 28import bibtexparser.model as model
 29from bibtexparser.library import Library
 30from bibtexparser import middlewares as ms
 31from bibtexparser.middlewares.middleware import (BlockMiddleware,
 32                                                 LibraryMiddleware)
 33
 34# ##-- end 3rd party imports
 35
 36# ##-- 1st party imports
 37from bibble import _interface as API
 38from . import _interface as API_F
 39from bibble.util.mixins import ErrorRaiser_m, FieldMatcher_m
 40from bibble.util.middlecore import IdenBlockMiddleware
 41
 42# ##-- end 1st party imports
 43
 44# ##-- types
 45# isort: off
 46import abc
 47import collections.abc
 48from typing import TYPE_CHECKING, cast, assert_type, assert_never
 49from typing import Generic, NewType
 50# Protocols:
 51from typing import Protocol, runtime_checkable
 52# Typing Decorators:
 53from typing import no_type_check, final, override, overload
 54
 55if TYPE_CHECKING:
 56    from jgdv import Maybe
 57    from typing import Final
 58    from typing import ClassVar, Any, LiteralString
 59    from typing import Never, Self, Literal
 60    from typing import TypeGuard
 61    from collections.abc import Iterable, Iterator, Callable, Generator
 62    from collections.abc import Sequence, Mapping, MutableMapping, Hashable
 63
 64    type Entry = model.Entry
 65
 66##--|
 67
 68# isort: on
 69# ##-- end types
 70
 71##-- logging
 72logging = logmod.getLogger(__name__)
 73##-- end logging
 74
@Mixin(ErrorRaiser_m, FieldMatcher_m)
class CleanUrls(IdenBlockMiddleware):
    """ Strip unnecessary doi and dblp prefixes from urls

    On construction the doi / url / ee keys are registered as the field
    matcher whitelist. `transform_Entry` delegates to `match_on_fields`, and
    `field_h` builds the replacement fields for a single field.
    (Presumably `match_on_fields` calls `field_h` for each whitelisted field
    and merges the returned fields into the entry -- confirm in FieldMatcher_m.)
    """

    # Keys of the fields this middleware registers with the field matcher
    _whitelist = (API_F.DOI_K, API_F.URL_K, API_F.EE_K)

    def __init__(self, **kwargs):
        """ Forward kwargs to the base middleware, then register the whitelist """
        super().__init__(**kwargs)
        self.set_field_matchers(white=self._whitelist, black=[])

    def on_read(self):
        # NOTE(review): `Never` is only imported under TYPE_CHECKING in the
        # visible module header, so calling this raises NameError at runtime.
        # Presumably this method is a read-time marker that is never invoked
        # -- confirm before relying on it.
        Never()

    def transform_Entry(self, entry:Entry, library:Library):
        """ Run the field matchers over an entry and wrap the outcome in a list

        Returns [entry] when matching yields an Entry, or a single error block
        when it yields an Exception. Any other result raises TypeError.
        """
        result = self.match_on_fields(entry, library)
        if isinstance(result, model.Entry):
            return [result]
        if isinstance(result, Exception):
            return [self.make_error_block(entry, result)]
        raise TypeError(type(result))

    def field_h(self, field, entry):
        """ Build the replacement fields for one field of an entry

        Only string values are handled, and the first rule that applies wins:
        1. value starts with the doi prefix, and the field is the doi field or
           the entry has no doi field: emit a doi field without the prefix.
        2. value starts with the bibdb prefix, and the entry has no source
           field: emit a biburl field (dblp prefix + value) and a source field.
        3. the field is the ee field, and the entry has no url field: emit a
           url field with the value, and an ee field with an empty value.

        Returns a (possibly empty) list of new model.Field objects.
        """
        text = field.value
        if not isinstance(text, str):
            return []

        if text.startswith(API_F.DOI_PREFIX) and (field.key == API_F.DOI_K or API_F.DOI_K not in entry):
            # Remove doi prefix
            stripped = text.removeprefix(API_F.DOI_PREFIX)
            return [model.Field(API_F.DOI_K, stripped)]

        if text.startswith(API_F.BIBDB_PREFIX) and API_F.SOURCE_K not in entry:
            # cleanup dblp
            bib_url = "".join((API_F.DBLP_PREFIX, text))
            return [
                model.Field(API_F.BIBURL_K, bib_url),
                model.Field(API_F.SOURCE_K, "dblp computer science bibliography, https://dblp.org"),
            ]

        if field.key == API_F.EE_K and API_F.URL_K not in entry:
            # ee -> url
            return [
                model.Field(API_F.URL_K, text),
                model.Field(API_F.EE_K, ""),
            ]

        return []
117
@Mixin(ErrorRaiser_m, FieldMatcher_m)
class ExpandUrls(IdenBlockMiddleware):
    """ TODO expand shortened urls

    Not implemented yet: `field_h` raises NotImplementedError, and no field
    matchers are registered by this class itself.
    """

    def on_read(self):
        # NOTE(review): `Never` is only imported under TYPE_CHECKING in the
        # visible module header, so calling this raises NameError at runtime.
        # Presumably this method is a read-time marker that is never invoked
        # -- confirm before relying on it.
        Never()

    def transform_Entry(self, entry:Entry, library:Library):
        """ Run the field matchers over an entry and wrap the outcome in a list

        Returns [entry] when matching yields an Entry, or a single error block
        when it yields an Exception. Any other result raises TypeError.
        """
        result = self.match_on_fields(entry, library)
        if isinstance(result, model.Entry):
            return [result]
        if isinstance(result, Exception):
            return [self.make_error_block(entry, result)]
        raise TypeError(type(result))

    def field_h(self, field, entry):
        """ Placeholder field handler: url expansion is not written yet """
        raise NotImplementedError()