1#!/usr/bin/env python3
2"""
3
4See EOF for license/metadata/notes as applicable
5"""
6
7# Imports:
8from __future__ import annotations
9
10# ##-- stdlib imports
11import datetime
12import enum
13import functools as ftz
14import itertools as itz
15import logging as logmod
16import pathlib as pl
17import re
18import time
19import types
20import weakref
21from uuid import UUID, uuid1
22
23# ##-- end stdlib imports
24
25# ##-- 3rd party imports
26from jgdv import Proto, Mixin
27import bibtexparser
28import bibtexparser.model as model
29from bibtexparser.library import Library
30from bibtexparser import middlewares as ms
31from bibtexparser.middlewares.middleware import (BlockMiddleware,
32 LibraryMiddleware)
33
34# ##-- end 3rd party imports
35
36# ##-- 1st party imports
37from bibble import _interface as API
38from . import _interface as API_F
39from bibble.util.mixins import ErrorRaiser_m, FieldMatcher_m
40from bibble.util.middlecore import IdenBlockMiddleware
41
42# ##-- end 1st party imports
43
44# ##-- types
45# isort: off
46import abc
47import collections.abc
48from typing import TYPE_CHECKING, cast, assert_type, assert_never
49from typing import Generic, NewType
50# Protocols:
51from typing import Protocol, runtime_checkable
52# Typing Decorators:
53from typing import no_type_check, final, override, overload
54
55if TYPE_CHECKING:
56 from jgdv import Maybe
57 from typing import Final
58 from typing import ClassVar, Any, LiteralString
59 from typing import Never, Self, Literal
60 from typing import TypeGuard
61 from collections.abc import Iterable, Iterator, Callable, Generator
62 from collections.abc import Sequence, Mapping, MutableMapping, Hashable
63
64 type Entry = model.Entry
65
66##--|
67
68# isort: on
69# ##-- end types
70
71##-- logging
72logging = logmod.getLogger(__name__)
73##-- end logging
74
[docs]
@Mixin(ErrorRaiser_m, FieldMatcher_m)
class CleanUrls(IdenBlockMiddleware):
    """ Normalise the doi / url / ee fields of an entry.

    The doi, url and ee keys are registered as the field whitelist.
    ``field_h`` then proposes replacement fields for a single field:

    - a value starting with ``DOI_PREFIX`` is stored, prefix removed, as doi
    - a value starting with ``BIBDB_PREFIX`` yields a biburl field
      (``DBLP_PREFIX`` + value) and a dblp source attribution
    - an ee value is copied to url when the entry has no url, and ee is blanked
    """

    _whitelist = (API_F.DOI_K, API_F.URL_K, API_F.EE_K)

    def __init__(self, **kwargs):
        """ Forward kwargs to the parent, then register the whitelist (empty blacklist). """
        super().__init__(**kwargs)
        self.set_field_matchers(white=self._whitelist, black=[])

    def on_read(self):
        # NOTE(review): in the visible imports `Never` is only bound under
        # TYPE_CHECKING, so executing this line looks like it raises NameError.
        # Presumably a "must not be called" marker method - confirm intent.
        Never()

    def transform_Entry(self, entry:Entry, library:Library):
        """ Run the field matchers over ``entry`` and wrap the outcome in a list.

        Returns:
            ``[entry]`` when ``match_on_fields`` gives back an Entry,
            ``[error block]`` when it gives back an Exception.

        Raises:
            TypeError: ``match_on_fields`` returned anything else.
        """
        result = self.match_on_fields(entry, library)
        if isinstance(result, model.Entry):
            return [result]
        if isinstance(result, Exception):
            return [self.make_error_block(entry, result)]
        raise TypeError(type(result))

    def field_h(self, field, entry):
        """ Build the list of replacement fields for one field of ``entry``.

        Presumably called by ``match_on_fields`` for each whitelisted field
        (see FieldMatcher_m - TODO confirm).

        Returns:
            A fresh list of ``model.Field``; empty when the value is not a
            string or no rule applies. Only the first applicable rule is used.
        """
        raw = field.value
        if not isinstance(raw, str):
            return []

        # Rule 1: drop the doi prefix. Applies to the doi field itself, or to
        # any other handled field as long as the entry has no doi yet.
        if raw.startswith(API_F.DOI_PREFIX) and (field.key == API_F.DOI_K or API_F.DOI_K not in entry):
            return [model.Field(API_F.DOI_K, raw.removeprefix(API_F.DOI_PREFIX))]

        # Rule 2: dblp record. Skipped once the entry already carries a source.
        if raw.startswith(API_F.BIBDB_PREFIX) and API_F.SOURCE_K not in entry:
            biburl = "".join((API_F.DBLP_PREFIX, raw))
            return [
                model.Field(API_F.BIBURL_K, biburl),
                model.Field(API_F.SOURCE_K, "dblp computer science bibliography, https://dblp.org"),
            ]

        # Rule 3: promote ee to url when there is no url, and blank out ee.
        if field.key == API_F.EE_K and API_F.URL_K not in entry:
            return [
                model.Field(API_F.URL_K, raw),
                model.Field(API_F.EE_K, ""),
            ]

        return []
117
[docs]
@Mixin(ErrorRaiser_m, FieldMatcher_m)
class ExpandUrls(IdenBlockMiddleware):
    """ TODO expand shortened urls.

    Placeholder middleware: ``field_h`` is not implemented yet.
    """

    def on_read(self):
        # NOTE(review): in the visible imports `Never` is only bound under
        # TYPE_CHECKING, so executing this line looks like it raises NameError.
        # Presumably a "must not be called" marker method - confirm intent.
        Never()

    def transform_Entry(self, entry:Entry, library:Library):
        """ Run the field matchers over ``entry`` and wrap the outcome in a list.

        Returns:
            ``[entry]`` when ``match_on_fields`` gives back an Entry,
            ``[error block]`` when it gives back an Exception.

        Raises:
            TypeError: ``match_on_fields`` returned anything else.
        """
        outcome = self.match_on_fields(entry, library)
        if isinstance(outcome, model.Entry):
            return [outcome]
        if isinstance(outcome, Exception):
            return [self.make_error_block(entry, outcome)]
        raise TypeError(type(outcome))

    def field_h(self, field, entry):
        """ Per-field handler stub; always raises NotImplementedError. """
        raise NotImplementedError()