1#!/usr/bin/env python3
2"""
3
4"""
5
6# Imports:
7from __future__ import annotations
8
9# ##-- stdlib imports
10import datetime
11import enum
12import functools as ftz
13import itertools as itz
14import logging as logmod
15import pathlib as pl
16import re
17import time
18import types
19import weakref
20from uuid import UUID, uuid1
21
22# ##-- end stdlib imports
23
24# ##-- 3rd party imports
25from jgdv import Mixin, Proto
26from jgdv.debugging.timing import TimeCtx
27from bibtexparser.library import Library
28from bibtexparser.splitter import Splitter
29
30# ##-- end 3rd party imports
31
32from bibble import _interface as API
33from bibble.model import MetaBlock
34from bibble.util.mixins import MiddlewareValidator_m
35from bibble.util import PairStack
36from ._util import Runner_m
37
38# ##-- types
39# isort: off
40import abc
41import collections.abc
42from typing import TYPE_CHECKING, cast, assert_type, assert_never
43from typing import Generic, NewType
44# Protocols:
45from typing import Protocol, runtime_checkable
46# Typing Decorators:
47from typing import no_type_check, final, override, overload
48
49if TYPE_CHECKING:
50 from jgdv import Maybe
51 from typing import Final
52 from typing import ClassVar, Any, LiteralString
53 from typing import Never, Self, Literal
54 from typing import TypeGuard
55 from collections.abc import Iterable, Iterator, Callable, Generator
56 from collections.abc import Sequence, Mapping, MutableMapping, Hashable
57
58 type Logger = logmod.Logger
59 type Middleware = API.Middleware_p | API.BidirectionalMiddleware_p
60##--|
61
62# isort: on
63# ##-- end types
64
65##-- logging
66logging = logmod.getLogger(__name__)
67##-- end logging
68
[docs]
69@Proto(API.Reader_p)
70@Mixin(Runner_m, MiddlewareValidator_m)
71class BibbleReader:
72 """ A Refactored bibtexparser reader
73
74 """
75 _middlewares : list[Middleware]
76 _lib_class : type[Library]
77
78 def __init__(self, stack:PairStack|list[Middleware], *, lib_base:Maybe[type]=None, logger:Maybe[Logger]=None):
79 match stack:
80 case PairStack():
81 self._middlewares = stack.read_stack()
82 case list():
83 self._middlewares = stack
84 case x:
85 raise TypeError(type(x))
86
87 self._lib_class : type = lib_base or Library
88 self._logger = logger or logging
89
90 self.exclude_middlewares(API.WriteTime_p)
91 if not issubclass(self._lib_class, Library):
92 raise TypeError("Bad library base pased to reader", lib_base)
93
[docs]
94 def read_dir(self, source:pl.Path, *, ext:str, into:Maybe[Library]=None, append:Maybe[list[Middleware]]=None) -> Maybe[Library]:
95 visited : set = set()
96 to_read : list = []
97 for args in pl.Path().walk(top_down=True, on_error=None, follow_symlinks=False):
98 dpath : pl.Path = args[0]
99 dnames : list[str] = args[0] # Edit to control descent
100 filenames : list[str] = args[2]
101 if dpath in visited:
102 dnames.clear()
103 else:
104 visited.add(dpath)
105
106 to_read += [y for x in filenames if (y:=dpath/x).suffix == ext]
107 else:
108 pass
109 ##--|
110 lib = into or self._lib_class()
111 for x in to_read:
112 match self.read(x, into=lib, append=append):
113 case None:
114 return None
115 case Library() as y:
116 lib = y
117 else:
118 return lib
119
[docs]
120 def read(self, source:str|pl.Path, *, into:Maybe[Library]=None, append:Maybe[list[Middleware]]=None) -> Maybe[Library]:
121 """ read source and make a new library.
122 if given 'into' lib, add the newly read entries into that libray as well
123 """
124 source_text : str
125 basic : Library
126 transformed : Library
127
128 match source:
129 case str():
130 source_text = source
131 case pl.Path():
132 try:
133 source_text = source.read_text()
134 except UnicodeDecodeError as err:
135 logging.exception("Unicode Error in File: %s, Start: %s", source, err.start)
136 return None
137 case x:
138 raise TypeError(type(x))
139
140 with TimeCtx(level=logmod.INFO) as timer:
141 timer.msg("--> Bibtex Reading: Start")
142 basic = self._read_into(self._lib_class(), source_text)
143
144 timer.msg("<-- Bibtex Reading took: %s", timer.total_s)
145
146 with TimeCtx(level=logmod.INFO) as timer:
147 timer.msg("--> Read Transforms: Start")
148 transformed = self._run_readwares(basic, append=append)
149
150 timer.msg("<-- Read Transforms took: %s", timer.total_s)
151
152 entry_keys : set = {x.key for x in transformed.entries}
153 match into:
154 case Library():
155 into.add(transformed.blocks)
156 final_lib = into
157 case None:
158 final_lib = transformed
159 case x:
160 raise TypeError(type(x))
161
162 return self._map_keys(final_lib, source, entry_keys)
163
[docs]
164 def _map_keys(self, final_lib:Library, source:str|pl.Path, entry_keys:set[str]) -> Library:
165 """ Map source -> keys
166
167 """
168 logging.debug("Mapping %s new keys to source %s", len(entry_keys), source)
169 match MetaBlock.find_in(final_lib), source:
170 case None, str():
171 final_lib.add(MetaBlock(sources={"raw_text"}, raw_text=entry_keys))
172 case None, pl.Path():
173 kwargs = {str(source) : entry_keys}
174 final_lib.add(MetaBlock(sources={source}, **kwargs))
175 case MetaBlock() as b, str() if 'sources' in b.data:
176 b.data['sources'].add("raw_text")
177 b.data["raw_text"] = entry_keys
178 case MetaBlock() as b, pl.Path() if 'sources' in b.data:
179 b.data['sources'].add(source)
180 b.data[str(source)] = entry_keys
181 case MetaBlock() as b, str():
182 b.data['sources'] = {"raw_text"}
183 b.data["raw_text"] = entry_keys
184 case MetaBlock() as b, pl.Path():
185 b.data['sources'] = {source}
186 b.data[source] = entry_keys
187 case x:
188 raise TypeError(type(x))
189
190 return final_lib
191
[docs]
192 def _read_into(self, lib:Library, source:str) -> Library:
193 assert(isinstance(source, str))
194 splitter = Splitter(bibstr=source)
195 library = splitter.split(library=lib)
196 return library
197