Source code for bibble.io.reader

  1#!/usr/bin/env python3
  2"""
  3
  4"""
  5
  6# Imports:
  7from __future__ import annotations
  8
  9# ##-- stdlib imports
 10import datetime
 11import enum
 12import functools as ftz
 13import itertools as itz
 14import logging as logmod
 15import pathlib as pl
 16import re
 17import time
 18import types
 19import weakref
 20from uuid import UUID, uuid1
 21
 22# ##-- end stdlib imports
 23
 24# ##-- 3rd party imports
 25from jgdv import Mixin, Proto
 26from jgdv.debugging.timing import TimeCtx
 27from bibtexparser.library import Library
 28from bibtexparser.splitter import Splitter
 29
 30# ##-- end 3rd party imports
 31
 32from bibble import _interface as API
 33from bibble.model import MetaBlock
 34from bibble.util.mixins import MiddlewareValidator_m
 35from bibble.util import PairStack
 36from ._util import Runner_m
 37
 38# ##-- types
 39# isort: off
 40import abc
 41import collections.abc
 42from typing import TYPE_CHECKING, cast, assert_type, assert_never
 43from typing import Generic, NewType
 44# Protocols:
 45from typing import Protocol, runtime_checkable
 46# Typing Decorators:
 47from typing import no_type_check, final, override, overload
 48
 49if TYPE_CHECKING:
 50    from jgdv import Maybe
 51    from typing import Final
 52    from typing import ClassVar, Any, LiteralString
 53    from typing import Never, Self, Literal
 54    from typing import TypeGuard
 55    from collections.abc import Iterable, Iterator, Callable, Generator
 56    from collections.abc import Sequence, Mapping, MutableMapping, Hashable
 57
 58    type Logger = logmod.Logger
 59    type Middleware = API.Middleware_p | API.BidirectionalMiddleware_p
 60##--|
 61
 62# isort: on
 63# ##-- end types
 64
 65##-- logging
 66logging = logmod.getLogger(__name__)
 67##-- end logging
 68
[docs] 69@Proto(API.Reader_p) 70@Mixin(Runner_m, MiddlewareValidator_m) 71class BibbleReader: 72 """ A Refactored bibtexparser reader 73 74 """ 75 _middlewares : list[Middleware] 76 _lib_class : type[Library] 77 78 def __init__(self, stack:PairStack|list[Middleware], *, lib_base:Maybe[type]=None, logger:Maybe[Logger]=None): 79 match stack: 80 case PairStack(): 81 self._middlewares = stack.read_stack() 82 case list(): 83 self._middlewares = stack 84 case x: 85 raise TypeError(type(x)) 86 87 self._lib_class : type = lib_base or Library 88 self._logger = logger or logging 89 90 self.exclude_middlewares(API.WriteTime_p) 91 if not issubclass(self._lib_class, Library): 92 raise TypeError("Bad library base pased to reader", lib_base) 93
[docs] 94 def read_dir(self, source:pl.Path, *, ext:str, into:Maybe[Library]=None, append:Maybe[list[Middleware]]=None) -> Maybe[Library]: 95 visited : set = set() 96 to_read : list = [] 97 for args in pl.Path().walk(top_down=True, on_error=None, follow_symlinks=False): 98 dpath : pl.Path = args[0] 99 dnames : list[str] = args[0] # Edit to control descent 100 filenames : list[str] = args[2] 101 if dpath in visited: 102 dnames.clear() 103 else: 104 visited.add(dpath) 105 106 to_read += [y for x in filenames if (y:=dpath/x).suffix == ext] 107 else: 108 pass 109 ##--| 110 lib = into or self._lib_class() 111 for x in to_read: 112 match self.read(x, into=lib, append=append): 113 case None: 114 return None 115 case Library() as y: 116 lib = y 117 else: 118 return lib
119
[docs] 120 def read(self, source:str|pl.Path, *, into:Maybe[Library]=None, append:Maybe[list[Middleware]]=None) -> Maybe[Library]: 121 """ read source and make a new library. 122 if given 'into' lib, add the newly read entries into that libray as well 123 """ 124 source_text : str 125 basic : Library 126 transformed : Library 127 128 match source: 129 case str(): 130 source_text = source 131 case pl.Path(): 132 try: 133 source_text = source.read_text() 134 except UnicodeDecodeError as err: 135 logging.exception("Unicode Error in File: %s, Start: %s", source, err.start) 136 return None 137 case x: 138 raise TypeError(type(x)) 139 140 with TimeCtx(level=logmod.INFO) as timer: 141 timer.msg("--> Bibtex Reading: Start") 142 basic = self._read_into(self._lib_class(), source_text) 143 144 timer.msg("<-- Bibtex Reading took: %s", timer.total_s) 145 146 with TimeCtx(level=logmod.INFO) as timer: 147 timer.msg("--> Read Transforms: Start") 148 transformed = self._run_readwares(basic, append=append) 149 150 timer.msg("<-- Read Transforms took: %s", timer.total_s) 151 152 entry_keys : set = {x.key for x in transformed.entries} 153 match into: 154 case Library(): 155 into.add(transformed.blocks) 156 final_lib = into 157 case None: 158 final_lib = transformed 159 case x: 160 raise TypeError(type(x)) 161 162 return self._map_keys(final_lib, source, entry_keys)
163
[docs] 164 def _map_keys(self, final_lib:Library, source:str|pl.Path, entry_keys:set[str]) -> Library: 165 """ Map source -> keys 166 167 """ 168 logging.debug("Mapping %s new keys to source %s", len(entry_keys), source) 169 match MetaBlock.find_in(final_lib), source: 170 case None, str(): 171 final_lib.add(MetaBlock(sources={"raw_text"}, raw_text=entry_keys)) 172 case None, pl.Path(): 173 kwargs = {str(source) : entry_keys} 174 final_lib.add(MetaBlock(sources={source}, **kwargs)) 175 case MetaBlock() as b, str() if 'sources' in b.data: 176 b.data['sources'].add("raw_text") 177 b.data["raw_text"] = entry_keys 178 case MetaBlock() as b, pl.Path() if 'sources' in b.data: 179 b.data['sources'].add(source) 180 b.data[str(source)] = entry_keys 181 case MetaBlock() as b, str(): 182 b.data['sources'] = {"raw_text"} 183 b.data["raw_text"] = entry_keys 184 case MetaBlock() as b, pl.Path(): 185 b.data['sources'] = {source} 186 b.data[source] = entry_keys 187 case x: 188 raise TypeError(type(x)) 189 190 return final_lib
191
[docs] 192 def _read_into(self, lib:Library, source:str) -> Library: 193 assert(isinstance(source, str)) 194 splitter = Splitter(bibstr=source) 195 library = splitter.split(library=lib) 196 return library
197