Source code for bibble.failure.duplicate_handler

  1#!/usr/bin/env python3
  2"""
  3
  4See EOF for license/metadata/notes as applicable
  5"""
  6
  7# Imports:
  8from __future__ import annotations
  9
 10# ##-- stdlib imports
 11import datetime
 12import enum
 13import functools as ftz
 14import itertools as itz
 15import logging as logmod
 16import pathlib as pl
 17import re
 18import time
 19import types
 20import weakref
 21from uuid import UUID, uuid1
 22
 23# ##-- end stdlib imports
 24
 25# ##-- 3rd party imports
 26from jgdv import Proto, Mixin
 27import bibtexparser
 28import bibtexparser.model as model
 29from bibtexparser.library import Library
 30from bibtexparser import middlewares as ms
 31from bibtexparser.middlewares.middleware import (BlockMiddleware, LibraryMiddleware)
 32
 33# ##-- end 3rd party imports
 34
 35import bibble._interface as API
 36from . import _interface as MAPI
 37from bibble.util.middlecore import IdenLibraryMiddleware
 38
 39# ##-- types
 40# isort: off
 41import abc
 42import collections.abc
 43from typing import TYPE_CHECKING, cast, assert_type, assert_never
 44from typing import Generic, NewType
 45# Protocols:
 46from typing import Protocol, runtime_checkable
 47# Typing Decorators:
 48from typing import no_type_check, final, override, overload
 49
 50if TYPE_CHECKING:
 51    from jgdv import Maybe
 52    from typing import Final
 53    from typing import ClassVar, Any, LiteralString
 54    from typing import Never, Self, Literal
 55    from typing import TypeGuard
 56    from collections.abc import Iterable, Iterator, Callable, Generator
 57    from collections.abc import Sequence, Mapping, MutableMapping, Hashable
 58
 59##--|
 60
 61# isort: on
 62# ##-- end types
 63
 64##-- logging
 65logging = logmod.getLogger(__name__)
 66##-- end logging
 67
 68##--|
 69
class DuplicateFinder(IdenLibraryMiddleware):
    """ Library middleware that scans entry keys for repeats.

    Detection only: a repeated key is recognised, but no action is taken
    for it, so the library is always handed back unmodified.
    """

    def transform(self, library:Library) -> Library:
        """ Walk every entry of ``library``, tracking the keys seen so far.

        Returns the same ``library`` object that was passed in.
        """
        seen_keys = set()
        for current in library.entries:
            if current.key not in seen_keys:
                seen_keys.add(current.key)
                continue
            # Repeated key: placeholder branch, nothing is done with it.
        return library
82 83
[docs] 84class DuplicateKeyHandler(IdenLibraryMiddleware): 85 """ take duplicate entries and edit their key to be unique """ 86
[docs] 87 def transform(self, library:Library): 88 if not bool(library.failed_blocks): 89 return library 90 91 key_count, field_count = 0, 0 92 self.logger().info("Handling %s failed blocks", len(library.failed_blocks)) 93 for failed in library.failed_blocks: 94 match failed: 95 case model.DuplicateBlockKeyBlock(): 96 self._dedup_key(failed, library) 97 key_count += 1 98 case model.DuplicateFieldKeyBlock(): 99 self._dedup_fields(failed, library) 100 field_count += 1 101 case _: 102 self.logger().info("Skipping block: %s", failed) 103 pass 104 else: 105 self.logger().info("Adjusted %s duplicate keys ", key_count) 106 self.logger().info("Adjusted %s duplicate fields ", field_count) 107 return library
108
[docs] 109 def _dedup_key(self, failed, library) -> None: 110 uuid = uuid1().hex 111 duplicate = failed.ignore_error_block 112 original = duplicate.key 113 duplicate.key = f"{duplicate.key}_dup_{uuid}" 114 library.add(duplicate) 115 library.remove(failed) 116 self.logger().warning("Duplicate Key found: %s -> %s", original, duplicate.key)
117
[docs] 118 def _dedup_fields(self, failed, library) -> None: 119 entry = failed.ignore_error_block 120 found = set() 121 duplicates = set() 122 for field in entry.fields: 123 if field.key not in found: 124 found.add(field.key) 125 continue 126 127 duplicates.add(field.key) 128 count = 2 129 130 while (curr:=f"{field.key}_{count}") in found: 131 duplicates.add(field.key) 132 count += 1 133 if 100 < count: 134 raise ValueError("Deduplicating fields is stuck") 135 else: 136 field.key = curr 137 found.add(curr) 138 else: 139 self.logger().warning("Duplicate Fields (%s): %s", 140 entry.key, 141 duplicates) 142 library.add(entry) 143 library.remove(failed)