1#!/usr/bin/env python3
2"""
3
4See EOF for license/metadata/notes as applicable
5"""
6
7# Imports:
8from __future__ import annotations
9
10# ##-- stdlib imports
11import datetime
12import enum
13import functools as ftz
14import itertools as itz
15import logging as logmod
16import pathlib as pl
17import re
18import time
19import types
20import weakref
21from uuid import UUID, uuid1
22
23# ##-- end stdlib imports
24
25# ##-- 3rd party imports
26from jgdv import Proto, Mixin
27import bibtexparser
28import bibtexparser.model as model
29from bibtexparser.library import Library
30from bibtexparser import middlewares as ms
31from bibtexparser.middlewares.middleware import (BlockMiddleware, LibraryMiddleware)
32
33# ##-- end 3rd party imports
34
35import bibble._interface as API
36from . import _interface as MAPI
37from bibble.util.middlecore import IdenLibraryMiddleware
38
39# ##-- types
40# isort: off
41import abc
42import collections.abc
43from typing import TYPE_CHECKING, cast, assert_type, assert_never
44from typing import Generic, NewType
45# Protocols:
46from typing import Protocol, runtime_checkable
47# Typing Decorators:
48from typing import no_type_check, final, override, overload
49
50if TYPE_CHECKING:
51 from jgdv import Maybe
52 from typing import Final
53 from typing import ClassVar, Any, LiteralString
54 from typing import Never, Self, Literal
55 from typing import TypeGuard
56 from collections.abc import Iterable, Iterator, Callable, Generator
57 from collections.abc import Sequence, Mapping, MutableMapping, Hashable
58
59##--|
60
61# isort: on
62# ##-- end types
63
64##-- logging
65logging = logmod.getLogger(__name__)
66##-- end logging
67
68##--|
69
[docs]
70class DuplicateFinder(IdenLibraryMiddleware):
71
82
83
[docs]
84class DuplicateKeyHandler(IdenLibraryMiddleware):
85 """ take duplicate entries and edit their key to be unique """
86
108
[docs]
109 def _dedup_key(self, failed, library) -> None:
110 uuid = uuid1().hex
111 duplicate = failed.ignore_error_block
112 original = duplicate.key
113 duplicate.key = f"{duplicate.key}_dup_{uuid}"
114 library.add(duplicate)
115 library.remove(failed)
116 self.logger().warning("Duplicate Key found: %s -> %s", original, duplicate.key)
117
[docs]
118 def _dedup_fields(self, failed, library) -> None:
119 entry = failed.ignore_error_block
120 found = set()
121 duplicates = set()
122 for field in entry.fields:
123 if field.key not in found:
124 found.add(field.key)
125 continue
126
127 duplicates.add(field.key)
128 count = 2
129
130 while (curr:=f"{field.key}_{count}") in found:
131 duplicates.add(field.key)
132 count += 1
133 if 100 < count:
134 raise ValueError("Deduplicating fields is stuck")
135 else:
136 field.key = curr
137 found.add(curr)
138 else:
139 self.logger().warning("Duplicate Fields (%s): %s",
140 entry.key,
141 duplicates)
142 library.add(entry)
143 library.remove(failed)