|
1 | 1 | import inspect
|
2 | 2 | import threading
|
3 | 3 | import uuid
|
| 4 | +from collections import Counter |
4 | 5 | from datetime import datetime, timedelta
|
5 | 6 | from dataclasses import dataclass, field, asdict
|
6 | 7 | from pathlib import Path
|
@@ -425,11 +426,52 @@ def load(cls, id):
|
425 | 426 | path = cls.get_root() / f"{id}.yml"
|
426 | 427 | if not path.exists():
|
427 | 428 | raise DoesNotExist
|
| 429 | + |
| 430 | + def _dedupe_products(raw_data): |
| 431 | + """On some rare occasions, different products get |
| 432 | + the same identifier (ref). |
| 433 | +
|
| 434 | + This function finds them and appends "-dedupe" to it. |
| 435 | + This is not ideal but fixes the problem before it causes more |
| 436 | + trouble (such as https://github.com/spiral-project/copanier/issues/136) |
| 437 | +
|
| 438 | + This function returns True if dupes have been found. |
| 439 | + """ |
| 440 | + if ('products' not in raw_data) or len(raw_data['products']) < 1: |
| 441 | + return False |
| 442 | + |
| 443 | + products = raw_data['products'] |
| 444 | + |
| 445 | + counter = Counter([p['ref'] for p in products]) |
| 446 | + most_common = counter.most_common(1)[0] |
| 447 | + number_of_dupes = most_common[1] |
| 448 | + |
| 449 | + if number_of_dupes < 2: |
| 450 | + return False |
| 451 | + |
| 452 | + dupe_id = most_common[0] |
| 453 | + # Reconstruct the products list but change the duplicated ID. |
| 454 | + counter = 0 |
| 455 | + new_products = [] |
| 456 | + for product in products: |
| 457 | + ref = product['ref'] |
| 458 | + if ref == dupe_id: |
| 459 | + counter = counter + 1 |
| 460 | + if counter == number_of_dupes: # Only change the last occurence. |
| 461 | + product['ref'] = f'{ref}-dedupe' |
| 462 | + new_products.append(product) |
| 463 | + raw_data['products'] = new_products |
| 464 | + return True |
| 465 | + |
428 | 466 | data = yaml.safe_load(path.read_text())
|
| 467 | + dupe_found = _dedupe_products(data) |
429 | 468 | # Tolerate extra fields (but we'll lose them if instance is persisted)
|
430 | 469 | data = {k: v for k, v in data.items() if k in cls.__dataclass_fields__}
|
431 | 470 | delivery = cls(**data)
|
432 | 471 | delivery.id = id
|
| 472 | + |
| 473 | + if dupe_found: |
| 474 | + delivery.persist() |
433 | 475 | return delivery
|
434 | 476 |
|
435 | 477 | @classmethod
|
|
0 commit comments