14fd0cf511
- Rewrite merge_blocklists.py to sync a single blacklist from upstream and subtract the locally-maintained whitelist - Replace whitelist contents with subtitle + webm seed - Remove blacklist_permissive, whitelist_with_subtitles, and all .prev files that are no longer needed - Rewrite README to reflect the two-file model and link to wiki
68 lines
2.4 KiB
Python
68 lines
2.4 KiB
Python
"""Sync the blacklist from upstream Cleanuparr, preserving manual local
|
|
additions and stripping entries listed in the locally-maintained whitelist.
|
|
|
|
See the wiki (Sync) for the full algorithm and rationale.
|
|
"""
|
|
import urllib.request
|
|
|
|
# Raw URL of the upstream Cleanuparr blacklist that is downloaded on each run.
UPSTREAM_URL = "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/blacklist"
# Path of the merged blacklist; read as the current local state and rewritten.
BLACKLIST = "blacklist"
# Path of the upstream snapshot saved by the previous run (three-way merge base).
BLACKLIST_PREV = "blacklist.prev"
# Path of the locally-maintained whitelist; its entries are removed from the result.
WHITELIST = "whitelist"
|
|
|
|
|
|
def read_lines(path):
    """Return the set of non-empty, whitespace-stripped lines in *path*.

    Args:
        path: Path of the text file to read.

    Returns:
        A set holding one entry per non-blank line, with surrounding
        whitespace removed. An empty set if the file does not exist.
    """
    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(path, encoding="utf-8") as f:
            stripped = (line.strip() for line in f)
            return {entry for entry in stripped if entry}
    except FileNotFoundError:
        return set()
|
|
|
|
|
|
def main():
    """Sync the local blacklist with upstream and rewrite it on disk.

    Steps:
      1. Download the current upstream blacklist.
      2. Load the previous upstream snapshot, the committed blacklist and
         the whitelist from disk.
      3. Treat entries of the committed blacklist that are absent from the
         previous snapshot as manual additions, union them with the new
         upstream list, then remove every whitelist entry.
      4. Print a summary, write the sorted result to BLACKLIST and the
         sorted new upstream list to BLACKLIST_PREV.

    Raises:
        urllib.error.URLError, TimeoutError: If the download fails or
            times out.
        RuntimeError: If the upstream response contains no entries.
    """
    # Fetch the current upstream blacklist. Without a timeout urlopen can
    # block indefinitely on a stalled connection.
    with urllib.request.urlopen(UPSTREAM_URL, timeout=30) as r:
        body = r.read().decode("utf-8")
    upstream_new = {line.strip() for line in body.splitlines() if line.strip()}

    # An empty download would reduce the blacklist to the custom entries
    # only; fail loudly instead of overwriting the files.
    if not upstream_new:
        raise RuntimeError(
            f"Upstream blacklist at {UPSTREAM_URL} is empty; "
            f"refusing to overwrite {BLACKLIST}"
        )

    # Previous upstream snapshot: baseline for detecting local additions.
    # On first run (no snapshot on disk), use the current upstream as the
    # baseline, so only entries absent from upstream count as local additions.
    upstream_prev = read_lines(BLACKLIST_PREV)
    if not upstream_prev:
        upstream_prev = upstream_new.copy()

    # Current committed blacklist (may contain manual local additions)
    local = read_lines(BLACKLIST)

    # Locally-maintained whitelist (exclusion source)
    whitelist = read_lines(WHITELIST)

    # Three-way merge: anything in local but not in the previous upstream
    # snapshot is a manual local addition that must be preserved.
    custom = local - upstream_prev
    merged = upstream_new | custom

    # Strip whitelist entries from the merged result.
    result = merged - whitelist

    # Reporting for the workflow log
    print(f"[{BLACKLIST}] Upstream added: {sorted(upstream_new - upstream_prev)}")
    print(f"[{BLACKLIST}] Upstream removed: {sorted(upstream_prev - upstream_new)}")
    print(f"[{BLACKLIST}] Custom preserved: {sorted(custom)}")
    print(f"[{BLACKLIST}] Whitelist stripped: {sorted(merged & whitelist)}")

    # Write the merged blacklist, sorted for deterministic diffs. UTF-8 is
    # explicit so the file round-trips with the UTF-8 decoded download.
    with open(BLACKLIST, "w", encoding="utf-8") as f:
        f.write("\n".join(sorted(result)) + "\n")

    # Store the new upstream snapshot for the next run
    with open(BLACKLIST_PREV, "w", encoding="utf-8") as f:
        f.write("\n".join(sorted(upstream_new)) + "\n")
|
|
|
|
|
|
# Run the sync only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|