From 14fd0cf511cbcacedf0d1bf9d792403d50c15b3b Mon Sep 17 00:00:00 2001 From: CodeX Date: Tue, 7 Apr 2026 01:09:17 +0200 Subject: [PATCH] Simplify to whitelist/blacklist model - Rewrite merge_blocklists.py to sync a single blacklist from upstream and subtract the locally-maintained whitelist - Replace whitelist contents with subtitle + webm seed - Remove blacklist_permissive, whitelist_with_subtitles, and all .prev files that are no longer needed - Rewrite README to reflect the two-file model and link to wiki --- README.md | 132 ++++++++--- blacklist_permissive | 410 ---------------------------------- blacklist_permissive.prev | 410 ---------------------------------- scripts/merge_blocklists.py | 84 ++++--- whitelist | 5 + whitelist.prev | 3 - whitelist_with_subtitles | 7 - whitelist_with_subtitles.prev | 7 - 8 files changed, 162 insertions(+), 896 deletions(-) delete mode 100644 blacklist_permissive delete mode 100644 blacklist_permissive.prev delete mode 100644 whitelist.prev delete mode 100644 whitelist_with_subtitles delete mode 100644 whitelist_with_subtitles.prev diff --git a/README.md b/README.md index 33c6c2e..420eed0 100644 --- a/README.md +++ b/README.md @@ -1,40 +1,120 @@ -# ARR Stack Blocklists +# arr/blocklists -Automatically synchronized blocklists for use with Cleanuparr in the ARR media stack. +Curated blacklist and whitelist for the ARR media stack. The blacklist is +synced automatically from upstream Cleanuparr and stripped of anything +listed in the locally-maintained whitelist, so consumers like qBittorrent +and Cleanuparr can point at a single raw URL per list and stay in sync. -## Files +See the wiki for full technical reference: +- [Sync](https://git.hisp.no/arr/blocklists/wiki/Sync) + -- three-way merge, whitelist exclusion, `.prev` snapshot, edge cases +- [Lists](https://git.hisp.no/arr/blocklists/wiki/Lists) + -- the two-file model, pattern semantics, maintaining the whitelist +- [Consumers](https://git.hisp.no/arr/blocklists/wiki/Consumers) + -- qBittorrent and Cleanuparr integration, raw URLs, recommended modes +- [CI and Workflow](https://git.hisp.no/arr/blocklists/wiki/CI-and-Workflow) + -- scheduled Gitea Actions job, manual dispatch, commit behaviour -| File | Description | -|------|-------------| -| `blacklist` | Standard blocklist — blocks all known malicious and unwanted file types | -| `blacklist_permissive` | Permissive blocklist — blocks genuinely malicious types with fewer false positives | -| `whitelist` | Whitelist — only files matching these patterns are allowed | -| `whitelist_with_subtitles` | Whitelist with subtitle file types included | -| `*.prev` | Internal sync reference files — do not edit manually | +## How it works + +The repository contains two data files: + +| File | Role | Source | +|---|---|---| +| `blacklist` | Extensions blocked by downloaders and file cleaners | Synced from upstream, with the whitelist subtracted | +| `whitelist` | Extensions that must never be blocked or deleted | Locally maintained | + +On every scheduled run the sync script: + +1. Fetches the current upstream blacklist from Cleanuparr. +2. Detects any manual additions made directly to `blacklist` (three-way + merge against `blacklist.prev`). +3. Subtracts every entry listed in `whitelist`. +4. Writes the result back to `blacklist` and updates `blacklist.prev`. + +The whitelist is the single source of truth for "what I want kept." Adding +an extension to `whitelist` removes it from `blacklist` on the next sync +and prevents consumers from blocking or deleting it. See +[Sync](https://git.hisp.no/arr/blocklists/wiki/Sync) for the full algorithm. + +## Prerequisites + +- A consumer that reads a remote text file of glob patterns (qBittorrent + excluded file names, Cleanuparr blacklist/whitelist sync, etc.) +- Network access from that consumer to `git.hisp.no` + +## File structure + +| Path | Purpose | +|---|---| +| `blacklist` | Merged output: upstream blacklist minus the whitelist. Consumer-facing | +| `blacklist.prev` | Snapshot of the last upstream fetch. Baseline for the three-way merge. Do not edit | +| `whitelist` | Locally-maintained allow list. Edit directly to add or remove entries | +| `scripts/merge_blocklists.py` | Sync script executed by the scheduled workflow | +| `.gitea/workflows/sync.yml` | Scheduled Gitea Actions workflow | ## Usage -Point Cleanuparr's Malware Blocker and Blacklist Sync at the raw URL of your chosen file: +Point your consumer at the raw URL of the file it should use. + +### qBittorrent + +qBittorrent has no whitelist feature, so it consumes the blacklist directly. +Set the excluded file names list (Options -> Downloads -> Excluded file +names) to: + ``` -https://git.hisp.no/arr/blocklists/raw/branch/main/blacklist_permissive +https://git.hisp.no/arr/blocklists/raw/branch/main/blacklist ``` -## Sync +Because the whitelist is already subtracted from this file, any extension +you add to `whitelist` stops being blocked by qBittorrent on the next sync. -Files are automatically synchronized from the upstream [Cleanuparr](https://github.com/Cleanuparr/Cleanuparr) repository every 6 hours via Gitea Actions. +### Cleanuparr -The sync uses a three-way merge strategy: -- Upstream additions are automatically included -- Upstream removals are automatically removed -- Your custom additions are preserved across every sync +Cleanuparr supports both blacklist and whitelist modes. Use whichever +matches your setup: -## Custom Entries +- **Blacklist mode** -- point at the same `blacklist` raw URL as qBittorrent. +- **Whitelist mode** -- point at the `whitelist` raw URL: -To add your own entries, edit the relevant file directly in Gitea. Your additions will be detected as custom entries and preserved on every subsequent sync. - -## Upstream Source - -Blocklists are sourced from: ``` -https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/ -``` \ No newline at end of file +https://git.hisp.no/arr/blocklists/raw/branch/main/whitelist +``` + +See [Consumers](https://git.hisp.no/arr/blocklists/wiki/Consumers) for +recommended mode per feature. + +## Maintaining the whitelist + +Edit `whitelist` directly in Gitea or via a local clone. One glob pattern +per line, sorted, no blank lines. Patterns are matched against the blacklist +with exact-string set subtraction: + +- `*.srt` in `whitelist` removes `*.srt` from `blacklist`. +- `*sample.srt` in `blacklist` is not affected by `*.srt` in `whitelist`. + Sample-file patterns are preserved because exact-string subtraction only + removes identical entries. + +See [Lists](https://git.hisp.no/arr/blocklists/wiki/Lists) for the full +pattern rules and examples. + +## Sync schedule + +The Gitea Actions workflow runs every 7 days at 04:00 UTC and on manual +dispatch. Each run: + +1. Executes `scripts/merge_blocklists.py`. +2. Commits `blacklist` and `blacklist.prev` if either changed. +3. Pushes the commit to `main`. + +See [CI and Workflow](https://git.hisp.no/arr/blocklists/wiki/CI-and-Workflow) +for workflow details and manual dispatch instructions. + +## Upstream source + +The blacklist is sourced from: + +``` +https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/blacklist +``` diff --git a/blacklist_permissive b/blacklist_permissive deleted file mode 100644 index dee2fe8..0000000 --- a/blacklist_permissive +++ /dev/null @@ -1,410 +0,0 @@ -*.000 -*.001 -*.002 -*.004 -*.7z -*.7z.001 -*.7z.002 -*.a00 -*.a01 -*.a02 -*.ace -*.ain -*.alz -*.ana -*.apex -*.apk -*.apz -*.ar -*.arc -*.archiver -*.arduboy -*.arh -*.ari -*.arj -*.ark -*.asice -*.ayt -*.b1 -*.b64 -*.b6z -*.ba -*.bat -*.bdoc -*.bh -*.bin -*.bmp -*.bndl -*.boo -*.bundle -*.bz -*.bz2 -*.bza -*.bzabw -*.bzip -*.bzip2 -*.c00 -*.c01 -*.c02 -*.c10 -*.car -*.cb7 -*.cba -*.cbr -*.cbt -*.cbz -*.cdz -*.cit -*.cmd -*.com -*.comppkg.hauptwerk.rar -*.comppkg_hauptwerk_rar -*.conda -*.cp9 -*.cpgz -*.cpt -*.ctx -*.ctz -*.cxarchive -*.czip -*.daf -*.dar -*.db -*.dd -*.deb -*.dgc -*.dist -*.diz -*.dl_ -*.dll -*.dmg -*.dz -*.ecar -*.ecs -*.ecsbx -*.edz -*.efw -*.egg -*.epi -*.etc -*.exe -*.f -*.f3z -*.fcx -*.fp8 -*.fzpz -*.gar -*.gca -*.gif -*.gmz -*.gz -*.gz2 -*.gza -*.gzi -*.gzip -*.ha -*.hbc -*.hbc2 -*.hbe -*.hki -*.hki1 -*.hki2 -*.hki3 -*.hpk -*.hpkg -*.htm -*.htmi -*.html -*.hyp -*.iadproj -*.ice -*.ico -*.ini -*.ipg -*.ipk -*.ish -*.iso -*.isx -*.ita -*.ize -*.j -*.jar -*.jar.pack -*.jex -*.jgz -*.jhh -*.jic -*.jpg -*.js -*.jsonlz4 -*.kextraction -*.kgb -*.ksp -*.kwgt -*.kz -*.layout -*.lbr -*.lemon -*.lha -*.lhzd -*.libzip -*.link -*.lnk -*.lpkg -*.lqr -*.lz -*.lz4 -*.lzh -*.lzm -*.lzma -*.lzo -*.lzr -*.lzx -*.mar -*.mbz -*.md -*.memo -*.mint -*.mlproj -*.mou -*.movpkg -*.mozlz4 -*.mpkg -*.msi -*.mxc -*.mzp -*.nar -*.nex -*.nfo -*.npk -*.nz -*.oar -*.odlgz -*.opk -*.osf -*.oz -*.p01 -*.p19 -*.p7z -*.pa -*.pack.gz -*.package -*.pae -*.pak -*.paq6 -*.paq7 -*.paq8 -*.paq8f -*.paq8l -*.paq8p -*.par -*.par2 -*.pax -*.pbi -*.pcv -*.pea -*.perl -*.pet -*.pf -*.php -*.pim -*.pima -*.pit -*.piz -*.pkg -*.pkg.tar.xz -*.pkg.tar.zst -*.pkz -*.pl -*.png -*.prs -*.ps1 -*.psc1 -*.psd1 -*.psm1 -*.psz -*.pup -*.puz -*.pvmp -*.pvmz -*.pwa -*.pxl -*.py -*.pyd -*.q -*.qda -*.r0 -*.r00 -*.r01 -*.r02 -*.r03 -*.r04 -*.r1 -*.r2 -*.r21 -*.r30 -*.rar -*.rb -*.readme -*.reg -*.rev -*.rk -*.rnc -*.rp9 -*.rpm -*.rss -*.run -*.rz -*.s00 -*.s01 -*.s02 -*.s09 -*.s7z -*.sar -*.sbx -*.scr -*.sdc -*.sdn -*.sdoc -*.sdocx -*.sea -*.sen -*.sfg -*.sfm -*.sfs -*.sfx -*.sh -*.shar -*.shk -*.shr -*.sifz -*.sipa -*.sit -*.sitx -*.smpf -*.snagitstamps -*.snappy -*.snb -*.snz -*.spa -*.spd -*.spl -*.spm -*.spt -*.sqf -*.sql -*.sqx -*.sqz -*.srep -*.stg -*.stkdoodlz -*.stproj -*.sy_ -*.tar.bz2 -*.tar.gz -*.tar.gz2 -*.tar.lz -*.tar.lzma -*.tar.xz -*.tar.z -*.tar.zip -*.taz -*.tbz -*.tbz2 -*.tcx -*.text -*.tg -*.tgs -*.tgz -*.thumb -*.tlz -*.tlzma -*.torrent -*.tpsr -*.trs -*.tx_ -*.txt -*.txz -*.tz -*.tzst -*.ubz -*.uc2 -*.ufdr -*.ufs.uzip -*.uha -*.url -*.uue -*.uvm -*.uzed -*.uzip -*.vbs -*.vem -*.vfs -*.vib -*.vip -*.vmcz -*.vms -*.voca -*.vpk -*.vrpackage -*.vsi -*.vwi -*.wa -*.wacz -*.waff -*.war -*.wastickers -*.wdz -*.whl -*.wick -*.wlb -*.wot -*.wsf -*.wux -*.xapk -*.xar -*.xcf.bz2 -*.xcf.gz -*.xcf.xz -*.xcfbz2 -*.xcfgz -*.xcfxz -*.xez -*.xfp -*.xip -*.xmcdz -*.xml -*.xoj -*.xopp -*.xx -*.xz -*.xzm -*.y -*.yc -*.yz1 -*.z -*.z00 -*.z01 -*.z02 -*.z03 -*.z04 -*.zabw -*.zap -*.zed -*.zfsendtotarget -*.zhelp -*.zi -*.zi_ -*.zim -*.zip -*.zipx -*.zix -*.zl -*.zoo -*.zpaq -*.zpi -*.zsplit -*.zst -*.zw -*.zwi -*.zz diff --git a/blacklist_permissive.prev b/blacklist_permissive.prev deleted file mode 100644 index dee2fe8..0000000 --- a/blacklist_permissive.prev +++ /dev/null @@ -1,410 +0,0 @@ -*.000 -*.001 -*.002 -*.004 -*.7z -*.7z.001 -*.7z.002 -*.a00 -*.a01 -*.a02 -*.ace -*.ain -*.alz -*.ana -*.apex -*.apk -*.apz -*.ar -*.arc -*.archiver -*.arduboy -*.arh -*.ari -*.arj -*.ark -*.asice -*.ayt -*.b1 -*.b64 -*.b6z -*.ba -*.bat -*.bdoc -*.bh -*.bin -*.bmp -*.bndl -*.boo -*.bundle -*.bz -*.bz2 -*.bza -*.bzabw -*.bzip -*.bzip2 -*.c00 -*.c01 -*.c02 -*.c10 -*.car -*.cb7 -*.cba -*.cbr -*.cbt -*.cbz -*.cdz -*.cit -*.cmd -*.com -*.comppkg.hauptwerk.rar -*.comppkg_hauptwerk_rar -*.conda -*.cp9 -*.cpgz -*.cpt -*.ctx -*.ctz -*.cxarchive -*.czip -*.daf -*.dar -*.db -*.dd -*.deb -*.dgc -*.dist -*.diz -*.dl_ -*.dll -*.dmg -*.dz -*.ecar -*.ecs -*.ecsbx -*.edz -*.efw -*.egg -*.epi -*.etc -*.exe -*.f -*.f3z -*.fcx -*.fp8 -*.fzpz -*.gar -*.gca -*.gif -*.gmz -*.gz -*.gz2 -*.gza -*.gzi -*.gzip -*.ha -*.hbc -*.hbc2 -*.hbe -*.hki -*.hki1 -*.hki2 -*.hki3 -*.hpk -*.hpkg -*.htm -*.htmi -*.html -*.hyp -*.iadproj -*.ice -*.ico -*.ini -*.ipg -*.ipk -*.ish -*.iso -*.isx -*.ita -*.ize -*.j -*.jar -*.jar.pack -*.jex -*.jgz -*.jhh -*.jic -*.jpg -*.js -*.jsonlz4 -*.kextraction -*.kgb -*.ksp -*.kwgt -*.kz -*.layout -*.lbr -*.lemon -*.lha -*.lhzd -*.libzip -*.link -*.lnk -*.lpkg -*.lqr -*.lz -*.lz4 -*.lzh -*.lzm -*.lzma -*.lzo -*.lzr -*.lzx -*.mar -*.mbz -*.md -*.memo -*.mint -*.mlproj -*.mou -*.movpkg -*.mozlz4 -*.mpkg -*.msi -*.mxc -*.mzp -*.nar -*.nex -*.nfo -*.npk -*.nz -*.oar -*.odlgz -*.opk -*.osf -*.oz -*.p01 -*.p19 -*.p7z -*.pa -*.pack.gz -*.package -*.pae -*.pak -*.paq6 -*.paq7 -*.paq8 -*.paq8f -*.paq8l -*.paq8p -*.par -*.par2 -*.pax -*.pbi -*.pcv -*.pea -*.perl -*.pet -*.pf -*.php -*.pim -*.pima -*.pit -*.piz -*.pkg -*.pkg.tar.xz -*.pkg.tar.zst -*.pkz -*.pl -*.png -*.prs -*.ps1 -*.psc1 -*.psd1 -*.psm1 -*.psz -*.pup -*.puz -*.pvmp -*.pvmz -*.pwa -*.pxl -*.py -*.pyd -*.q -*.qda -*.r0 -*.r00 -*.r01 -*.r02 -*.r03 -*.r04 -*.r1 -*.r2 -*.r21 -*.r30 -*.rar -*.rb -*.readme -*.reg -*.rev -*.rk -*.rnc -*.rp9 -*.rpm -*.rss -*.run -*.rz -*.s00 -*.s01 -*.s02 -*.s09 -*.s7z -*.sar -*.sbx -*.scr -*.sdc -*.sdn -*.sdoc -*.sdocx -*.sea -*.sen -*.sfg -*.sfm -*.sfs -*.sfx -*.sh -*.shar -*.shk -*.shr -*.sifz -*.sipa -*.sit -*.sitx -*.smpf -*.snagitstamps -*.snappy -*.snb -*.snz -*.spa -*.spd -*.spl -*.spm -*.spt -*.sqf -*.sql -*.sqx -*.sqz -*.srep -*.stg -*.stkdoodlz -*.stproj -*.sy_ -*.tar.bz2 -*.tar.gz -*.tar.gz2 -*.tar.lz -*.tar.lzma -*.tar.xz -*.tar.z -*.tar.zip -*.taz -*.tbz -*.tbz2 -*.tcx -*.text -*.tg -*.tgs -*.tgz -*.thumb -*.tlz -*.tlzma -*.torrent -*.tpsr -*.trs -*.tx_ -*.txt -*.txz -*.tz -*.tzst -*.ubz -*.uc2 -*.ufdr -*.ufs.uzip -*.uha -*.url -*.uue -*.uvm -*.uzed -*.uzip -*.vbs -*.vem -*.vfs -*.vib -*.vip -*.vmcz -*.vms -*.voca -*.vpk -*.vrpackage -*.vsi -*.vwi -*.wa -*.wacz -*.waff -*.war -*.wastickers -*.wdz -*.whl -*.wick -*.wlb -*.wot -*.wsf -*.wux -*.xapk -*.xar -*.xcf.bz2 -*.xcf.gz -*.xcf.xz -*.xcfbz2 -*.xcfgz -*.xcfxz -*.xez -*.xfp -*.xip -*.xmcdz -*.xml -*.xoj -*.xopp -*.xx -*.xz -*.xzm -*.y -*.yc -*.yz1 -*.z -*.z00 -*.z01 -*.z02 -*.z03 -*.z04 -*.zabw -*.zap -*.zed -*.zfsendtotarget -*.zhelp -*.zi -*.zi_ -*.zim -*.zip -*.zipx -*.zix -*.zl -*.zoo -*.zpaq -*.zpi -*.zsplit -*.zst -*.zw -*.zwi -*.zz diff --git a/scripts/merge_blocklists.py b/scripts/merge_blocklists.py index 3eb9efb..be87019 100644 --- a/scripts/merge_blocklists.py +++ b/scripts/merge_blocklists.py @@ -1,49 +1,67 @@ +"""Sync the blacklist from upstream Cleanuparr, preserving manual local +additions and stripping entries listed in the locally-maintained whitelist. + +See the wiki (Sync) for the full algorithm and rationale. +""" import urllib.request -import os -files = { - "blacklist": "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/blacklist", - "blacklist_permissive": "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/blacklist_permissive", - "whitelist": "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/whitelist", - "whitelist_with_subtitles": "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/whitelist_with_subtitles", -} +UPSTREAM_URL = "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/blacklist" +BLACKLIST = "blacklist" +BLACKLIST_PREV = "blacklist.prev" +WHITELIST = "whitelist" -def merge_blocklist(filename, url): - prev_file = f"{filename}.prev" - # Fetch new upstream - with urllib.request.urlopen(url) as r: - upstream_new = set(line.strip() for line in r.read().decode().splitlines() if line.strip()) - - # Read previous upstream (empty set if first run) +def read_lines(path): + """Read a file into a set of non-empty stripped lines. Empty set if missing.""" try: - with open(prev_file) as f: - upstream_prev = set(line.strip() for line in f if line.strip()) + with open(path) as f: + return set(line.strip() for line in f if line.strip()) except FileNotFoundError: + return set() + + +def main(): + # Fetch the current upstream blacklist + with urllib.request.urlopen(UPSTREAM_URL) as r: + upstream_new = set( + line.strip() for line in r.read().decode().splitlines() if line.strip() + ) + + # Previous upstream snapshot: baseline for detecting local additions. + # On first run (no snapshot on disk), use the current upstream as the + # baseline so nothing is treated as a local addition. + upstream_prev = read_lines(BLACKLIST_PREV) + if not upstream_prev: upstream_prev = upstream_new.copy() - # Read current local file - try: - with open(filename) as f: - local = set(line.strip() for line in f if line.strip()) - except FileNotFoundError: - local = set() + # Current committed blacklist (may contain manual local additions) + local = read_lines(BLACKLIST) - # Three-way merge + # Locally-maintained whitelist (exclusion source) + whitelist = read_lines(WHITELIST) + + # Three-way merge: anything in local but not in the previous upstream + # snapshot is a manual local addition that must be preserved. custom = local - upstream_prev - result = upstream_new | custom + merged = upstream_new | custom - print(f"[{filename}] Custom preserved: {sorted(custom)}") - print(f"[{filename}] Upstream added: {sorted(upstream_new - upstream_prev)}") - print(f"[{filename}] Upstream removed: {sorted(upstream_prev - upstream_new)}") + # Strip whitelist entries from the merged result. + result = merged - whitelist - # Write merged result sorted - with open(filename, "w") as f: + # Reporting for the workflow log + print(f"[{BLACKLIST}] Upstream added: {sorted(upstream_new - upstream_prev)}") + print(f"[{BLACKLIST}] Upstream removed: {sorted(upstream_prev - upstream_new)}") + print(f"[{BLACKLIST}] Custom preserved: {sorted(custom)}") + print(f"[{BLACKLIST}] Whitelist stripped: {sorted(merged & whitelist)}") + + # Write the merged blacklist, sorted for deterministic diffs + with open(BLACKLIST, "w") as f: f.write("\n".join(sorted(result)) + "\n") - # Store new upstream as prev for next run - with open(prev_file, "w") as f: + # Store the new upstream snapshot for the next run + with open(BLACKLIST_PREV, "w") as f: f.write("\n".join(sorted(upstream_new)) + "\n") -for filename, url in files.items(): - merge_blocklist(filename, url) \ No newline at end of file + +if __name__ == "__main__": + main() diff --git a/whitelist b/whitelist index 06de3f8..8429dbc 100644 --- a/whitelist +++ b/whitelist @@ -1,3 +1,8 @@ +*.ass *.avi *.mkv *.mp4 +*.srt +*.ssa +*.sub +*.webm diff --git a/whitelist.prev b/whitelist.prev deleted file mode 100644 index 06de3f8..0000000 --- a/whitelist.prev +++ /dev/null @@ -1,3 +0,0 @@ -*.avi -*.mkv -*.mp4 diff --git a/whitelist_with_subtitles b/whitelist_with_subtitles deleted file mode 100644 index 8d799ff..0000000 --- a/whitelist_with_subtitles +++ /dev/null @@ -1,7 +0,0 @@ -*.ass -*.avi -*.mkv -*.mp4 -*.srt -*.ssa -*.sub diff --git a/whitelist_with_subtitles.prev b/whitelist_with_subtitles.prev deleted file mode 100644 index 8d799ff..0000000 --- a/whitelist_with_subtitles.prev +++ /dev/null @@ -1,7 +0,0 @@ -*.ass -*.avi -*.mkv -*.mp4 -*.srt -*.ssa -*.sub