diff --git a/.cspell.json b/.cspell.json
index e54a9233..0c3c552b 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -1,17 +1,22 @@
 {
   "words": [
+    "aiter",
+    "anyio",
     "asyncio",
-    "bidi",
-    "commitizen",
-    "dtemp",
+    "dpath",
     "eifinger",
     "emby",
     "geoip",
+    "gstatic",
     "httpx",
+    "levelno",
     "liblaf",
+    "optim",
     "pycache",
     "pydantic",
     "pydocstyle",
+    "rcode",
+    "ubelt",
     "venv"
   ],
   "ignorePaths": ["**/*-lock.*", "**/*.lock*", "**/.cspell.json"],
diff --git a/.envrc b/.envrc
index 7b1d294e..e0b62934 100644
--- a/.envrc
+++ b/.envrc
@@ -1,2 +1,6 @@
 # shellcheck disable=SC2148
-source_env_if_exists ./.venv/bin/activate
+watch_file requirements{,-dev}.lock
+if [[ ! -f .venv/bin/activate ]]; then
+  rye sync
+fi
+source_env .venv/bin/activate
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 432d803e..bd93197e 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -15,24 +15,20 @@ jobs:
       - name: Setup Rye
         uses: eifinger/setup-rye@v4
       - name: Install sing-box
-        # run: bash <(curl -fsSL https://sing-box.app/deb-install.sh)
-        # TODO: upstream issue:
-        run: |-
-          gh release --repo "SagerNet/sing-box" download "v1.10.0-alpha.28" --pattern "sing-box-*-linux-amd64.tar.gz"
-          tar --extract --file sing-box-*-linux-amd64.tar.gz --verbose
-          install -D --no-target-directory --verbose sing-box-*-linux-amd64/sing-box ~/.local/bin/sing-box
+        run: bash <(curl -fsSL https://sing-box.app/deb-install.sh)
        env:
          GH_TOKEN: ${{ github.token }}
-      - name: Install Python Dependencies
+      - name: Install Dependencies
        run: rye sync --no-lock
-      - name: Install Prettier
-        run: npm install --global prettier
      - name: Build Rule Sets
-        run: make --jobs
+        run: |-
+          source .venv/bin/activate
+          python src/build.py
+          npx prettier --write output/README.md
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
-          name: rule-sets
+          name: sing
          path: output

  deploy:
@@ -47,12 +43,12 @@
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
-          name: rule-sets
-          path: output
+          name: sing
+          path: sing
      - name: Deploy to GitHub Branch
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ github.token }}
-          publish_branch: rule-sets
-          publish_dir: output
+          publish_branch: sing
+          publish_dir: sing
          force_orphan: true
diff --git a/.gitignore b/.gitignore
index 4345f199..3693d6df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,3 @@
 wheels/
 data/
 output/
-rule-sets/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
deleted file mode 100644
index 9874a8d5..00000000
--- a/.pre-commit-config.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-ci:
-  autofix_commit_msg: "ci(pre-commit): auto fixes from pre-commit hooks"
-  autoupdate_commit_msg: "ci(pre-commit): update pre-commit hooks"
-  skip:
-    - cspell
-repos:
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.6
-    hooks:
-      - id: ruff
-        args:
-          - --fix
-      - id: ruff-format
-  - repo: https://github.com/commitizen-tools/commitizen
-    rev: v3.28.0
-    hooks:
-      - id: commitizen
-  - repo: https://github.com/liblaf/pre-commit-hooks
-    rev: dev
-    hooks:
-      - id: prettier
-        exclude: (.*-lock\..*)|(.*\.lock)$
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
-    hooks:
-      - id: check-added-large-files
-      - id: check-ast
-      - id: check-builtin-literals
-      - id: check-case-conflict
-      - id: check-docstring-first
-      - id: check-json
-        exclude: tsconfig\.json$
-      - id: check-merge-conflict
-      - id: check-toml
-      - id: check-vcs-permalinks
-      - id: check-yaml
-        args:
-          - --unsafe
-      - id: debug-statements
-      - id: destroyed-symlinks
-      - id: detect-private-key
-      - id: end-of-file-fixer
-      - id: fix-byte-order-marker
-      - id: mixed-line-ending
-      - id: trailing-whitespace
-  - repo: https://github.com/python-jsonschema/check-jsonschema
-    rev: 0.29.1
-    hooks:
-      - id: check-github-workflows
-  - repo: https://github.com/sirosen/texthooks
-    rev: 0.6.6
-    hooks:
-      - id: fix-ligatures
-      - id: fix-spaces
-      - id: forbid-bidi-controls
-  - repo: https://github.com/streetsidesoftware/cspell-cli
-    rev: v8.13.1
-    hooks:
-      - id: cspell
diff --git a/.ruff.toml b/.ruff.toml
index 00a49470..c2bf9d7c 100644
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -1,6 +1,6 @@
 fix = true
 show-fixes = true
-target-version = "py311"
+target-version = "py312"

 [format]
 docstring-code-format = true
@@ -16,9 +16,9 @@ ignore = [
   "FIX",
   "INP",
   "ISC",
-  "N806",
   "PLR09",
   "PLR2004",
+  "RET504",
   "S",
   "T20",
   "TD",
diff --git a/Makefile b/Makefile
deleted file mode 100644
index caa91219..00000000
--- a/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-default: output
-
-include makefiles/*.mk
diff --git a/README.md b/README.md
index 17f99949..cf4ddf58 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1,28 @@
 # sing-box Rules

-| Name | Download Link |
-| ------------------------------------------ | ----------------------------------------------------------------------------------------------------- |
-| [📵 RuleSet: ADs](#-ruleset-ads) | [rule-set/ads.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/rule-set/ads.srs) |
-| [🔒 RuleSet: Private](#-ruleset-private) | [rule-set/private.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/rule-set/private.srs) |
-| [🇨🇳 RuleSet: CN](#-ruleset-cn) | [rule-set/cn.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/rule-set/cn.srs) |
-| [🌐 RuleSet: Proxy](#-ruleset-proxy) | [rule-set/proxy.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/rule-set/proxy.srs) |
-| [🤖 RuleSet: AI](#-ruleset-ai) | [rule-set/ai.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/rule-set/ai.srs) |
-| [🍟 RuleSet: Emby](#-ruleset-emby) | [rule-set/emby.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/rule-set/emby.srs) |
-| [☁️ RuleSet: Download](#-ruleset-download) | [rule-set/download.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/rule-set/download.srs) |
-| [📺 RuleSet: Media](#-ruleset-media) | [rule-set/media.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/rule-set/media.srs) |
-| [📵 GeoSite: ADs](#-ruleset-ads) | [geosite/ads.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/geosite/ads.srs) |
-| [🔒 GeoSite: Private](#-ruleset-private) | [geosite/private.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/geosite/private.srs) |
-| [🇨🇳 GeoSite: CN](#-ruleset-cn) | [geosite/cn.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/geosite/cn.srs) |
-| [🌐 GeoSite: Proxy](#-ruleset-proxy) | [geosite/proxy.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/geosite/proxy.srs) |
-| [🇨🇳 GeoIP: CN](#-ruleset-cn) | [geoip/cn.srs](https://github.com/liblaf/sing-box-rules/raw/rule-sets/geoip/cn.srs) |
-
-- [statistics](https://github.com/liblaf/sing-box-rules/blob/rule-sets/README.md)
+| Name | Download Link |
+| ------------------------------------------ | ------------------------------------------------------------------------------------------------ |
+| [📵 RuleSet: ADs](#-ruleset-ads) | [rule-set/ads.srs](https://github.com/liblaf/sing-box-rules/raw/sing/rule-set/ads.srs) |
+| [🔒 RuleSet: Private](#-ruleset-private) | [rule-set/private.srs](https://github.com/liblaf/sing-box-rules/raw/sing/rule-set/private.srs) |
+| [🇨🇳 RuleSet: CN](#-ruleset-cn) | [rule-set/cn.srs](https://github.com/liblaf/sing-box-rules/raw/sing/rule-set/cn.srs) |
+| [🌐 RuleSet: Proxy](#-ruleset-proxy) | [rule-set/proxy.srs](https://github.com/liblaf/sing-box-rules/raw/sing/rule-set/proxy.srs) |
+| [🤖 RuleSet: AI](#-ruleset-ai) | [rule-set/ai.srs](https://github.com/liblaf/sing-box-rules/raw/sing/rule-set/ai.srs) |
+| [🍟 RuleSet: Emby](#-ruleset-emby) | [rule-set/emby.srs](https://github.com/liblaf/sing-box-rules/raw/sing/rule-set/emby.srs) |
+| [☁️ RuleSet: Download](#-ruleset-download) | [rule-set/download.srs](https://github.com/liblaf/sing-box-rules/raw/sing/rule-set/download.srs) |
+| [📺 RuleSet: Media](#-ruleset-media) | [rule-set/media.srs](https://github.com/liblaf/sing-box-rules/raw/sing/rule-set/media.srs) |
+| [📵 GeoSite: ADs](#-ruleset-ads) | [geosite/ads.srs](https://github.com/liblaf/sing-box-rules/raw/sing/geosite/ads.srs) |
+| [🔒 GeoSite: Private](#-ruleset-private) | [geosite/private.srs](https://github.com/liblaf/sing-box-rules/raw/sing/geosite/private.srs) |
+| [🇨🇳 GeoSite: CN](#-ruleset-cn) | [geosite/cn.srs](https://github.com/liblaf/sing-box-rules/raw/sing/geosite/cn.srs) |
+| [🌐 GeoSite: Proxy](#-ruleset-proxy) | [geosite/proxy.srs](https://github.com/liblaf/sing-box-rules/raw/sing/geosite/proxy.srs) |
+| [🇨🇳 GeoIP: CN](#-ruleset-cn) | [geoip/cn.srs](https://github.com/liblaf/sing-box-rules/raw/sing/geoip/cn.srs) |
+
+- [statistics](https://github.com/liblaf/sing-box-rules/blob/sing/README.md)
 - `GeoSite: *` does not contain `IP-CIDR` rules, useful for DNS Rule.
 - `GeoIP: *` does not contain `DOMAIN*` rules, useful for DNS Rule.

 ## Optimization

-[optimization results](https://github.com/liblaf/sing-box-rules/blob/rule-sets/README.md)
+[optimization results](https://github.com/liblaf/sing-box-rules/blob/sing/README.md)

 - remove duplicate rules
 - merge `DOMAIN` with `DOMAIN-SUFFIX`
@@ -31,7 +31,7 @@
 - merge `DOMAIN-SUFFIX` with `DOMAIN-KEYWORD`
 - merge `IP-CIDR`

-## Example [sing-box] Config
+## Example [sing-box](https://sing-box.sagernet.org) Config

 ### DNS Rules

@@ -44,34 +44,20 @@
       "address": "https://cloudflare-dns.com/dns-query",
       "address_resolver": "dns:bootstrap"
     },
-    {
-      "tag": "dns:cn",
-      "address": "https://dns.alidns.com/dns-query",
-      "address_resolver": "dns:bootstrap"
-    },
     { "tag": "dns:bootstrap", "address": "223.5.5.5", "detour": "DIRECT" },
     { "tag": "dns:local", "address": "local" },
     { "tag": "dns:reject", "address": "rcode://refused" }
   ],
   "rules": [
-    { "outbound": "any", "server": "dns:bootstrap" },
+    { "outbound": "any", "server": "dns:local" },
     { "rule_set": "geosite:ads", "server": "dns:reject", "disable_cache": true },
     { "rule_set": "geosite:private", "server": "dns:local" },
-    { "clash_mode": "direct", "server": "dns:cn" },
+    { "clash_mode": "direct", "server": "dns:local" },
     { "clash_mode": "global", "server": "dns:proxy" },
-    {
-      "type": "logical",
-      "mode": "and",
-      "rules": [
-        { "rule_set": "geosite:proxy", "invert": true },
-        { "rule_set": "geosite:cn" }
-      ],
-      "server": "dns:cn"
-    },
+    { "rule_set": "geosite:cn", "server": "dns:local" },
     {
       "type": "logical",
       "mode": "and",
@@ -184,9 +170,9 @@
 - [DustinWin/geosite-all.db](https://github.com/DustinWin/ruleset_geodata): `proxy`
 - [MetaCubeX/geosite.db](https://github.com/MetaCubeX/meta-rules-dat): `*!cn*`
 - exclude:
+  - [🇨🇳 RuleSet: CN](#-ruleset-cn)
   - [📵 RuleSet: ADs](#-ruleset-ads)
   - [🔒 RuleSet: Private](#-ruleset-private)
-  - [🇨🇳 RuleSet: CN](#-ruleset-cn)

 ### 🤖 RuleSet: AI

@@ -198,12 +184,18 @@
 - [DustinWin/geosite-all.db](https://github.com/DustinWin/ruleset_geodata): `ai`
 - [MetaCubeX/geosite.db](https://github.com/MetaCubeX/meta-rules-dat): `openai`
 - exclude:
+  - [🇨🇳 RuleSet: CN](#-ruleset-cn)
   - [📵 RuleSet: ADs](#-ruleset-ads)
+  - [🔒 RuleSet: Private](#-ruleset-private)

 ### 🍟 RuleSet: Emby

 - include:
   - [NotSFC/Emby.json](https://github.com/NotSFC/rulelist/blob/main/sing-box/Emby/Emby.json)
+- exclude:
+  - [🇨🇳 RuleSet: CN](#-ruleset-cn)
+  - [📵 RuleSet: ADs](#-ruleset-ads)
+  - [🔒 RuleSet: Private](#-ruleset-private)

 ### ☁️ RuleSet: Download

@@ -216,8 +208,9 @@
   - [liblaf/download.json](https://github.com/liblaf/sing-box-rules/blob/main/custom/download.json)
   - [MetaCubeX/geosite.db](https://github.com/MetaCubeX/meta-rules-dat): `category-dev` | `onedrive`
 - exclude:
-  - [📵 RuleSet: ADs](#-ruleset-ads)
   - [🇨🇳 RuleSet: CN](#-ruleset-cn)
+  - [📵 RuleSet: ADs](#-ruleset-ads)
+  - [🔒 RuleSet: Private](#-ruleset-private)

 ### 📺 RuleSet: Media

@@ -229,8 +222,9 @@
   - [MetaCubeX/geosite-lite.db](https://github.com/MetaCubeX/meta-rules-dat): `proxymedia`, `youtube`
   - [MetaCubeX/geosite.db](https://github.com/MetaCubeX/meta-rules-dat): `youtube`
 - exclude:
-  - [📵 RuleSet: ADs](#-ruleset-ads)
   - [🇨🇳 RuleSet: CN](#-ruleset-cn)
+  - [📵 RuleSet: ADs](#-ruleset-ads)
+  - [🔒 RuleSet: Private](#-ruleset-private)

 ## Acknowledgement
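The merge steps in the README's "Optimization" list boil down to suffix-aware set filtering. A minimal sketch of the `DOMAIN` vs `DOMAIN-SUFFIX` merge, assuming simplified inputs (the real passes live in `src/sbr/container/optim/` below):

```python
def covered(domain: str, suffixes: set[str]) -> bool:
    """True if `domain` is already matched by a DOMAIN-SUFFIX entry."""
    parts = domain.split(".")
    # candidates: the domain itself, plus every dotted parent suffix
    candidates = {domain} | {"." + ".".join(parts[i:]) for i in range(len(parts))}
    return bool(candidates & suffixes)


domain = {"www.example.com", "example.org"}
suffix = {".example.com"}
print(sorted(d for d in domain if not covered(d, suffix)))  # ['example.org']
```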
diff --git a/custom/cn.json b/custom/cn.json
deleted file mode 100644
index 0a27ca3d..00000000
--- a/custom/cn.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "version": 1,
-  "rules": [
-    {
-      "domain_suffix": ["nextdns.io"]
-    }
-  ]
-}
diff --git a/custom/download.json b/custom/download.json
deleted file mode 100644
index a870ec67..00000000
--- a/custom/download.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "version": 1,
-  "rules": [
-    {
-      "process_name": ["curl", "wget", "xh", "xhs"]
-    }
-  ]
-}
diff --git a/makefiles/data.mk b/makefiles/data.mk
deleted file mode 100644
index 0c24b444..00000000
--- a/makefiles/data.mk
+++ /dev/null
@@ -1,39 +0,0 @@
-DATA += data/blackmatrix7/Advertising.list
-DATA += data/blackmatrix7/ChinaMax.list
-DATA += data/blackmatrix7/Claude.list
-DATA += data/blackmatrix7/Copilot.list
-DATA += data/blackmatrix7/Developer.list
-DATA += data/blackmatrix7/Direct.list
-DATA += data/blackmatrix7/Download.list
-DATA += data/blackmatrix7/Gemini.list
-DATA += data/blackmatrix7/Global.list
-DATA += data/blackmatrix7/GlobalMedia.list
-DATA += data/blackmatrix7/Lan.list
-DATA += data/blackmatrix7/NTPService.list
-DATA += data/blackmatrix7/OneDrive.list
-DATA += data/blackmatrix7/OpenAI.list
-DATA += data/DustinWin/geoip-all.db
-DATA += data/DustinWin/geosite-all.db
-DATA += data/MetaCubeX/geoip.db
-DATA += data/MetaCubeX/geosite-lite.db
-DATA += data/MetaCubeX/geosite.db
-DATA += data/NotSFC/Emby.json
-
-.PHONY: data
-data: $(DATA)
-
-data/blackmatrix7/%.list:
-	@ mkdir --parents --verbose "$(@D)"
-	wget --output-document="$@" "https://github.com/blackmatrix7/ios_rule_script/raw/master/rule/Clash/$*/$*.list"
-
-data/DustinWin/%.db:
-	@ mkdir --parents --verbose "$(@D)"
-	wget --output-document="$@" "https://github.com/DustinWin/ruleset_geodata/releases/download/sing-box/$*.db"
-
-data/MetaCubeX/%.db:
-	@ mkdir --parents --verbose "$(@D)"
-	wget --output-document="$@" "https://github.com/MetaCubeX/meta-rules-dat/releases/download/latest/$*.db"
-
-data/NotSFC/%.json:
-	@ mkdir --parents --verbose "$(@D)"
-	wget --output-document="$@" "https://github.com/NotSFC/rulelist/raw/main/sing-box/$*/$*.json"
diff --git a/makefiles/output.mk b/makefiles/output.mk
deleted file mode 100644
index 6794a665..00000000
--- a/makefiles/output.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-SING_BOX ?= sing-box
-
-OUTPUTS += output/geoip/cn.srs
-OUTPUTS += output/geosite/ads.srs
-OUTPUTS += output/geosite/cn.srs
-OUTPUTS += output/geosite/private.srs
-OUTPUTS += output/geosite/proxy.srs
-OUTPUTS += output/rule-set/ads.srs
-OUTPUTS += output/rule-set/ai.srs
-OUTPUTS += output/rule-set/cn.srs
-OUTPUTS += output/rule-set/download.srs
-OUTPUTS += output/rule-set/emby.srs
-OUTPUTS += output/rule-set/media.srs
-OUTPUTS += output/rule-set/private.srs
-OUTPUTS += output/rule-set/proxy.srs
-
-.PHONY: output
-output: output/README.md $(OUTPUTS) $(OUTPUTS:.srs=.json)
-
-output/README.md $(OUTPUTS:.srs=.json) &: scripts/build.py $(DATA)
-	python "$<"
-	prettier --write --ignore-path "" "output/README.md"
-
-output/%.srs: output/%.json
-	$(SING_BOX) rule-set compile "$<" --output "$@"
diff --git a/pyproject.toml b/pyproject.toml
index 9af2797f..400d239b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,17 +7,24 @@
   { email = "30631553+liblaf@users.noreply.github.com", name = "liblaf" },
 ]
 dependencies = [
-  "aiocache>=0.12.2",
-  "cachetools>=5.4.0",
-  "httpx[socks]>=0.27.0",
-  "pydantic>=2.7.3",
+  "anyio>=4.4.0",
+  "boltons>=24.0.0",
+  "httpx[socks]>=0.27.2",
+  "humanize>=4.10.0",
+  "lazy-loader>=0.4",
+  "loguru>=0.7.2",
+  "prettytable>=3.11.0",
+  "pydantic>=2.8.2",
+  "rich>=13.8.0",
+  "ubelt>=1.3.6",
+  "validators>=0.34.0",
 ]
 description = "Add your description here"
 license = { text = "MIT" }
 name = "sing-box-rules"
 readme = "README.md"
 requires-python = ">= 3.12"
-version = "0.0.0"
+version = "0.1.0"

 [project.scripts]
 "sing-box-rules" = "sbr:main"
@@ -29,5 +36,5 @@ packages = ["src/sbr"]
 allow-direct-references = true

 [tool.rye]
-dev-dependencies = ["icecream>=2.1.3", "marimo>=0.7.19"]
+dev-dependencies = ["icecream>=2.1.3", "marimo>=0.8.9"]
 managed = true
diff --git a/pyrightconfig.json b/pyrightconfig.json
index be7eb933..3f13fc2c 100644
--- a/pyrightconfig.json
+++ b/pyrightconfig.json
@@ -1,8 +1,3 @@
 {
-  "typeCheckingMode": "strict",
-
-  "reportMissingTypeStubs": "none",
-  "reportUnknownArgumentType": "none",
-  "reportUnknownMemberType": "none",
-  "reportUnknownVariableType": "none"
+  "typeCheckingMode": "standard"
 }
diff --git a/requirements-dev.lock b/requirements-dev.lock
index d3191c9b..f958f3d5 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -10,18 +10,17 @@
 # universal: false

 -e file:.
-aiocache==0.12.2
-    # via sing-box-rules
 annotated-types==0.7.0
     # via pydantic
 anyio==4.4.0
     # via httpx
+    # via sing-box-rules
     # via starlette
 asttokens==2.4.1
     # via icecream
-cachetools==5.4.0
+boltons==24.0.0
     # via sing-box-rules
-certifi==2024.6.2
+certifi==2024.8.30
     # via httpcore
     # via httpx
 click==8.1.7
@@ -31,46 +30,62 @@
 colorama==0.4.6
     # via icecream
 docutils==0.21.2
     # via marimo
-executing==2.0.1
+executing==2.1.0
     # via icecream
 h11==0.14.0
     # via httpcore
     # via uvicorn
 httpcore==1.0.5
     # via httpx
-httpx==0.27.0
+httpx==0.27.2
+    # via sing-box-rules
+humanize==4.10.0
     # via sing-box-rules
 icecream==2.1.3
-idna==3.7
+idna==3.8
     # via anyio
     # via httpx
 itsdangerous==2.2.0
     # via marimo
 jedi==0.19.1
     # via marimo
-marimo==0.7.19
-markdown==3.6
+lazy-loader==0.4
+    # via sing-box-rules
+loguru==0.7.2
+    # via sing-box-rules
+marimo==0.8.9
+markdown==3.7
     # via marimo
     # via pymdown-extensions
+markdown-it-py==3.0.0
+    # via rich
+mdurl==0.1.2
+    # via markdown-it-py
 packaging==24.1
+    # via lazy-loader
     # via marimo
 parso==0.8.4
     # via jedi
+prettytable==3.11.0
+    # via sing-box-rules
 psutil==6.0.0
     # via marimo
-pydantic==2.7.3
+pydantic==2.8.2
     # via sing-box-rules
-pydantic-core==2.18.4
+pydantic-core==2.20.1
     # via pydantic
 pygments==2.18.0
     # via icecream
     # via marimo
+    # via rich
 pymdown-extensions==10.9
     # via marimo
 pyyaml==6.0.2
     # via marimo
     # via pymdown-extensions
-ruff==0.5.7
+rich==13.8.0
+    # via sing-box-rules
+ruff==0.6.3
     # via marimo
 six==1.16.0
     # via asttokens
@@ -79,14 +94,20 @@
 sniffio==1.3.1
     # via httpx
 socksio==1.0.0
     # via httpx
-starlette==0.38.2
+starlette==0.38.4
     # via marimo
-tomlkit==0.13.0
+tomlkit==0.13.2
     # via marimo
 typing-extensions==4.12.2
     # via pydantic
     # via pydantic-core
-uvicorn==0.30.5
+ubelt==1.3.6
+    # via sing-box-rules
+uvicorn==0.30.6
     # via marimo
+validators==0.34.0
+    # via sing-box-rules
+wcwidth==0.2.13
+    # via prettytable
 websockets==12.0
     # via marimo
diff --git a/requirements.lock b/requirements.lock
index 049fc49e..1eddfec2 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -10,30 +10,47 @@
 # universal: false

 -e file:.
-aiocache==0.12.2
-    # via sing-box-rules
 annotated-types==0.7.0
     # via pydantic
 anyio==4.4.0
     # via httpx
-cachetools==5.4.0
     # via sing-box-rules
-certifi==2024.6.2
+boltons==24.0.0
+    # via sing-box-rules
+certifi==2024.8.30
     # via httpcore
     # via httpx
 h11==0.14.0
     # via httpcore
 httpcore==1.0.5
     # via httpx
-httpx==0.27.0
+httpx==0.27.2
+    # via sing-box-rules
+humanize==4.10.0
     # via sing-box-rules
-idna==3.7
+idna==3.8
     # via anyio
     # via httpx
-pydantic==2.7.3
+lazy-loader==0.4
+    # via sing-box-rules
+loguru==0.7.2
+    # via sing-box-rules
+markdown-it-py==3.0.0
+    # via rich
+mdurl==0.1.2
+    # via markdown-it-py
+packaging==24.1
+    # via lazy-loader
+prettytable==3.11.0
     # via sing-box-rules
-pydantic-core==2.18.4
+pydantic==2.8.2
+    # via sing-box-rules
+pydantic-core==2.20.1
     # via pydantic
+pygments==2.18.0
+    # via rich
+rich==13.8.0
+    # via sing-box-rules
 sniffio==1.3.1
     # via anyio
     # via httpx
@@ -42,3 +59,9 @@ socksio==1.0.0
 typing-extensions==4.12.2
     # via pydantic
     # via pydantic-core
+ubelt==1.3.6
+    # via sing-box-rules
+validators==0.34.0
+    # via sing-box-rules
+wcwidth==0.2.13
+    # via prettytable
diff --git a/scripts/build.py b/scripts/build.py
deleted file mode 100644
index a870a282..00000000
--- a/scripts/build.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import asyncio
-import dataclasses
-import datetime
-import functools
-import pathlib
-from collections.abc import Callable, Coroutine
-from typing import TextIO
-
-from icecream import ic
-from sbr import Rule, preset
-
-
-@dataclasses.dataclass(kw_only=True)
-class Config:
-    fn: Callable[[], Coroutine[None, None, Rule]]
-    geosite: bool = False
-    geoip: bool = False
-    id: str
-    name: str
-
-
-RULE_SETS: list[Config] = [
-    Config(id="ads", fn=preset.ads, name="📵 RuleSet: ADs", geosite=True),
-    Config(id="private", fn=preset.private, name="🔒 RuleSet: Private", geosite=True),
-    Config(id="cn", fn=preset.cn, name="🇨🇳 RuleSet: CN", geosite=True, geoip=True),
-    Config(id="proxy", fn=preset.proxy, name="🌐 RuleSet: Proxy", geosite=True),
-    Config(id="ai", fn=preset.ai, name="🤖 RuleSet: AI"),
-    Config(id="emby", fn=preset.emby, name="🍟 RuleSet: Emby"),
-    Config(id="download", fn=preset.download, name="☁️ RuleSet: Download"),
-    Config(id="media", fn=preset.media, name="📺 RuleSet: Media"),
-]
-
-
-async def main() -> None:
-    summary_filename = pathlib.Path("output/README.md")
-    summary_filename.parent.mkdir(parents=True, exist_ok=True)
-    with summary_filename.open("w") as fp:  # noqa: ASYNC230
-        fprint = functools.partial(print, file=fp)
-        fprint("# sing-box Rules")
-        now: datetime.datetime = datetime.datetime.now(datetime.UTC)
-        fprint("Updated At:", now.strftime("%Y-%m-%d %H:%M:%S"))
-        for cfg in RULE_SETS:
-            raw: Rule = await cfg.fn()
-            rule: Rule = raw.model_copy(deep=True)
-            rule.optimize()
-            print_summary(cfg.name, raw, rule, fp)
-            rule.process_name.clear()  # TODO: split `PROCESS-NAME` to another file
-            rule.save(f"output/rule-set/{cfg.id}.json")
-            ic(cfg.name, rule)
-            if cfg.geosite:
-                geosite: Rule = rule.model_copy(deep=True)
-                geosite.ip_cidr.clear()
-                geosite.save(f"output/geosite/{cfg.id}.json")
-            if cfg.geoip:
-                geoip = Rule(ip_cidr=rule.ip_cidr)
-                geoip.save(f"output/geoip/{cfg.id}.json")
-
-
-def print_summary(name: str, raw: Rule, rule: Rule, file: TextIO) -> None:
-    fprint = functools.partial(print, file=file)
-    fprint("##", name)
-    fprint("| Type | Count (Raw) | Count (Opt) |")
-    fprint("| ---- | ----------: | ----------: |")
-    if raw.domain:
-        fprint("| DOMAIN |", len(raw.domain), "|", len(rule.domain), "|")
-    if raw.domain_suffix:
- "| DOMAIN-SUFFIX |", - len(raw.domain_suffix), - "|", - len(rule.domain_suffix), - "|", - ) - if raw.domain_keyword: - fprint( - "| DOMAIN-KEYWORD |", - len(raw.domain_keyword), - "|", - len(rule.domain_keyword), - "|", - ) - if raw.domain_regex: - fprint( - "| DOMAIN-REGEX |", len(raw.domain_regex), "|", len(rule.domain_regex), "|" - ) - if raw.ip_cidr: - fprint("| IP-CIDR |", len(raw.ip_cidr), "|", len(rule.ip_cidr), "|") - if raw.process_name: - fprint( - "| PROCESS-NAME |", len(raw.process_name), "|", len(rule.process_name), "|" - ) - fprint("| TOTAL |", len(raw), "|", len(rule), "|") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/src/sbr/__init__.py b/src/sbr/__init__.py index 5049d7bf..62d86f77 100644 --- a/src/sbr/__init__.py +++ b/src/sbr/__init__.py @@ -1,5 +1,3 @@ -from sbr.geoip import GeoIP -from sbr.geosite import GeoSite -from sbr.rule import Rule, RuleSet +import lazy_loader as lazy -__all__ = ["GeoIP", "GeoSite", "Rule", "RuleSet"] +__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__) diff --git a/src/sbr/__init__.pyi b/src/sbr/__init__.pyi new file mode 100644 index 00000000..d41c3406 --- /dev/null +++ b/src/sbr/__init__.pyi @@ -0,0 +1,18 @@ +from . import container, logging, preset, source, utils +from .container import Rule, RuleSet +from .source import PRESETS, PresetConfig, Source, get_rule, get_source + +__all__ = [ + "container", + "logging", + "preset", + "source", + "utils", + "Rule", + "RuleSet", + "PRESETS", + "PresetConfig", + "Source", + "get_rule", + "get_source", +] diff --git a/src/sbr/container/__init__.py b/src/sbr/container/__init__.py new file mode 100644 index 00000000..62d86f77 --- /dev/null +++ b/src/sbr/container/__init__.py @@ -0,0 +1,3 @@ +import lazy_loader as lazy + +__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__) diff --git a/src/sbr/container/__init__.pyi b/src/sbr/container/__init__.pyi new file mode 100644 index 00000000..1e219d51 --- /dev/null +++ b/src/sbr/container/__init__.pyi @@ -0,0 +1,4 @@ +from ._rule import Rule +from ._rule_set import RuleSet + +__all__ = ["Rule", "RuleSet"] diff --git a/src/sbr/container/_rule.py b/src/sbr/container/_rule.py new file mode 100644 index 00000000..2fd5bd3b --- /dev/null +++ b/src/sbr/container/_rule.py @@ -0,0 +1,84 @@ +import operator +from collections.abc import Callable +from typing import Annotated, Any + +import pydantic +from pydantic import BaseModel, ConfigDict + +import sbr +from sbr.container import optim +from sbr.typing import StrPath + +Set = Annotated[set[str], pydantic.BeforeValidator(sbr.utils.as_set)] + + +class Rule(BaseModel): + model_config = ConfigDict(extra="forbid") + domain: Set = set() + domain_suffix: Set = set() + domain_keyword: Set = set() + domain_regex: Set = set() + ip_cidr: Set = set() + + @classmethod + def from_file(cls, path: StrPath) -> "Rule": + rule_set: sbr.RuleSet = sbr.RuleSet.from_file(path) + return Rule().union(*rule_set.rules) + + def __getitem__(self, key: str) -> set[str]: + return getattr(self, key) + + def __len__(self) -> int: + return sum(len(v) for k, v in self) + + def __or__(self, other: "Rule") -> "Rule": + return self.op(operator.or_, other) + + def __sub__(self, other: "Rule") -> "Rule": + return self.op(operator.sub, other) + + def union(self, *others: "Rule") -> "Rule": + return Rule(**{k: v.union(*(r[k] for r in others)) for k, v in self}) + + def difference(self, *others: "Rule") -> "Rule": + return Rule(**{k: v.difference(*(r[k] for r in others)) for k, v in self}) + + def 
geoip(self) -> "Rule": + return Rule(ip_cidr=self.ip_cidr) + + def geosite(self) -> "Rule": + return Rule( + domain=self.domain, + domain_suffix=self.domain_suffix, + domain_keyword=self.domain_keyword, + domain_regex=self.domain_regex, + ) + + def op(self, op: Callable[[Any, Any], Any], other: "Rule") -> "Rule": + return Rule(**{k: op(self[k], other[k]) for k, v in self}) + + def optimize(self) -> None: + self.domain = optim.remove_unresolvable(self.domain) + self.domain, self.domain_suffix = optim.merge_domain_with_suffix( + self.domain, self.domain_suffix + ) + self.domain_suffix = optim.merge_between_suffix(self.domain_suffix) + self.domain, self.domain_keyword = optim.merge_domain_with_keyword( + self.domain, self.domain_keyword + ) + self.domain_suffix, self.domain_keyword = optim.merge_suffix_with_keyword( + self.domain_suffix, self.domain_keyword + ) + self.ip_cidr = optim.merge_ip_cidr(self.ip_cidr) + + def save(self, path: StrPath) -> None: + sbr.RuleSet(version=2, rules=[self]).save(path) + + def summary(self) -> str: + res: str = "" + for k, v in self: + if v: + name: str = k.upper().replace("_", "-") + res += f"{name}: {len(v)}\n" + res += f"TOTAL: {len(self)}" + return res diff --git a/src/sbr/container/_rule_set.py b/src/sbr/container/_rule_set.py new file mode 100644 index 00000000..9be8e594 --- /dev/null +++ b/src/sbr/container/_rule_set.py @@ -0,0 +1,23 @@ +from pathlib import Path +from typing import Literal + +from pydantic import BaseModel + +import sbr +from sbr.typing import StrPath + + +class RuleSet(BaseModel): + version: Literal[1, 2] + rules: list[sbr.Rule] + + @classmethod + def from_file(cls, path: StrPath) -> "RuleSet": + fpath: Path = Path(path) + return RuleSet.model_validate_json(fpath.read_text()) + + def save(self, path: StrPath) -> None: + json: str = self.model_dump_json(exclude_defaults=True) + fpath: Path = Path(path) + fpath.parent.mkdir(parents=True, exist_ok=True) + fpath.write_text(json) diff --git a/src/sbr/container/optim/__init__.py b/src/sbr/container/optim/__init__.py new file mode 100644 index 00000000..62d86f77 --- /dev/null +++ b/src/sbr/container/optim/__init__.py @@ -0,0 +1,3 @@ +import lazy_loader as lazy + +__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__) diff --git a/src/sbr/container/optim/__init__.pyi b/src/sbr/container/optim/__init__.pyi new file mode 100644 index 00000000..c8f80553 --- /dev/null +++ b/src/sbr/container/optim/__init__.pyi @@ -0,0 +1,15 @@ +from ._merge_between_suffix import merge_between_suffix +from ._merge_domain_with_keyword import merge_domain_with_keyword +from ._merge_domain_with_suffix import merge_domain_with_suffix +from ._merge_ip_cidr import merge_ip_cidr +from ._merge_suffix_with_keyword import merge_suffix_with_keyword +from ._remove_unresolvable import remove_unresolvable + +__all__ = [ + "merge_between_suffix", + "merge_domain_with_keyword", + "merge_domain_with_suffix", + "merge_ip_cidr", + "merge_suffix_with_keyword", + "remove_unresolvable", +] diff --git a/src/sbr/container/optim/_merge_between_suffix.py b/src/sbr/container/optim/_merge_between_suffix.py new file mode 100644 index 00000000..65b665b7 --- /dev/null +++ b/src/sbr/container/optim/_merge_between_suffix.py @@ -0,0 +1,11 @@ +import sbr.container.optim._utils as u + + +def merge_between_suffix(domain_suffix: set[str]) -> set[str]: + suffix_list: list[str] = sorted(domain_suffix, key=len) + suffix: set[str] = set() + for s in suffix_list: + if u.match_domain_suffix(s, suffix): + continue + suffix.add(s) + return 
diff --git a/src/sbr/container/optim/__init__.py b/src/sbr/container/optim/__init__.py
new file mode 100644
index 00000000..62d86f77
--- /dev/null
+++ b/src/sbr/container/optim/__init__.py
@@ -0,0 +1,3 @@
+import lazy_loader as lazy
+
+__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
diff --git a/src/sbr/container/optim/__init__.pyi b/src/sbr/container/optim/__init__.pyi
new file mode 100644
index 00000000..c8f80553
--- /dev/null
+++ b/src/sbr/container/optim/__init__.pyi
@@ -0,0 +1,15 @@
+from ._merge_between_suffix import merge_between_suffix
+from ._merge_domain_with_keyword import merge_domain_with_keyword
+from ._merge_domain_with_suffix import merge_domain_with_suffix
+from ._merge_ip_cidr import merge_ip_cidr
+from ._merge_suffix_with_keyword import merge_suffix_with_keyword
+from ._remove_unresolvable import remove_unresolvable
+
+__all__ = [
+    "merge_between_suffix",
+    "merge_domain_with_keyword",
+    "merge_domain_with_suffix",
+    "merge_ip_cidr",
+    "merge_suffix_with_keyword",
+    "remove_unresolvable",
+]
diff --git a/src/sbr/container/optim/_merge_between_suffix.py b/src/sbr/container/optim/_merge_between_suffix.py
new file mode 100644
index 00000000..65b665b7
--- /dev/null
+++ b/src/sbr/container/optim/_merge_between_suffix.py
@@ -0,0 +1,11 @@
+import sbr.container.optim._utils as u
+
+
+def merge_between_suffix(domain_suffix: set[str]) -> set[str]:
+    suffix_list: list[str] = sorted(domain_suffix, key=len)
+    suffix: set[str] = set()
+    for s in suffix_list:
+        if u.match_domain_suffix(s, suffix):
+            continue
+        suffix.add(s)
+    return suffix
diff --git a/src/sbr/container/optim/_merge_domain_with_keyword.py b/src/sbr/container/optim/_merge_domain_with_keyword.py
new file mode 100644
index 00000000..c29e9a19
--- /dev/null
+++ b/src/sbr/container/optim/_merge_domain_with_keyword.py
@@ -0,0 +1,13 @@
+import sbr.container.optim._utils as u
+
+
+def merge_domain_with_keyword(
+    domain: set[str], domain_keyword: set[str]
+) -> tuple[set[str], set[str]]:
+    domain_new: set[str] = set()
+    keyword_new: set[str] = set(domain_keyword)
+    for d in domain:
+        if u.match_domain_keyword(d, keyword_new):
+            continue
+        domain_new.add(d)
+    return domain_new, keyword_new
diff --git a/src/sbr/container/optim/_merge_domain_with_suffix.py b/src/sbr/container/optim/_merge_domain_with_suffix.py
new file mode 100644
index 00000000..52bb40ac
--- /dev/null
+++ b/src/sbr/container/optim/_merge_domain_with_suffix.py
@@ -0,0 +1,17 @@
+import sbr.container.optim._utils as u
+
+
+def merge_domain_with_suffix(
+    domain: set[str], domain_suffix: set[str]
+) -> tuple[set[str], set[str]]:
+    domain_new: set[str] = set()
+    suffix_new: set[str] = domain_suffix.copy()
+    for d in domain:
+        if "." + d in suffix_new:
+            suffix_new.remove("." + d)
+            suffix_new.add(d)
+            continue
+        if u.match_domain_suffix(d, suffix_new):
+            continue
+        domain_new.add(d)
+    return domain_new, suffix_new
diff --git a/src/sbr/container/optim/_merge_ip_cidr.py b/src/sbr/container/optim/_merge_ip_cidr.py
new file mode 100644
index 00000000..186ce9a8
--- /dev/null
+++ b/src/sbr/container/optim/_merge_ip_cidr.py
@@ -0,0 +1,22 @@
+import ipaddress
+import itertools
+from ipaddress import IPv4Network, IPv6Network
+
+
+def merge_ip_cidr(ip_cidr: set[str]) -> set[str]:
+    ipv4_networks: list[IPv4Network] = []
+    ipv6_networks: list[IPv6Network] = []
+    for cidr in ip_cidr:
+        network: IPv4Network | IPv6Network = ipaddress.ip_network(cidr)
+        if network.version == 4:
+            ipv4_networks.append(network)
+        elif network.version == 6:
+            ipv6_networks.append(network)
+    result: set[str] = {
+        str(network)
+        for network in itertools.chain(
+            ipaddress.collapse_addresses(ipv4_networks),
+            ipaddress.collapse_addresses(ipv6_networks),
+        )
+    }
+    return result
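`merge_ip_cidr` leans entirely on the standard library's `ipaddress.collapse_addresses`, which repeatedly joins nested and adjacent networks of the same address family. A standalone example of the collapse it performs:

```python
import ipaddress

nets = [
    ipaddress.ip_network("10.0.0.0/25"),
    ipaddress.ip_network("10.0.0.128/25"),  # sibling half of the same /24
    ipaddress.ip_network("10.0.1.0/24"),  # adjacent /24
]
# the two /25s merge into 10.0.0.0/24, which then merges with 10.0.1.0/24
print([str(n) for n in ipaddress.collapse_addresses(nets)])  # ['10.0.0.0/23']
```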
diff --git a/src/sbr/container/optim/_merge_suffix_with_keyword.py b/src/sbr/container/optim/_merge_suffix_with_keyword.py
new file mode 100644
index 00000000..213d65ec
--- /dev/null
+++ b/src/sbr/container/optim/_merge_suffix_with_keyword.py
@@ -0,0 +1,13 @@
+import sbr.container.optim._utils as u
+
+
+def merge_suffix_with_keyword(
+    domain_suffix: set[str], domain_keyword: set[str]
+) -> tuple[set[str], set[str]]:
+    suffix_new: set[str] = set()
+    keyword_new: set[str] = domain_keyword.copy()
+    for s in domain_suffix:
+        if u.match_domain_keyword(s, keyword_new):
+            continue
+        suffix_new.add(s)
+    return suffix_new, keyword_new
diff --git a/src/sbr/container/optim/_remove_unresolvable.py b/src/sbr/container/optim/_remove_unresolvable.py
new file mode 100644
index 00000000..d9a5f6a3
--- /dev/null
+++ b/src/sbr/container/optim/_remove_unresolvable.py
@@ -0,0 +1,5 @@
+import validators
+
+
+def remove_unresolvable(domain: set[str]) -> set[str]:
+    return {d for d in domain if validators.domain(d)}
diff --git a/src/sbr/container/optim/_utils.py b/src/sbr/container/optim/_utils.py
new file mode 100644
index 00000000..b67eb2d7
--- /dev/null
+++ b/src/sbr/container/optim/_utils.py
@@ -0,0 +1,20 @@
+import itertools
+import re
+
+
+def split_domain(domain: str) -> list[str]:
+    return re.split(r"(\.)", domain)
+
+
+def domain_suffixes(domain: str) -> list[str]:
+    labels: list[str] = split_domain(domain[::-1])
+    suffixes: list[str] = [suffix[::-1] for suffix in itertools.accumulate(labels)]
+    return suffixes
+
+
+def match_domain_suffix(domain: str, suffix: set[str]) -> bool:
+    return any((s in suffix) for s in domain_suffixes(domain))
+
+
+def match_domain_keyword(domain: str, keyword: set[str]) -> bool:
+    return any((k in domain) for k in keyword)
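The reversed-`accumulate` trick in `_utils.py` enumerates every suffix of a domain, with and without the leading dot, so suffix matching becomes set membership instead of repeated `endswith` scans. The same logic, runnable on its own:

```python
import itertools
import re


def domain_suffixes(domain: str) -> list[str]:
    labels = re.split(r"(\.)", domain[::-1])  # keep the dots as tokens
    return [suffix[::-1] for suffix in itertools.accumulate(labels)]


print(domain_suffixes("www.example.com"))
# ['com', '.com', 'example.com', '.example.com', 'www.example.com']
```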
diff --git a/src/sbr/geoip.py b/src/sbr/geoip.py
deleted file mode 100644
index ef4dad82..00000000
--- a/src/sbr/geoip.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import asyncio
-import asyncio.subprocess as asp
-import functools
-import pathlib
-import shlex
-import subprocess as sp
-import tempfile
-
-import cachetools
-
-from sbr import utils
-from sbr.rule import Rule
-from sbr.typing import StrPath
-
-
-class GeoIP:
-    file: pathlib.Path
-    _export_cache: cachetools.Cache[str, Rule]
-    _dtemp: tempfile.TemporaryDirectory[str]
-
-    @classmethod
-    async def from_url(cls, url: StrPath) -> "GeoIP":
-        geoip: GeoIP = cls()
-        geoip.file = await utils.download_file(url, geoip.dtemp)
-        return geoip
-
-    def __init__(self) -> None:
-        self._export_cache = cachetools.LRUCache(128)
-        self._dtemp = tempfile.TemporaryDirectory()
-
-    def __repr__(self) -> str:
-        return self.summary
-
-    @functools.cached_property
-    def args(self) -> list[str]:
-        return ["sing-box", "geoip", "--file", str(self.file)]
-
-    @functools.cached_property
-    def countries(self) -> list[str]:
-        proc: sp.CompletedProcess[str] = sp.run(
-            [*self.args, "list"],
-            stdin=sp.DEVNULL,
-            stdout=sp.PIPE,
-            text=True,
-            check=True,
-        )
-        countries: list[str] = proc.stdout.splitlines()
-        return countries
-
-    @functools.cached_property
-    def dtemp(self) -> pathlib.Path:
-        return pathlib.Path(self._dtemp.name)
-
-    async def export(self, country: str) -> Rule:
-        if country not in self._export_cache:
-            output: pathlib.Path = self.dtemp / f"geoip-{country}.json"
-            args: list[str] = [*self.args, "export", country, "--output", str(output)]
-            proc: asp.Process = await asyncio.create_subprocess_exec(
-                *args, stdin=asp.DEVNULL
-            )
-            retcode: int = await proc.wait()
-            if retcode != 0:
-                raise sp.CalledProcessError(retcode, shlex.join(args))
-            self._export_cache[country] = await Rule.from_json_url(output)
-        return self._export_cache[country]
-
-    @functools.cached_property
-    def summary(self) -> str:
-        return "\n".join(self.countries)
diff --git a/src/sbr/geosite.py b/src/sbr/geosite.py
deleted file mode 100644
index 72cb494d..00000000
--- a/src/sbr/geosite.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import asyncio
-import asyncio.subprocess as asp
-import functools
-import pathlib
-import re
-import shlex
-import subprocess as sp
-import tempfile
-
-import cachetools
-
-from sbr import utils
-from sbr.rule import Rule
-from sbr.typing import StrPath
-
-
-class GeoSite:
-    file: pathlib.Path
-    _dtemp: tempfile.TemporaryDirectory[str]
-    _export_cache: cachetools.Cache[str, Rule]
-
-    @classmethod
-    async def from_url(cls, url: StrPath) -> "GeoSite":
-        geosite: GeoSite = cls()
-        geosite.file = await utils.download_file(url, geosite.dtemp)
-        return geosite
-
-    def __init__(self) -> None:
-        self._export_cache = cachetools.LRUCache(128)
-        self._dtemp = tempfile.TemporaryDirectory()
-
-    def __repr__(self) -> str:
-        return self.summary
-
-    @functools.cached_property
-    def args(self) -> list[str]:
-        return ["sing-box", "geosite", "--file", str(self.file)]
-
-    @functools.cached_property
-    def categories(self) -> list[str]:
-        return list(self._categories.keys())
-
-    @functools.cached_property
-    def dtemp(self) -> pathlib.Path:
-        return pathlib.Path(self._dtemp.name)
-
-    async def export(self, category: str) -> Rule:
-        if category not in self._export_cache:
-            output: pathlib.Path = self.dtemp / f"geosite-{category}.json"
-            args: list[str] = [*self.args, "export", category, "--output", str(output)]
-            proc: asp.Process = await asyncio.create_subprocess_exec(
-                *args, stdin=asp.DEVNULL
-            )
-            retcode: int = await proc.wait()
-            if retcode != 0:
-                raise sp.CalledProcessError(retcode, shlex.join(args))
-            self._export_cache[category] = await Rule.from_json_url(output)
-        return self._export_cache[category]
-
-    @functools.cached_property
-    def summary(self) -> str:
-        res: list[str] = []
-        for category, size in self._categories.items():
-            res.append(f"{category} ({size})")
-        return "\n".join(res)
-
-    @functools.cached_property
-    def _categories(self) -> dict[str, int]:
-        proc: sp.CompletedProcess[str] = sp.run(
-            [*self.args, "list"],
-            stdin=sp.DEVNULL,
-            stdout=sp.PIPE,
-            text=True,
-            check=True,
-        )
-        categories: dict[str, int] = {}
-        for line in proc.stdout.splitlines():
-            match: re.Match[str] | None = re.fullmatch(
-                r"(?P<name>.+) \((?P<size>\d+)\)", line
-            )
-            assert match
-            categories[match["name"]] = int(match["size"])
-        return categories
diff --git a/src/sbr/logging/__init__.py b/src/sbr/logging/__init__.py
new file mode 100644
index 00000000..62d86f77
--- /dev/null
+++ b/src/sbr/logging/__init__.py
@@ -0,0 +1,3 @@
+import lazy_loader as lazy
+
+__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
diff --git a/src/sbr/logging/__init__.pyi b/src/sbr/logging/__init__.pyi
new file mode 100644
index 00000000..5d967774
--- /dev/null
+++ b/src/sbr/logging/__init__.pyi
@@ -0,0 +1,3 @@
+from ._init import init
+
+__all__ = ["init"]
diff --git a/src/sbr/logging/_init.py b/src/sbr/logging/_init.py
new file mode 100644
index 00000000..3dc8644d
--- /dev/null
+++ b/src/sbr/logging/_init.py
@@ -0,0 +1,37 @@
+import inspect
+import logging
+import sys
+
+import rich.traceback
+from loguru import logger
+
+
+class InterceptHandler(logging.Handler):
+    def emit(self, record: logging.LogRecord) -> None:
+        # Get corresponding Loguru level if it exists.
+        level: str | int
+        try:
+            level = logger.level(record.levelname).name
+        except ValueError:
+            level = record.levelno
+
+        # Find caller from where originated the logged message.
+        frame, depth = inspect.currentframe(), 0
+        while frame and (depth == 0 or frame.f_code.co_filename == logging.__file__):
+            frame = frame.f_back
+            depth += 1
+
+        logger.opt(depth=depth, exception=record.exc_info).log(
+            level, record.getMessage()
+        )
+
+
+def init(level: str | int = logging.NOTSET) -> None:
+    logger.remove()
+    logger.add(
+        sys.stderr,
+        level=level,
+        filter={"httpcore": logging.INFO, "httpx": logging.INFO},
+    )
+    logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)
+    rich.traceback.install(show_locals=True)
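A sketch of how `init` is meant to be wired up, assuming the lazy-loaded `sbr.logging` attribute resolves as stubbed above: stdlib records are rerouted through `InterceptHandler`, so third-party logs and loguru's own output share one sink.

```python
import logging

import sbr

sbr.logging.init(logging.INFO)
# emitted through the stdlib, delivered by loguru via InterceptHandler
logging.getLogger("httpx").warning("stdlib record, loguru sink")
```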
diff --git a/src/sbr/optim.py b/src/sbr/optim.py
deleted file mode 100644
index c843c767..00000000
--- a/src/sbr/optim.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import ipaddress
-import itertools
-import re
-from collections.abc import Iterable
-
-
-def _split_domain(domain: str) -> list[str]:
-    return re.split(r"(\.)", domain)
-
-
-def _domain_suffixes(domain: str) -> list[str]:
-    labels: list[str] = _split_domain(domain[::-1])
-    suffixes: list[str] = [suffix[::-1] for suffix in itertools.accumulate(labels)]
-    return suffixes
-
-
-def _match_domain_suffix(domain: str, suffix: set[str]) -> bool:
-    return any((s in suffix) for s in _domain_suffixes(domain))
-
-
-def _match_domain_keyword(domain: str, keyword: set[str]) -> bool:
-    return any((k in domain) for k in keyword)
-
-
-def merge_domain_with_suffix(
-    domain: Iterable[str], domain_suffix: Iterable[str]
-) -> tuple[set[str], set[str]]:
-    domain_result: set[str] = set()
-    suffix: set[str] = set(domain_suffix)
-    for d in domain:
-        if "." + d in suffix:
-            suffix.remove("." + d)
-            suffix.add(d)
-            continue
-        if _match_domain_suffix(d, suffix):
-            continue
-        domain_result.add(d)
-    return domain_result, suffix
-
-
-def merge_between_domain_suffix(domain_suffix: Iterable[str]) -> set[str]:
-    domain_suffix = sorted(domain_suffix, key=len)
-    suffix: set[str] = set()
-    for s in domain_suffix:
-        if _match_domain_suffix(s, suffix):
-            continue
-        suffix.add(s)
-    return suffix
-
-
-def merge_domain_with_keyword(
-    domain: Iterable[str], domain_keyword: Iterable[str]
-) -> tuple[set[str], set[str]]:
-    domain_result: set[str] = set()
-    keyword: set[str] = set(domain_keyword)
-    for d in domain:
-        if _match_domain_keyword(d, keyword):
-            continue
-        domain_result.add(d)
-    return domain_result, keyword
-
-
-def merge_domain_suffix_with_keyword(
-    domain_suffix: Iterable[str], domain_keyword: Iterable[str]
-) -> tuple[set[str], set[str]]:
-    suffix: set[str] = set()
-    keyword: set[str] = set(domain_keyword)
-    for d in domain_suffix:
-        if _match_domain_keyword(d, keyword):
-            continue
-        suffix.add(d)
-    return suffix, keyword
-
-
-def merge_ip_cidr(ip_cidr: Iterable[str]) -> list[str]:
-    ipv4_networks: list[ipaddress.IPv4Network] = []
-    ipv6_networks: list[ipaddress.IPv6Network] = []
-    for cidr in ip_cidr:
-        network: ipaddress.IPv4Network | ipaddress.IPv6Network = ipaddress.ip_network(
-            cidr
-        )
-        if network.version == 4:
-            ipv4_networks.append(network)
-        elif network.version == 6:
-            ipv6_networks.append(network)
-    ipv4_networks = list(ipaddress.collapse_addresses(ipv4_networks))
-    ipv6_networks = list(ipaddress.collapse_addresses(ipv6_networks))
-    result: list[str] = [
-        str(network)
-        for network in itertools.chain(
-            ipaddress.collapse_addresses(ipv4_networks),
-            ipaddress.collapse_addresses(ipv6_networks),
-        )
-    ]
-    return result
diff --git a/src/sbr/preset/__init__.py b/src/sbr/preset/__init__.py
deleted file mode 100644
index 9d72c92c..00000000
--- a/src/sbr/preset/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from sbr.preset._ads import ads
-from sbr.preset._ai import ai
-from sbr.preset._cn import cn
-from sbr.preset._download import download
-from sbr.preset._emby import emby
-from sbr.preset._media import media
-from sbr.preset._private import private
-from sbr.preset._proxy import proxy
-
-__all__ = ["ads", "ai", "cn", "download", "emby", "media", "private", "proxy"]
diff --git a/src/sbr/preset/_ads.py b/src/sbr/preset/_ads.py
deleted file mode 100644
index 1fb83d76..00000000
--- a/src/sbr/preset/_ads.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import asyncio
-
-import aiocache
-
-from sbr import GeoSite, Rule
-
-
-@aiocache.cached()
-async def ads() -> Rule:
-    rule = Rule()
-    rule += await Rule.from_list_url("data/blackmatrix7/Advertising.list")
-    geosite: GeoSite = await GeoSite.from_url("data/DustinWin/geosite-all.db")
-    rule += await geosite.export("ads")
-    geosite = await GeoSite.from_url("data/MetaCubeX/geosite.db")
-    categories: list[str] = [
-        category
-        for category in geosite.categories
-        if category.endswith(("-ads", "-ads-all", "@ads"))
-    ]
-    rule: Rule = sum(
-        await asyncio.gather(*[geosite.export(category) for category in categories]),
-        start=rule,
-    )
-    return rule
diff --git a/src/sbr/preset/_ai.py b/src/sbr/preset/_ai.py
deleted file mode 100644
index 10e9810c..00000000
--- a/src/sbr/preset/_ai.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import aiocache
-
-from sbr import GeoSite, Rule
-from sbr.preset._ads import ads
-
-
-@aiocache.cached()
-async def ai() -> Rule:
-    rule = Rule()
-    rule += await Rule.from_list_url("data/blackmatrix7/Claude.list")
-    rule += await Rule.from_list_url("data/blackmatrix7/Copilot.list")
-    rule += await Rule.from_list_url("data/blackmatrix7/Gemini.list")
-    rule += await Rule.from_list_url("data/blackmatrix7/OpenAI.list")
-    geosite: GeoSite = await GeoSite.from_url("data/DustinWin/geosite-all.db")
-    rule += await geosite.export("ai")
-    geosite = await GeoSite.from_url("data/MetaCubeX/geosite.db")
-    rule += await geosite.export("openai")
-    rule -= await ads()
-    return rule
diff --git a/src/sbr/preset/_cn.py b/src/sbr/preset/_cn.py
deleted file mode 100644
index 62cec091..00000000
--- a/src/sbr/preset/_cn.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import asyncio
-
-import aiocache
-
-from sbr import GeoSite, Rule
-from sbr.geoip import GeoIP
-from sbr.preset._ads import ads
-from sbr.preset._private import private
-
-
-@aiocache.cached()
-async def cn() -> Rule:
-    rule = Rule()
-    rule += await Rule.from_list_url("data/blackmatrix7/ChinaMax.list")
-    rule += await Rule.from_list_url("data/blackmatrix7/Direct.list")
-    geoip: GeoIP = await GeoIP.from_url("data/DustinWin/geoip-all.db")
-    rule += await geoip.export("cn")
-    geosite: GeoSite = await GeoSite.from_url("data/DustinWin/geosite-all.db")
-    rule += await geosite.export("cn")
-    rule += await Rule.from_json_url("custom/cn.json")
-    geoip = await GeoIP.from_url("data/MetaCubeX/geoip.db")
-    rule += await geoip.export("cn")
-    geosite = await GeoSite.from_url("data/MetaCubeX/geosite.db")
-    rule += await geosite.export("cn")
-    categories: list[str] = [
-        category
-        for category in geosite.categories
-        if category.endswith(("-cn", "-cn", "@cn"))
-    ]
-    rule: Rule = sum(
-        await asyncio.gather(*[geosite.export(category) for category in categories]),
-        start=rule,
-    )
-    rule -= await ads()
-    rule -= await private()
-    return rule
diff --git a/src/sbr/preset/_download.py b/src/sbr/preset/_download.py
deleted file mode 100644
index 328d7371..00000000
--- a/src/sbr/preset/_download.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import aiocache
-
-from sbr import GeoSite, Rule
-from sbr.preset._ads import ads
-from sbr.preset._cn import cn
-
-
-@aiocache.cached()
-async def download() -> Rule:
-    rule = Rule()
-    rule += await Rule.from_list_url("data/blackmatrix7/Developer.list")
-    rule += await Rule.from_list_url("data/blackmatrix7/Download.list")
-    rule += await Rule.from_list_url("data/blackmatrix7/OneDrive.list")
-    rule += await Rule.from_json_url("custom/download.json")
-    geosite: GeoSite = await GeoSite.from_url("data/MetaCubeX/geosite.db")
-    rule += await geosite.export("category-dev")
-    rule += await geosite.export("onedrive")
-    rule -= await ads()
-    rule -= await cn()
-    return rule
diff --git a/src/sbr/preset/_emby.py b/src/sbr/preset/_emby.py
deleted file mode 100644
index 9dcbdc63..00000000
--- a/src/sbr/preset/_emby.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import aiocache
-
-from sbr import Rule
-
-
-@aiocache.cached()
-async def emby() -> Rule:
-    rule = Rule()
-    rule += await Rule.from_json_url("data/NotSFC/Emby.json")
-    return rule
diff --git a/src/sbr/preset/_media.py b/src/sbr/preset/_media.py
deleted file mode 100644
index 4c7a85e3..00000000
--- a/src/sbr/preset/_media.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import aiocache
-
-from sbr import GeoSite, Rule
-from sbr.preset._ads import ads
-
-
-@aiocache.cached()
-async def media() -> None:
-    rule = Rule()
-    rule += await Rule.from_list_url("data/blackmatrix7/GlobalMedia.list")
-    geosite: GeoSite = await GeoSite.from_url("data/DustinWin/geosite-all.db")
-    rule += await geosite.export("youtube")
-    geosite = await GeoSite.from_url("data/MetaCubeX/geosite-lite.db")
-    rule += await geosite.export("proxymedia")
-    rule += await geosite.export("youtube")
-    geosite = await GeoSite.from_url("data/MetaCubeX/geosite.db")
-    rule += await geosite.export("youtube")
-    rule -= await ads()
-    return rule
diff --git a/src/sbr/preset/_private.py b/src/sbr/preset/_private.py
deleted file mode 100644
index 800d077d..00000000
--- a/src/sbr/preset/_private.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import asyncio
-
-import aiocache
-
-from sbr import GeoSite, Rule
-from sbr.geoip import GeoIP
-from sbr.preset._ads import ads
-
-
-@aiocache.cached()
-async def private() -> Rule:
-    rule = Rule()
-    rule += await Rule.from_list_url("data/blackmatrix7/Lan.list")
-    rule += await Rule.from_list_url("data/blackmatrix7/NTPService.list")
-    geoip: GeoIP = await GeoIP.from_url("data/DustinWin/geoip-all.db")
-    rule += await geoip.export("private")
-    geosite: GeoSite = await GeoSite.from_url("data/DustinWin/geosite-all.db")
-    rule += await geosite.export("private")
-    geoip = await GeoIP.from_url("data/MetaCubeX/geoip.db")
-    rule += await geoip.export("private")
-    geosite = await GeoSite.from_url("data/MetaCubeX/geosite.db")
-    rule += await geosite.export("private")
-    categories: list[str] = [
-        category
-        for category in geosite.categories
-        if category.startswith("category-ntp")
-    ]
-    rule: Rule = sum(
-        await asyncio.gather(*[geosite.export(category) for category in categories]),
-        start=rule,
-    )
-    rule -= await ads()
-    return rule
diff --git a/src/sbr/preset/_proxy.py b/src/sbr/preset/_proxy.py
deleted file mode 100644
index f7a770ec..00000000
--- a/src/sbr/preset/_proxy.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import asyncio
-
-import aiocache
-
-from sbr import GeoSite, Rule
-from sbr.preset._ads import ads
-from sbr.preset._cn import cn
-from sbr.preset._private import private
-
-
-@aiocache.cached()
-async def proxy() -> Rule:
-    rule = Rule()
-    rule += await Rule.from_list_url("data/blackmatrix7/Global.list")
-    geosite: GeoSite = await GeoSite.from_url("data/DustinWin/geosite-all.db")
-    rule += await geosite.export("proxy")
-    geosite = await GeoSite.from_url("data/MetaCubeX/geosite.db")
-    categories: list[str] = [
-        category for category in geosite.categories if "!cn" in category
-    ]
-    rule: Rule = sum(
-        await asyncio.gather(*[geosite.export(category) for category in categories]),
-        start=rule,
-    )
-    rule -= await ads()
-    rule -= await private()
-    rule -= await cn()
-    return rule
diff --git a/src/sbr/rule.py b/src/sbr/rule.py
deleted file mode 100644
index f6253228..00000000
--- a/src/sbr/rule.py
+++ /dev/null
@@ -1,170 +0,0 @@
-import json
-import os
-import pathlib
-from typing import Annotated, Literal, Self
-
-import pydantic
-
-from sbr import optim, utils
-from sbr.typing import StrPath
-
-StrSet = Annotated[set[str], pydantic.BeforeValidator(utils.as_list)]
-
-
-class Rule(pydantic.BaseModel):
-    model_config = pydantic.ConfigDict(
-        extra="forbid",
-    )
-    domain: StrSet = set()
-    domain_suffix: StrSet = set()
-    domain_keyword: StrSet = set()
-    domain_regex: StrSet = set()
-    ip_cidr: StrSet = set()
-    process_name: StrSet = set()
-
-    @classmethod
-    def from_json(cls, text: str) -> "Rule":
-        rule_set = RuleSet(**json.loads(text))
-        return rule_set.rule
-
-    @classmethod
-    async def from_json_url(cls, url: StrPath) -> "Rule":
-        text: str = await utils.text_from_url(url)
-        return Rule.from_json(text)
-
-    @classmethod
-    def from_list(cls, text: str) -> "Rule":
-        rule = Rule()
-        for line in utils.strip_comments(text):
-            words: list[str] = line.split(",")
-            match words[0]:
-                case "DOMAIN":
-                    rule.domain.add(words[1])
-                case "DOMAIN-SUFFIX":
-                    rule.domain_suffix.add(words[1])
-                case "DOMAIN-KEYWORD":
-                    rule.domain_keyword.add(words[1])
-                case "DOMAIN-REGEX":
-                    rule.domain_regex.add(words[1])
-                case "IP-CIDR":
-                    rule.ip_cidr.add(words[1])
-                case "IP-CIDR6":
-                    rule.ip_cidr.add(words[1])
-                case "IP-ASN":
-                    pass  # TODO
-                case "PROCESS-NAME":
-                    rule.process_name.add(words[1])
-                case _:
-                    msg: str = f"Unknown rule: {line}"
-                    raise ValueError(msg)
-        return rule
-
-    @classmethod
-    async def from_list_url(cls, url: StrPath) -> "Rule":
-        text: str = await utils.text_from_url(url)
-        return Rule.from_list(text)
-
-    def __add__(self, other: "Rule") -> "Rule":
-        return Rule(
-            domain=self.domain | other.domain,
-            domain_suffix=self.domain_suffix | other.domain_suffix,
-            domain_keyword=self.domain_keyword | other.domain_keyword,
-            domain_regex=self.domain_regex | other.domain_regex,
-            ip_cidr=self.ip_cidr | other.ip_cidr,
-            process_name=self.process_name | other.process_name,
-        )
-
-    def __and__(self, other: "Rule") -> "Rule":
-        return Rule(
-            domain=self.domain & other.domain,
-            domain_suffix=self.domain_suffix & other.domain_suffix,
-            domain_keyword=self.domain_keyword & other.domain_keyword,
-            domain_regex=self.domain_regex & other.domain_regex,
-            ip_cidr=self.ip_cidr & other.ip_cidr,
-            process_name=self.process_name & other.process_name,
-        )
-
-    def __len__(self) -> int:
-        return self.size
-
-    def __repr__(self) -> str:
-        return self.summary
-
-    def __str__(self) -> str:
-        return self.summary
-
-    def __sub__(self, other: "Rule") -> "Rule":
-        return Rule(
-            domain=self.domain - other.domain,
-            domain_suffix=self.domain_suffix - other.domain_suffix,
-            domain_keyword=self.domain_keyword - other.domain_keyword,
-            domain_regex=self.domain_regex - other.domain_regex,
-            ip_cidr=self.ip_cidr - other.ip_cidr,
-            process_name=self.process_name - other.process_name,
-        )
-
-    def optimize(self) -> Self:
-        # TODO: remove this workaround
-        self.domain_suffix = {d.lstrip(".") for d in self.domain_suffix}
-        self.domain, self.domain_suffix = optim.merge_domain_with_suffix(
-            self.domain, self.domain_suffix
-        )
-        self.domain_suffix = optim.merge_between_domain_suffix(self.domain_suffix)
-        self.domain, self.domain_keyword = optim.merge_domain_with_keyword(
-            self.domain, self.domain_keyword
-        )
-        self.domain_suffix, self.domain_keyword = (
-            optim.merge_domain_suffix_with_keyword(
-                self.domain_suffix, self.domain_keyword
-            )
-        )
-        self.ip_cidr = set(optim.merge_ip_cidr(self.ip_cidr))
-        return self
-
-    def save(self, filename: str | os.PathLike[str]) -> None:
-        filename = pathlib.Path(filename)
-        rule_set = RuleSet(version=1, rules=[self])
-        text: str = rule_set.model_dump_json(exclude_defaults=True)
-        filename.parent.mkdir(parents=True, exist_ok=True)
-        filename.write_text(text)
-
-    @property
-    def size(self) -> int:
-        return sum(
-            [
-                len(self.domain),
-                len(self.domain_suffix),
-                len(self.domain_keyword),
-                len(self.domain_regex),
-                len(self.ip_cidr),
-                len(self.process_name),
-            ]
-        )
-
-    @property
-    def summary(self) -> str:
-        res: str = ""
-        if self.domain:
-            res += f"DOMAIN: {len(self.domain)}\n"
-        if self.domain_suffix:
-            res += f"DOMAIN-SUFFIX: {len(self.domain_suffix)}\n"
-        if self.domain_keyword:
-            res += f"DOMAIN-KEYWORD: {len(self.domain_keyword)}\n"
-        if self.domain_regex:
-            res += f"DOMAIN-REGEX: {len(self.domain_regex)}\n"
-        if self.ip_cidr:
-            res += f"IP-CIDR: {len(self.ip_cidr)}\n"
-        if self.process_name:
-            res += f"PROCESS-NAME: {len(self.process_name)}\n"
-        res += f"TOTAL: {self.size}"
-        return res
-
-
-class RuleSet(pydantic.BaseModel):
-    model_config = pydantic.ConfigDict(extra="forbid")
-    version: Literal[1, 2]
-    rules: list[Rule]
-
-    @property
-    def rule(self) -> Rule:
-        return sum(self.rules, start=Rule())
diff --git a/src/sbr/source/__init__.py b/src/sbr/source/__init__.py
new file mode 100644
index 00000000..62d86f77
--- /dev/null
+++ b/src/sbr/source/__init__.py
@@ -0,0 +1,3 @@
+import lazy_loader as lazy
+
+__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
diff --git a/src/sbr/source/__init__.pyi b/src/sbr/source/__init__.pyi
new file mode 100644
index 00000000..28b1ef58
--- /dev/null
+++ b/src/sbr/source/__init__.pyi
@@ -0,0 +1,22 @@
+from . import preset
+from ._abc import Source
+from ._clash import ClashClassicalText
+from ._const import get_source
+from ._geoip import GeoIP
+from ._geosite import GeoSite
+from ._singbox import SingBoxRuleSet
+from .preset import PRESETS, Preset, PresetConfig, get_rule
+
+__all__ = [
+    "preset",
+    "Source",
+    "ClashClassicalText",
+    "get_source",
+    "GeoIP",
+    "GeoSite",
+    "SingBoxRuleSet",
+    "PRESETS",
+    "Preset",
+    "PresetConfig",
+    "get_rule",
+]
diff --git a/src/sbr/source/_abc.py b/src/sbr/source/_abc.py
new file mode 100644
index 00000000..71873363
--- /dev/null
+++ b/src/sbr/source/_abc.py
@@ -0,0 +1,37 @@
+import abc
+import asyncio
+
+from boltons.cacheutils import LRU
+
+from sbr import Rule
+
+
+class Source(abc.ABC):
+    name: str
+    _key_cache: list[str] | None = None
+    _rule_cache: LRU[str, Rule]
+
+    def __init__(self) -> None:
+        self._rule_cache = LRU()
+
+    async def get(self, *key: str) -> Rule:
+        return Rule().union(*(await asyncio.gather(*(self._get(k) for k in key))))
+
+    async def keys(self) -> list[str]:
+        if self._key_cache is not None:
+            return self._key_cache
+        self._key_cache = await self._keys_nocache()
+        return self._key_cache
+
+    @abc.abstractmethod
+    async def _get_nocache(self, key: str) -> Rule: ...
+
+    async def _get(self, key: str) -> Rule:
+        if (r := self._rule_cache.get(key)) is not None:
+            return r
+        rule: Rule = await self._get_nocache(key)
+        self._rule_cache[key] = rule
+        return rule
+
+    @abc.abstractmethod
+    async def _keys_nocache(self) -> list[str]: ...
diff --git a/src/sbr/source/_clash.py b/src/sbr/source/_clash.py
new file mode 100644
index 00000000..b3aad2e4
--- /dev/null
+++ b/src/sbr/source/_clash.py
@@ -0,0 +1,56 @@
+from pathlib import Path
+from string import Template
+
+import sbr
+from sbr import Rule, Source
+from sbr.typing import StrPath
+
+
+class ClashClassicalText(Source):
+    name: str
+    dpath: Path
+    url: Template
+
+    def __init__(self, name: str, url: str | Template, dpath: StrPath) -> None:
+        super().__init__()
+        self.name = name
+        if isinstance(url, str):
+            self.url = Template(url)
+        else:
+            self.url = url
+        self.dpath = Path(dpath)
+
+    async def _get_nocache(self, key: str) -> Rule:
+        filepath: Path = await sbr.utils.download(
+            self.url.substitute({"key": key}), self.dpath / f"{key}.list"
+        )
+        return ClashClassicalText.from_file(filepath)
+
+    async def _keys_nocache(self) -> list[str]:
+        raise NotImplementedError
+
+    @staticmethod
+    def from_file(fpath: StrPath) -> Rule:
+        text: str = Path(fpath).read_text()
+        rule: Rule = Rule()
+        for line in sbr.utils.strip_comments(text):
+            words: list[str] = sbr.utils.split_strip(line)
+            match words[0]:
+                case "DOMAIN":
+                    rule.domain.add(words[1])
+                case "DOMAIN-SUFFIX":
+                    rule.domain_suffix.add(words[1])
+                case "DOMAIN-KEYWORD":
+                    rule.domain_keyword.add(words[1])
+                case "DOMAIN-REGEX":
+                    rule.domain_regex.add(words[1])
+                case "IP-CIDR" | "IP-CIDR6":
+                    rule.ip_cidr.add(words[1])
+                case "IP-ASN":
+                    pass  # TODO
+                case "PROCESS-NAME":
+                    pass  # TODO
+                case _:
+                    msg: str = f"Unknown rule: {line}"
+                    raise ValueError(msg)
+        return rule
diff --git a/src/sbr/source/_const.py b/src/sbr/source/_const.py
new file mode 100644
index 00000000..694ee76c
--- /dev/null
+++ b/src/sbr/source/_const.py
@@ -0,0 +1,48 @@
+from sbr.source import ClashClassicalText, GeoIP, GeoSite, Preset, Source
+from sbr.source._singbox import SingBoxRuleSet
+
+SOURCES: list[Source] = [
+    Preset(),
+    ClashClassicalText(
+        "blackmatrix7",
+        "https://github.com/blackmatrix7/ios_rule_script/raw/master/rule/Clash/${key}/${key}.list",
+        "data/blackmatrix7/",
+    ),
+    GeoIP(
+        "DustinWin/geoip-all",
+        "https://github.com/DustinWin/ruleset_geodata/releases/download/sing-box/geoip-all.db",
+        "data/DustinWin/geoip-all/",
+    ),
+    GeoSite(
+        "DustinWin/geosite-all",
+        "https://github.com/DustinWin/ruleset_geodata/releases/download/sing-box/geosite-all.db",
+        "data/DustinWin/geosite-all/",
+    ),
+    GeoIP(
+        "MetaCubeX/geoip",
+        "https://github.com/MetaCubeX/meta-rules-dat/releases/download/latest/geoip.db",
+        "data/MetaCubeX/geoip/",
+    ),
+    GeoSite(
+        "MetaCubeX/geosite",
+        "https://github.com/MetaCubeX/meta-rules-dat/releases/download/latest/geosite.db",
+        "data/MetaCubeX/geosite/",
+    ),
+    GeoSite(
+        "MetaCubeX/geosite-lite",
+        "https://github.com/MetaCubeX/meta-rules-dat/releases/download/latest/geosite-lite.db",
+        "data/MetaCubeX/geosite-lite/",
+    ),
+    SingBoxRuleSet(
+        "NotSFC",
+        "https://github.com/NotSFC/rulelist/raw/main/sing-box/${key}/${key}.json",
+        "data/NotSFC",
+    ),
+]
+
+
+def get_source(name: str) -> Source:
+    for source in SOURCES:
+        if source.name == name:
+            return source
+    raise KeyError(name)
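A usage sketch for the registry above (keys taken from the presets later in this diff; `Source.get` is async and caches one `Rule` per key in the LRU):

```python
import asyncio

import sbr


async def main() -> None:
    source = sbr.get_source("blackmatrix7")  # a ClashClassicalText source
    rule = await source.get("Advertising", "Lan")  # union of both lists
    print(rule.summary())


asyncio.run(main())
```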
diff --git a/src/sbr/source/_geoip.py b/src/sbr/source/_geoip.py
new file mode 100644
index 00000000..2def0934
--- /dev/null
+++ b/src/sbr/source/_geoip.py
@@ -0,0 +1,63 @@
+import asyncio
+import asyncio.subprocess as asp
+import re
+import subprocess as sp
+from pathlib import Path
+
+import sbr
+from sbr import Rule, Source
+from sbr.typing import StrPath
+
+
+class GeoIP(Source):
+    name: str
+    dpath: Path
+    url: str
+
+    def __init__(self, name: str, url: str, dpath: StrPath) -> None:
+        super().__init__()
+        self.name = name
+        self.dpath = Path(dpath)
+        self.url = url
+
+    @property
+    def fpath(self) -> Path:
+        return self.dpath / "geoip.db"
+
+    async def _get_nocache(self, key: str) -> Rule:
+        await sbr.utils.download(self.url, self.fpath)
+        output: Path = self.dpath / f"{key}.json"
+        args: list[StrPath] = [
+            "sing-box",
+            "geoip",
+            "export",
+            key,
+            "--output",
+            output,
+            "--file",
+            self.fpath,
+        ]
+        proc: asp.Process = await asyncio.create_subprocess_exec(
+            *args, stdin=asp.DEVNULL
+        )
+        ret: int = await proc.wait()
+        if ret != 0:
+            raise sp.CalledProcessError(ret, args)
+        return Rule.from_file(output)
+
+    async def _keys_nocache(self) -> list[str]:
+        await sbr.utils.download(self.url, self.fpath)
+        args: list[StrPath] = ["sing-box", "geoip", "list", "--file", self.fpath]
+        proc: asp.Process = await asyncio.create_subprocess_exec(
+            *args, stdin=asp.DEVNULL, stdout=asp.PIPE
+        )
+        stdout: bytes
+        stdout, _ = await proc.communicate()
+        ret: int = await proc.wait()
+        if ret != 0:
+            raise sp.CalledProcessError(ret, args)
+        categories: dict[str, int] = {}
+        for line in stdout.decode().splitlines():
+            if m := re.match(r"(?P<name>.*) \((?P<count>\d+)\)", line):
+                categories[m["name"]] = int(m["count"])
+        return list(categories.keys())
diff --git a/src/sbr/source/_geosite.py b/src/sbr/source/_geosite.py
new file mode 100644
index 00000000..55ddbff1
--- /dev/null
+++ b/src/sbr/source/_geosite.py
@@ -0,0 +1,63 @@
+import asyncio
+import asyncio.subprocess as asp
+import re
+import subprocess as sp
+from pathlib import Path
+
+import sbr
+from sbr import Rule, Source
+from sbr.typing import StrPath
+
+
+class GeoSite(Source):
+    name: str
+    dpath: Path
+    url: str
+
+    def __init__(self, name: str, url: str, dpath: StrPath) -> None:
+        super().__init__()
+        self.name = name
+        self.dpath = Path(dpath)
+        self.url = url
+
+    @property
+    def fpath(self) -> Path:
+        return self.dpath / "geosite.db"
+
+    async def _get_nocache(self, key: str) -> Rule:
+        await sbr.utils.download(self.url, self.fpath)
+        output: Path = self.dpath / f"{key}.json"
+        args: list[StrPath] = [
+            "sing-box",
+            "geosite",
+            "export",
+            key,
+            "--output",
+            output,
+            "--file",
+            self.fpath,
+        ]
+        proc: asp.Process = await asyncio.create_subprocess_exec(
+            *args, stdin=asp.DEVNULL
+        )
+        ret: int = await proc.wait()
+        if ret != 0:
+            raise sp.CalledProcessError(ret, args)
+        return Rule.from_file(output)
+
+    async def _keys_nocache(self) -> list[str]:
+        await sbr.utils.download(self.url, self.fpath)
+        args: list[StrPath] = ["sing-box", "geosite", "list", "--file", self.fpath]
+        proc: asp.Process = await asyncio.create_subprocess_exec(
+            *args, stdin=asp.DEVNULL, stdout=asp.PIPE
+        )
+        stdout: bytes
+        stdout, _ = await proc.communicate()
+        ret: int = await proc.wait()
+        if ret != 0:
+            raise sp.CalledProcessError(ret, args)
+        categories: dict[str, int] = {}
+        for line in stdout.decode().splitlines():
+            if m := re.match(r"(?P<name>.*) \((?P<count>\d+)\)", line):
+                categories[m["name"]] = int(m["count"])
+        return list(categories.keys())
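
Both `_keys_nocache` implementations above assume that `sing-box geoip list` and `sing-box geosite list` print one category per line in the form `name (count)`; the regex's named groups capture those two fields. A small sketch of the parse against a hypothetical output line:

import re

line = "category-ads-all (1087)"  # hypothetical `sing-box geosite list` output
if m := re.match(r"(?P<name>.*) \((?P<count>\d+)\)", line):
    print(m["name"], int(m["count"]))  # -> category-ads-all 1087
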
diff --git a/src/sbr/source/_singbox.py b/src/sbr/source/_singbox.py
new file mode 100644
index 00000000..e88f74b7
--- /dev/null
+++ b/src/sbr/source/_singbox.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+from string import Template
+
+import sbr
+from sbr import Rule, Source
+from sbr.typing import StrPath
+
+
+class SingBoxRuleSet(Source):
+    name: str
+    dpath: Path
+    url: Template
+
+    def __init__(self, name: str, url: str | Template, dpath: StrPath) -> None:
+        super().__init__()
+        self.name = name
+        if isinstance(url, str):
+            self.url = Template(url)
+        else:
+            self.url = url
+        self.dpath = Path(dpath)
+
+    async def _get_nocache(self, key: str) -> Rule:
+        filepath: Path = await sbr.utils.download(
+            self.url.substitute({"key": key}), self.dpath / f"{key}.json"
+        )
+        return Rule.from_file(filepath)
+
+    async def _keys_nocache(self) -> list[str]:
+        raise NotImplementedError
diff --git a/src/sbr/source/preset/__init__.py b/src/sbr/source/preset/__init__.py
new file mode 100644
index 00000000..62d86f77
--- /dev/null
+++ b/src/sbr/source/preset/__init__.py
@@ -0,0 +1,3 @@
+import lazy_loader as lazy
+
+__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
diff --git a/src/sbr/source/preset/__init__.pyi b/src/sbr/source/preset/__init__.pyi
new file mode 100644
index 00000000..7bfb8203
--- /dev/null
+++ b/src/sbr/source/preset/__init__.pyi
@@ -0,0 +1,5 @@
+from ._const import PRESETS, PresetConfig, get_preset
+from ._preset import Preset
+from ._rule import get_rule
+
+__all__ = ["PRESETS", "get_preset", "Preset", "get_rule", "PresetConfig"]
diff --git a/src/sbr/source/preset/_const.py b/src/sbr/source/preset/_const.py
new file mode 100644
index 00000000..20ef9511
--- /dev/null
+++ b/src/sbr/source/preset/_const.py
@@ -0,0 +1,109 @@
+from typing import NamedTuple
+
+import sbr
+from sbr.container import Rule
+
+
+class PresetConfig(NamedTuple):
+    id: str
+    name: str
+    include: list[str]
+    exclude: list[str]
+
+
+PRESETS: list[PresetConfig] = [
+    PresetConfig(
+        "ads",
+        "🛑 ADs",
+        [
+            "blackmatrix7:Advertising",
+            "DustinWin/geosite-all:ads",
+            "MetaCubeX/geosite:*-ads,*-ads-all,*@ads",
+        ],
+        [],
+    ),
+    PresetConfig(
+        "private",
+        "🔒 Private",
+        [
+            "blackmatrix7:Lan,NTPService",
+            "DustinWin/geoip-all:private",
+            "DustinWin/geosite-all:private",
+            "MetaCubeX/geoip:private",
+            "MetaCubeX/geosite:category-ntp*,private",
+        ],
+        ["preset:ads"],
+    ),
+    PresetConfig(
+        "cn",
+        "🇨🇳 CN",
+        [
+            "blackmatrix7:ChinaMax,Direct",
+            "DustinWin/geoip-all:cn",
+            "DustinWin/geosite-all:cn",
+            "MetaCubeX/geoip:cn",
+            "MetaCubeX/geosite:cn,*-cn,*@cn",
+        ],
+        ["preset:ads", "preset:private"],
+    ),
+    PresetConfig(
+        "proxy",
+        "🔗 Proxy",
+        [
+            "blackmatrix7:Global",
+            "DustinWin/geosite-all:proxy",
+            "MetaCubeX/geosite:*!cn*",
+        ],
+        ["preset:ads", "preset:cn", "preset:private"],
+    ),
+    PresetConfig(
+        "ai",
+        "🤖 AI",
+        [
+            "blackmatrix7:Claude,Copilot,Gemini,OpenAI",
+            "DustinWin/geosite-all:ai",
+            "MetaCubeX/geosite:openai",
+        ],
+        ["preset:ads", "preset:cn", "preset:private"],
+    ),
+    PresetConfig(
+        "emby",
+        "🍟 Emby",
+        ["NotSFC:Emby"],
+        ["preset:ads", "preset:cn", "preset:private"],
+    ),
+    PresetConfig(
+        "download",
+        "☁️ Download",
+        [
+            "blackmatrix7:Developer,Download,OneDrive",
+            "MetaCubeX/geosite:category-dev,onedrive",
+        ],
+        ["preset:ads", "preset:cn", "preset:private"],
+    ),
+    PresetConfig(
+        "media",
+        "🎬 Media",
+        [
+            "blackmatrix7:GlobalMedia",
+            "DustinWin/geosite-all:youtube",
+            "MetaCubeX/geosite-lite:proxymedia,youtube",
+            "MetaCubeX/geosite:youtube",
+        ],
+        ["preset:ads", "preset:cn", "preset:private"],
+    ),
+]
+
+
+async def get_preset(_id: str, *, exclude: bool = True) -> Rule:
+    for cfg in PRESETS:
+        if cfg.id == _id:
+            return await _get_preset(cfg, exclude=exclude)
+    raise KeyError(_id)
+
+
+async def _get_preset(cfg: PresetConfig, *, exclude: bool = True) -> Rule:
+    rule: Rule = await sbr.get_rule(*cfg.include)
+    if exclude:
+        rule -= await sbr.get_rule(*cfg.exclude)
+    return rule
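
Each preset resolves to the union of its `include` specs minus the union of its `exclude` presets, so e.g. `cn` never re-lists entries already covered by `ads` or `private`. A minimal sketch, assuming the exports from `sbr.source.preset` and the `len()` support on `Rule` used by `tools/build.py`:

import asyncio

from sbr.source.preset import get_preset

async def main() -> None:
    cn = await get_preset("cn")                     # includes minus excluded presets
    cn_raw = await get_preset("cn", exclude=False)  # raw union of the include specs
    # the excluded variant is never larger than the raw union
    assert len(cn) <= len(cn_raw)

asyncio.run(main())
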
diff --git a/src/sbr/source/preset/_preset.py b/src/sbr/source/preset/_preset.py
new file mode 100644
index 00000000..dbc1ebcb
--- /dev/null
+++ b/src/sbr/source/preset/_preset.py
@@ -0,0 +1,21 @@
+from typing import NamedTuple
+
+from sbr import Rule, Source
+from sbr.source.preset._const import PRESETS, get_preset
+
+
+class PresetConfig(NamedTuple):
+    id: str
+    name: str
+    include: list[str]
+    exclude: list[str]
+
+
+class Preset(Source):
+    name: str = "preset"
+
+    async def _get_nocache(self, key: str) -> Rule:
+        return await get_preset(key)
+
+    async def _keys_nocache(self) -> list[str]:
+        return [preset.id for preset in PRESETS]
diff --git a/src/sbr/source/preset/_rule.py b/src/sbr/source/preset/_rule.py
new file mode 100644
index 00000000..ba4917db
--- /dev/null
+++ b/src/sbr/source/preset/_rule.py
@@ -0,0 +1,26 @@
+import asyncio
+import fnmatch
+
+from loguru import logger
+
+import sbr
+from sbr import Rule, Source
+
+
+async def get_rule(*spec: str) -> Rule:
+    return Rule().union(*(await asyncio.gather(*(_get_rule(s) for s in spec))))
+
+
+async def _get_rule(spec: str) -> Rule:
+    source_name: str
+    key_spec: str
+    source_name, _, key_spec = spec.partition(":")
+    source: Source = sbr.get_source(source_name)
+    keys: list[str] = []
+    for k in sbr.utils.split_strip(key_spec):
+        if "*" in k:
+            keys += fnmatch.filter(await source.keys(), k)
+        else:
+            keys.append(k)
+    logger.debug("{} -> {}", spec, keys)
+    return await source.get(*keys)
diff --git a/src/sbr/typing/__init__.py b/src/sbr/typing/__init__.py
new file mode 100644
index 00000000..62d86f77
--- /dev/null
+++ b/src/sbr/typing/__init__.py
@@ -0,0 +1,3 @@
+import lazy_loader as lazy
+
+__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
diff --git a/src/sbr/typing/__init__.pyi b/src/sbr/typing/__init__.pyi
new file mode 100644
index 00000000..9a3baccf
--- /dev/null
+++ b/src/sbr/typing/__init__.pyi
@@ -0,0 +1,3 @@
+from ._alias import StrPath
+
+__all__ = ["StrPath"]
diff --git a/src/sbr/typing.py b/src/sbr/typing/_alias.py
similarity index 100%
rename from src/sbr/typing.py
rename to src/sbr/typing/_alias.py
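
In `_rule.py` above, a spec has the form `source:key[,key...]`, and any key containing `*` is expanded with `fnmatch` against the source's key list. The expansion logic in isolation, with a hypothetical key list:

import fnmatch

# Hypothetical keys as returned by `await source.keys()`:
available = ["cn", "geolocation-cn", "google-cn", "google", "category-ads-all"]
spec_keys = ["google", "*-cn"]  # parsed from a spec like "example:google,*-cn"

keys: list[str] = []
for k in spec_keys:
    if "*" in k:
        keys += fnmatch.filter(available, k)
    else:
        keys.append(k)
print(keys)  # ['google', 'geolocation-cn', 'google-cn']
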
diff --git a/src/sbr/utils.py b/src/sbr/utils.py
deleted file mode 100644
index 7154f4b7..00000000
--- a/src/sbr/utils.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import functools
-import pathlib
-import urllib.parse
-from collections.abc import Generator
-
-import httpx
-import pydantic.alias_generators
-
-from sbr.typing import StrPath
-
-
-def as_list(x: str | list[str]) -> list[str]:
-    if isinstance(x, str):
-        return [x]
-    return x
-
-
-async def download_file(url: StrPath, directory: StrPath) -> pathlib.Path:
-    match url := path_or_url(url):
-        case pathlib.Path():
-            return url
-        case str():
-            parse: urllib.parse.ParseResult = urllib.parse.urlparse(url)
-            path = pathlib.Path(parse.path)
-            filename: pathlib.Path = pathlib.Path(directory) / path.name
-            client: httpx.AsyncClient = _client()
-            resp: httpx.Response = await client.get(url)
-            resp = resp.raise_for_status()
-            filename.write_bytes(resp.content)
-            return filename
-
-
-def path_or_url(x: StrPath) -> pathlib.Path | str:
-    if isinstance(x, str):
-        parse: urllib.parse.ParseResult = urllib.parse.urlparse(x)
-        if parse.scheme in ("http", "https"):
-            return x
-    return pathlib.Path(x)
-
-
-def strip_comments(text: str) -> Generator[str, None, None]:
-    for line in text.splitlines():
-        stripped: str = line.partition("#")[0].strip()
-        if stripped:
-            yield stripped
-
-
-async def text_from_url(url: StrPath) -> str:
-    match url := path_or_url(url):
-        case pathlib.Path():
-            return url.read_text()
-        case str():
-            client: httpx.AsyncClient = _client()
-            resp: httpx.Response = await client.get(url)
-            resp = resp.raise_for_status()
-            return resp.text
-
-
-def to_kebab(s: str) -> str:
-    s = pydantic.alias_generators.to_snake(s)
-    return s.replace("_", "-")
-
-
-@functools.cache
-def _client() -> httpx.AsyncClient:
-    return httpx.AsyncClient(follow_redirects=True)
diff --git a/src/sbr/utils/__init__.py b/src/sbr/utils/__init__.py
new file mode 100644
index 00000000..62d86f77
--- /dev/null
+++ b/src/sbr/utils/__init__.py
@@ -0,0 +1,3 @@
+import lazy_loader as lazy
+
+__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
diff --git a/src/sbr/utils/__init__.pyi b/src/sbr/utils/__init__.pyi
new file mode 100644
index 00000000..bba653ae
--- /dev/null
+++ b/src/sbr/utils/__init__.pyi
@@ -0,0 +1,5 @@
+from ._as import as_set
+from ._download import download
+from ._str import split_strip, strip_comments
+
+__all__ = ["as_set", "download", "split_strip", "strip_comments"]
diff --git a/src/sbr/utils/_as.py b/src/sbr/utils/_as.py
new file mode 100644
index 00000000..f608d14e
--- /dev/null
+++ b/src/sbr/utils/_as.py
@@ -0,0 +1,7 @@
+from collections.abc import Iterable
+
+
+def as_set(obj: str | Iterable[str]) -> set[str]:
+    if isinstance(obj, str):
+        return {obj}
+    return set(obj)
diff --git a/src/sbr/utils/_download.py b/src/sbr/utils/_download.py
new file mode 100644
index 00000000..f8e0c746
--- /dev/null
+++ b/src/sbr/utils/_download.py
@@ -0,0 +1,101 @@
+import datetime
+import functools
+import os
+import time
+from pathlib import Path
+
+import httpx
+import humanize
+import ubelt as ub
+from icecream import ic
+from rich.progress import (
+    BarColumn,
+    DownloadColumn,
+    Progress,
+    TaskID,
+    TaskProgressColumn,
+    TextColumn,
+    TimeRemainingColumn,
+    TransferSpeedColumn,
+)
+
+from sbr.typing import StrPath
+
+
+@functools.cache
+def _client() -> httpx.AsyncClient:
+    return httpx.AsyncClient(follow_redirects=True)
+
+
+@functools.cache
+def _progress() -> Progress:
+    return Progress(
+        TextColumn("[progress.description]{task.description}"),
+        "[",
+        BarColumn(),
+        "]",
+        TaskProgressColumn(),
+        "(",
+        DownloadColumn(binary_units=True),
+        ")",
+        TimeRemainingColumn(),
+        TransferSpeedColumn(),
+        transient=True,
+    )
+
+
+async def _download(url: str, fpath: Path) -> None:
+    ic(url)
+    client: httpx.AsyncClient = _client()
+    prog: Progress = _progress()
+    task_id: TaskID = prog.add_task(fpath.name)
+    start: float = time.perf_counter()
+    async with client.stream("GET", url) as r:
+        length: int = int(r.headers["Content-Length"])
+        prog.reset(task_id, total=length)
+        prog.start()
+        with fpath.open("wb") as fp:
+            length = 0
+            async for chunk in r.aiter_bytes():
+                bytes_written: int = fp.write(chunk)
+                prog.advance(task_id, bytes_written)
+                length += bytes_written
+    prog.update(task_id, total=length, completed=length)
+    end: float = time.perf_counter()
+    prog.remove_task(task_id)
+    size: str = humanize.naturalsize(length, binary=True)
+    delta: datetime.timedelta = datetime.timedelta(seconds=end - start)
+    minutes: int = delta.seconds // 60
+    seconds: float = delta.seconds % 60 + delta.microseconds / 1000000
+    delta_str: str = f"{minutes:02}:{seconds:08.5f}"
+    speed: str = humanize.naturalsize(length / (end - start), binary=True)
+    prog.console.log(f"Downloaded to '{fpath}'. {size} in {delta_str} ({speed}/s).")
+    if len(prog.tasks) == 0:
+        prog.stop()
+
+
+async def download(
+    url: str,
+    _fpath: StrPath | None = None,
+    *,
+    redo: bool = False,
+    verbose: bool | None = True,
+    expires: str | int | datetime.datetime | datetime.timedelta | None = None,
+) -> Path:
+    if _fpath is None:
+        _fpath = os.path.basename(url)  # noqa: PTH119
+    fpath: Path = Path(_fpath)
+    fname: str = fpath.name
+    fpath.parent.mkdir(parents=True, exist_ok=True)
+    stamp = ub.CacheStamp(
+        fname + ".stamp",
+        dpath=fpath.parent,
+        product=fpath,
+        verbose=verbose,
+        expires=expires,
+        ext=".json",
+    )
+    if redo or stamp.expired():
+        await _download(url, fpath)
+        stamp.renew()
+    return fpath
diff --git a/src/sbr/utils/_str.py b/src/sbr/utils/_str.py
new file mode 100644
index 00000000..eba3434e
--- /dev/null
+++ b/src/sbr/utils/_str.py
@@ -0,0 +1,14 @@
+from collections.abc import Generator
+
+
+def strip_comments(text: str) -> Generator[str, None, None]:
+    for line in text.splitlines():
+        s: str
+        s, _, _ = line.partition("#")
+        s = s.strip()
+        if s:
+            yield s
+
+
+def split_strip(text: str, sep: str | None = ",") -> list[str]:
+    return [s.strip() for s in text.split(sep)]
diff --git a/tools/build.py b/tools/build.py
new file mode 100644
index 00000000..695d781b
--- /dev/null
+++ b/tools/build.py
@@ -0,0 +1,50 @@
+import asyncio
+import datetime
+from pathlib import Path
+
+import anyio
+import prettytable
+from prettytable import PrettyTable
+
+import sbr
+from sbr import PRESETS, Rule, Source
+
+
+async def gen_optimization_summary(preset: Source) -> dict[str, Rule]:
+    fpath: Path = Path("output/README.md")
+    fpath.parent.mkdir(parents=True, exist_ok=True)
+    rules: dict[str, Rule] = {}
+    async with await anyio.open_file(fpath, "w") as fp:
+        await fp.write("# sing-box Rules\n")
+        now: datetime.datetime = datetime.datetime.now(datetime.UTC)
+        await fp.write(f"Updated at: {now.isoformat()}\n")
+        for cfg in PRESETS:
+            rule_raw: Rule = await preset.get(cfg.id)
+            rule_opt: Rule = rule_raw.model_copy(deep=True)
+            rule_opt.optimize()
+            rules[cfg.id] = rule_opt
+            table: PrettyTable = PrettyTable(["Type", "Count (Raw)", "Count (Opt)"])
+            table.align.update({"Type": "l", "Count (Raw)": "r", "Count (Opt)": "r"})
+            for k, v in rule_raw:
+                name: str = k.upper().replace("_", "-")
+                table.add_row([name, len(v), len(rule_opt[k])])
+            table.add_row(["TOTAL", len(rule_raw), len(rule_opt)])
+            await fp.write(f"## {cfg.name}\n")
+            table.set_style(prettytable.MARKDOWN)
+            await fp.write(table.get_string())
+            await fp.write("\n")
+    return rules
+
+
+async def main() -> None:
+    preset: Source = sbr.get_source("preset")
+    rules: dict[str, Rule] = await gen_optimization_summary(preset)
+    for k, r in rules.items():
+        r.save(f"output/rule-set/{k}.json")
+        r.geoip().save(f"output/geoip/{k}.json")
+        r.geosite().save(f"output/geosite/{k}.json")
+
+
+if __name__ == "__main__":
+    sbr.logging.init()
+    asyncio.run(main())
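
Taken together, a rough end-to-end sketch mirroring `tools/build.py`, assuming the `optimize`/`save` methods shown in the diff and using the `ads` preset as an example:

import asyncio

import sbr
from sbr import Rule, Source

async def main() -> None:
    preset: Source = sbr.get_source("preset")
    rule: Rule = await preset.get("ads")   # union of the preset's include specs
    rule.optimize()                        # merge redundant domain/suffix/CIDR entries
    rule.save("output/rule-set/ads.json")  # write the sing-box rule-set JSON

if __name__ == "__main__":
    asyncio.run(main())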